1 /*
2  * Copyright © 2008,2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *    Chris Wilson <chris@chris-wilson.co.uk>
26  *
27  */
28 
29 #include <drm/drmP.h>
30 #include <drm/i915_drm.h>
31 #include "i915_drv.h"
32 #include "i915_trace.h"
33 #include "intel_drv.h"
34 #include <linux/dma_remapping.h>
35 #include <linux/log2.h>
36 #include <linux/pagemap.h>
37 #include <linux/err.h>
38 
/*
 * Driver-private bookkeeping bits stored in an exec object's flags word
 * while an execbuffer is being processed:
 *
 *   __EXEC_OBJECT_HAS_PIN    set once the object has been pinned during
 *                            reservation; cleared on unreserve, where the
 *                            vma pin count is dropped again.
 *   __EXEC_OBJECT_HAS_FENCE  set when a fence was pinned for the object;
 *                            cleared on unreserve, where the fence is
 *                            unpinned.
 *   __EXEC_OBJECT_NEEDS_BIAS the vma must be placed at or above
 *                            BATCH_OFFSET_BIAS.
 *
 * Bit 31 is written as an unsigned constant: (1<<31) shifts into the sign
 * bit of an int, which is undefined in ISO C and, where it "works",
 * yields a negative value that sign-extends when OR-ed into a 64-bit
 * flags word.
 */
#define  __EXEC_OBJECT_HAS_PIN (1U<<31)
#define  __EXEC_OBJECT_HAS_FENCE (1<<30)
#define  __EXEC_OBJECT_NEEDS_BIAS (1<<28)

/* Lowest GTT offset accepted for a vma flagged __EXEC_OBJECT_NEEDS_BIAS. */
#define BATCH_OFFSET_BIAS (256*1024)
44 
/*
 * Per-execbuffer state: the list of vmas taking part in the submission
 * plus a handle -> vma lookup table stored directly behind the struct.
 *
 * The sign of @and selects which member of the trailing union is live:
 *   and <  0: lut[] is a direct index table holding -and entries;
 *   and >= 0: buckets[] is a hash table of and + 1 chains, and @and is the
 *             mask applied to a handle to pick a chain.
 */
struct eb_vmas {
	struct list_head vmas;	/* vmas of this execbuffer, linked via exec_list */
	int and;		/* table selector / hash mask, see above */
	union {
		struct i915_vma *lut[0];
		struct hlist_head buckets[0];
	};
};
53 
54 static struct eb_vmas *
eb_create(struct drm_i915_gem_execbuffer2 * args)55 eb_create(struct drm_i915_gem_execbuffer2 *args)
56 {
57 	struct eb_vmas *eb = NULL;
58 
59 	if (args->flags & I915_EXEC_HANDLE_LUT) {
60 		unsigned size = args->buffer_count;
61 		size *= sizeof(struct i915_vma *);
62 		size += sizeof(struct eb_vmas);
63 		eb = kmalloc(size, GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
64 	}
65 
66 	if (eb == NULL) {
67 		unsigned size = args->buffer_count;
68 		unsigned count = PAGE_SIZE / sizeof(struct hlist_head) / 2;
69 		BUILD_BUG_ON_NOT_POWER_OF_2(PAGE_SIZE / sizeof(struct hlist_head));
70 		while (count > 2*size)
71 			count >>= 1;
72 		eb = kzalloc(count*sizeof(struct hlist_head) +
73 			     sizeof(struct eb_vmas),
74 			     GFP_TEMPORARY);
75 		if (eb == NULL)
76 			return eb;
77 
78 		eb->and = count - 1;
79 	} else
80 		eb->and = -args->buffer_count;
81 
82 	INIT_LIST_HEAD(&eb->vmas);
83 	return eb;
84 }
85 
86 static void
eb_reset(struct eb_vmas * eb)87 eb_reset(struct eb_vmas *eb)
88 {
89 	if (eb->and >= 0)
90 		memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head));
91 }
92 
93 static int
eb_lookup_vmas(struct eb_vmas * eb,struct drm_i915_gem_exec_object2 * exec,const struct drm_i915_gem_execbuffer2 * args,struct i915_address_space * vm,struct drm_file * file)94 eb_lookup_vmas(struct eb_vmas *eb,
95 	       struct drm_i915_gem_exec_object2 *exec,
96 	       const struct drm_i915_gem_execbuffer2 *args,
97 	       struct i915_address_space *vm,
98 	       struct drm_file *file)
99 {
100 	struct drm_i915_private *dev_priv = vm->dev->dev_private;
101 	struct drm_i915_gem_object *obj;
102 	struct list_head objects;
103 	int i, ret;
104 
105 	INIT_LIST_HEAD(&objects);
106 	spin_lock(&file->table_lock);
107 	/* Grab a reference to the object and release the lock so we can lookup
108 	 * or create the VMA without using GFP_ATOMIC */
109 	for (i = 0; i < args->buffer_count; i++) {
110 		obj = to_intel_bo((struct drm_gem_object *)
111 		    idr_find(&file->object_idr, exec[i].handle));
112 		if (obj == NULL) {
113 			spin_unlock(&file->table_lock);
114 			DRM_DEBUG("Invalid object handle %d at index %d\n",
115 				   exec[i].handle, i);
116 			ret = -ENOENT;
117 			goto err;
118 		}
119 
120 		if (!list_empty(&obj->obj_exec_link)) {
121 			spin_unlock(&file->table_lock);
122 			DRM_DEBUG("Object %p [handle %d, index %d] appears more than once in object list\n",
123 				   obj, exec[i].handle, i);
124 			ret = -EINVAL;
125 			goto err;
126 		}
127 
128 		drm_gem_object_reference(&obj->base);
129 		list_add_tail(&obj->obj_exec_link, &objects);
130 	}
131 	spin_unlock(&file->table_lock);
132 
133 	i = 0;
134 	while (!list_empty(&objects)) {
135 		struct i915_vma *vma;
136 		struct i915_address_space *bind_vm = vm;
137 
138 		if (exec[i].flags & EXEC_OBJECT_NEEDS_GTT &&
139 		    USES_FULL_PPGTT(vm->dev)) {
140 			ret = -EINVAL;
141 			goto err;
142 		}
143 
144 		/* If we have secure dispatch, or the userspace assures us that
145 		 * they know what they're doing, use the GGTT VM.
146 		 */
147 		if (((args->flags & I915_EXEC_SECURE) &&
148 		    (i == (args->buffer_count - 1))))
149 			bind_vm = &dev_priv->gtt.base;
150 
151 		obj = list_first_entry(&objects,
152 				       struct drm_i915_gem_object,
153 				       obj_exec_link);
154 
155 		/*
156 		 * NOTE: We can leak any vmas created here when something fails
157 		 * later on. But that's no issue since vma_unbind can deal with
158 		 * vmas which are not actually bound. And since only
159 		 * lookup_or_create exists as an interface to get at the vma
160 		 * from the (obj, vm) we don't run the risk of creating
161 		 * duplicated vmas for the same vm.
162 		 */
163 		vma = i915_gem_obj_lookup_or_create_vma(obj, bind_vm);
164 		if (IS_ERR(vma)) {
165 			DRM_DEBUG("Failed to lookup VMA\n");
166 			ret = PTR_ERR(vma);
167 			goto err;
168 		}
169 
170 		/* Transfer ownership from the objects list to the vmas list. */
171 		list_add_tail(&vma->exec_list, &eb->vmas);
172 		list_del_init(&obj->obj_exec_link);
173 
174 		vma->exec_entry = &exec[i];
175 		if (eb->and < 0) {
176 			eb->lut[i] = vma;
177 		} else {
178 			uint32_t handle = args->flags & I915_EXEC_HANDLE_LUT ? i : exec[i].handle;
179 			vma->exec_handle = handle;
180 			hlist_add_head(&vma->exec_node,
181 				       &eb->buckets[handle & eb->and]);
182 		}
183 		++i;
184 	}
185 
186 	return 0;
187 
188 
189 err:
190 	while (!list_empty(&objects)) {
191 		obj = list_first_entry(&objects,
192 				       struct drm_i915_gem_object,
193 				       obj_exec_link);
194 		list_del_init(&obj->obj_exec_link);
195 		drm_gem_object_unreference(&obj->base);
196 	}
197 	/*
198 	 * Objects already transfered to the vmas list will be unreferenced by
199 	 * eb_destroy.
200 	 */
201 
202 	return ret;
203 }
204 
eb_get_vma(struct eb_vmas * eb,unsigned long handle)205 static struct i915_vma *eb_get_vma(struct eb_vmas *eb, unsigned long handle)
206 {
207 	if (eb->and < 0) {
208 		if (handle >= -eb->and)
209 			return NULL;
210 		return eb->lut[handle];
211 	} else {
212 		struct hlist_head *head;
213 		struct hlist_node *node;
214 
215 		head = &eb->buckets[handle & eb->and];
216 		hlist_for_each(node, head) {
217 			struct i915_vma *vma;
218 
219 			vma = hlist_entry(node, struct i915_vma, exec_node);
220 			if (vma->exec_handle == handle)
221 				return vma;
222 		}
223 		return NULL;
224 	}
225 }
226 
227 static void
i915_gem_execbuffer_unreserve_vma(struct i915_vma * vma)228 i915_gem_execbuffer_unreserve_vma(struct i915_vma *vma)
229 {
230 	struct drm_i915_gem_exec_object2 *entry;
231 	struct drm_i915_gem_object *obj = vma->obj;
232 
233 	if (!drm_mm_node_allocated(&vma->node))
234 		return;
235 
236 	entry = vma->exec_entry;
237 
238 	if (entry->flags & __EXEC_OBJECT_HAS_FENCE)
239 		i915_gem_object_unpin_fence(obj);
240 
241 	if (entry->flags & __EXEC_OBJECT_HAS_PIN)
242 		vma->pin_count--;
243 
244 	entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN);
245 }
246 
eb_destroy(struct eb_vmas * eb)247 static void eb_destroy(struct eb_vmas *eb)
248 {
249 	while (!list_empty(&eb->vmas)) {
250 		struct i915_vma *vma;
251 
252 		vma = list_first_entry(&eb->vmas,
253 				       struct i915_vma,
254 				       exec_list);
255 		list_del_init(&vma->exec_list);
256 		i915_gem_execbuffer_unreserve_vma(vma);
257 		drm_gem_object_unreference(&vma->obj->base);
258 	}
259 	kfree(eb);
260 }
261 
use_cpu_reloc(struct drm_i915_gem_object * obj)262 static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
263 {
264 	return (HAS_LLC(obj->base.dev) ||
265 		obj->base.write_domain == I915_GEM_DOMAIN_CPU ||
266 		!obj->map_and_fenceable ||
267 		obj->cache_level != I915_CACHE_NONE);
268 }
269 
#ifdef __NetBSD__
/*
 * NetBSD only: make __gtt_iomem and __iomem expand to nothing for the
 * relocation helpers that follow, and provide a local iowrite32().
 */
#  define	__gtt_iomem
#  define	__iomem	__gtt_iomem

/*
 * Store @value at @ptr with a plain 32-bit store, issuing
 * __insn_barrier() first (presumably a compiler-level barrier -- confirm
 * against the NetBSD headers).
 */
static inline void
iowrite32(uint32_t value, uint32_t __gtt_iomem *ptr)
{

	__insn_barrier();
	*ptr = value;
}
#endif
282 
283 static int
relocate_entry_cpu(struct drm_i915_gem_object * obj,struct drm_i915_gem_relocation_entry * reloc)284 relocate_entry_cpu(struct drm_i915_gem_object *obj,
285 		   struct drm_i915_gem_relocation_entry *reloc)
286 {
287 	struct drm_device *dev = obj->base.dev;
288 	uint32_t page_offset = offset_in_page(reloc->offset);
289 	char *vaddr;
290 	int ret;
291 
292 	ret = i915_gem_object_set_to_cpu_domain(obj, true);
293 	if (ret)
294 		return ret;
295 
296 	vaddr = kmap_atomic(i915_gem_object_get_page(obj,
297 				reloc->offset >> PAGE_SHIFT));
298 	*(uint32_t *)(vaddr + page_offset) = reloc->delta;
299 
300 	if (INTEL_INFO(dev)->gen >= 8) {
301 		page_offset = offset_in_page(page_offset + sizeof(uint32_t));
302 
303 		if (page_offset == 0) {
304 			kunmap_atomic(vaddr);
305 			vaddr = kmap_atomic(i915_gem_object_get_page(obj,
306 			    (reloc->offset + sizeof(uint32_t)) >> PAGE_SHIFT));
307 		}
308 
309 		*(uint32_t *)(vaddr + page_offset) = 0;
310 	}
311 
312 	kunmap_atomic(vaddr);
313 
314 	return 0;
315 }
316 
317 static int
relocate_entry_gtt(struct drm_i915_gem_object * obj,struct drm_i915_gem_relocation_entry * reloc)318 relocate_entry_gtt(struct drm_i915_gem_object *obj,
319 		   struct drm_i915_gem_relocation_entry *reloc)
320 {
321 	struct drm_device *dev = obj->base.dev;
322 	struct drm_i915_private *dev_priv = dev->dev_private;
323 	uint32_t __iomem *reloc_entry;
324 	void __iomem *reloc_page;
325 	int ret;
326 
327 	ret = i915_gem_object_set_to_gtt_domain(obj, true);
328 	if (ret)
329 		return ret;
330 
331 	ret = i915_gem_object_put_fence(obj);
332 	if (ret)
333 		return ret;
334 
335 	/* Map the page containing the relocation we're going to perform.  */
336 	reloc->offset += i915_gem_obj_ggtt_offset(obj);
337 	reloc_page = io_mapping_map_atomic_wc(dev_priv->gtt.mappable,
338 			reloc->offset & PAGE_MASK);
339 	reloc_entry = (uint32_t __iomem *)
340 		((char __iomem *)reloc_page + offset_in_page(reloc->offset));
341 	iowrite32(reloc->delta, reloc_entry);
342 
343 	if (INTEL_INFO(dev)->gen >= 8) {
344 		reloc_entry += 1;
345 
346 		if (offset_in_page(reloc->offset + sizeof(uint32_t)) == 0) {
347 #ifdef __NetBSD__
348 			io_mapping_unmap_atomic(dev_priv->gtt.mappable,
349 			    reloc_page);
350 #else
351 			io_mapping_unmap_atomic(reloc_page);
352 #endif
353 			reloc_page = io_mapping_map_atomic_wc(
354 					dev_priv->gtt.mappable,
355 					reloc->offset + sizeof(uint32_t));
356 			reloc_entry = reloc_page;
357 		}
358 
359 		iowrite32(0, reloc_entry);
360 	}
361 
362 #ifdef __NetBSD__
363 	io_mapping_unmap_atomic(dev_priv->gtt.mappable,
364 	    reloc_page);
365 #else
366 	io_mapping_unmap_atomic(reloc_page);
367 #endif
368 
369 	return 0;
370 }
371 
#ifdef __NetBSD__
/* Remove the NetBSD-local definitions of __gtt_iomem and __iomem again. */
#  undef	__gtt_iomem
#  undef	__iomem
#endif
376 
377 static int
i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object * obj,struct eb_vmas * eb,struct drm_i915_gem_relocation_entry * reloc)378 i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
379 				   struct eb_vmas *eb,
380 				   struct drm_i915_gem_relocation_entry *reloc)
381 {
382 	struct drm_device *dev = obj->base.dev;
383 	struct drm_gem_object *target_obj;
384 	struct drm_i915_gem_object *target_i915_obj;
385 	struct i915_vma *target_vma;
386 	uint32_t target_offset;
387 	int ret;
388 
389 	/* we've already hold a reference to all valid objects */
390 	target_vma = eb_get_vma(eb, reloc->target_handle);
391 	if (unlikely(target_vma == NULL))
392 		return -ENOENT;
393 	target_i915_obj = target_vma->obj;
394 	target_obj = &target_vma->obj->base;
395 
396 	target_offset = target_vma->node.start;
397 
398 	/* Sandybridge PPGTT errata: We need a global gtt mapping for MI and
399 	 * pipe_control writes because the gpu doesn't properly redirect them
400 	 * through the ppgtt for non_secure batchbuffers. */
401 	if (unlikely(IS_GEN6(dev) &&
402 	    reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
403 	    !target_i915_obj->has_global_gtt_mapping)) {
404 		struct i915_vma *vma =
405 			list_first_entry(&target_i915_obj->vma_list,
406 					 typeof(*vma), vma_link);
407 		vma->bind_vma(vma, target_i915_obj->cache_level, GLOBAL_BIND);
408 	}
409 
410 	/* Validate that the target is in a valid r/w GPU domain */
411 	if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
412 		DRM_DEBUG("reloc with multiple write domains: "
413 			  "obj %p target %d offset %d "
414 			  "read %08x write %08x",
415 			  obj, reloc->target_handle,
416 			  (int) reloc->offset,
417 			  reloc->read_domains,
418 			  reloc->write_domain);
419 		return -EINVAL;
420 	}
421 	if (unlikely((reloc->write_domain | reloc->read_domains)
422 		     & ~I915_GEM_GPU_DOMAINS)) {
423 		DRM_DEBUG("reloc with read/write non-GPU domains: "
424 			  "obj %p target %d offset %d "
425 			  "read %08x write %08x",
426 			  obj, reloc->target_handle,
427 			  (int) reloc->offset,
428 			  reloc->read_domains,
429 			  reloc->write_domain);
430 		return -EINVAL;
431 	}
432 
433 	target_obj->pending_read_domains |= reloc->read_domains;
434 	target_obj->pending_write_domain |= reloc->write_domain;
435 
436 	/* If the relocation already has the right value in it, no
437 	 * more work needs to be done.
438 	 */
439 	if (target_offset == reloc->presumed_offset)
440 		return 0;
441 
442 	/* Check that the relocation address is valid... */
443 	if (unlikely(reloc->offset >
444 		obj->base.size - (INTEL_INFO(dev)->gen >= 8 ? 8 : 4))) {
445 		DRM_DEBUG("Relocation beyond object bounds: "
446 			  "obj %p target %d offset %d size %d.\n",
447 			  obj, reloc->target_handle,
448 			  (int) reloc->offset,
449 			  (int) obj->base.size);
450 		return -EINVAL;
451 	}
452 	if (unlikely(reloc->offset & 3)) {
453 		DRM_DEBUG("Relocation not 4-byte aligned: "
454 			  "obj %p target %d offset %d.\n",
455 			  obj, reloc->target_handle,
456 			  (int) reloc->offset);
457 		return -EINVAL;
458 	}
459 
460 #ifndef __NetBSD__              /* XXX atomic GEM reloc fast path */
461 	/* We can't wait for rendering with pagefaults disabled */
462 	if (obj->active && in_atomic())
463 		return -EFAULT;
464 #endif
465 
466 	reloc->delta += target_offset;
467 	if (use_cpu_reloc(obj))
468 		ret = relocate_entry_cpu(obj, reloc);
469 	else
470 		ret = relocate_entry_gtt(obj, reloc);
471 
472 	if (ret)
473 		return ret;
474 
475 	/* and update the user's relocation entry */
476 	reloc->presumed_offset = target_offset;
477 
478 	return 0;
479 }
480 
481 #ifndef __NetBSD__              /* XXX atomic GEM reloc fast path */
482 static int
i915_gem_execbuffer_relocate_vma(struct i915_vma * vma,struct eb_vmas * eb)483 i915_gem_execbuffer_relocate_vma(struct i915_vma *vma,
484 				 struct eb_vmas *eb)
485 {
486 #define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
487 	struct drm_i915_gem_relocation_entry stack_reloc[N_RELOC(512)];
488 	struct drm_i915_gem_relocation_entry __user *user_relocs;
489 	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
490 	int remain, ret;
491 
492 	user_relocs = to_user_ptr(entry->relocs_ptr);
493 
494 	remain = entry->relocation_count;
495 	while (remain) {
496 		struct drm_i915_gem_relocation_entry *r = stack_reloc;
497 		int count = remain;
498 		if (count > ARRAY_SIZE(stack_reloc))
499 			count = ARRAY_SIZE(stack_reloc);
500 		remain -= count;
501 
502 		if (__copy_from_user_inatomic(r, user_relocs, count*sizeof(r[0])))
503 			return -EFAULT;
504 
505 		do {
506 			u64 offset = r->presumed_offset;
507 
508 			ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, r);
509 			if (ret)
510 				return ret;
511 
512 			if (r->presumed_offset != offset &&
513 			    __copy_to_user_inatomic(&user_relocs->presumed_offset,
514 						    &r->presumed_offset,
515 						    sizeof(r->presumed_offset))) {
516 				return -EFAULT;
517 			}
518 
519 			user_relocs++;
520 			r++;
521 		} while (--count);
522 	}
523 
524 	return 0;
525 #undef N_RELOC
526 }
527 #endif
528 
529 static int
i915_gem_execbuffer_relocate_vma_slow(struct i915_vma * vma,struct eb_vmas * eb,struct drm_i915_gem_relocation_entry * relocs)530 i915_gem_execbuffer_relocate_vma_slow(struct i915_vma *vma,
531 				      struct eb_vmas *eb,
532 				      struct drm_i915_gem_relocation_entry *relocs)
533 {
534 	const struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
535 	int i, ret;
536 
537 	for (i = 0; i < entry->relocation_count; i++) {
538 		ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, &relocs[i]);
539 		if (ret)
540 			return ret;
541 	}
542 
543 	return 0;
544 }
545 
546 static int
i915_gem_execbuffer_relocate(struct eb_vmas * eb)547 i915_gem_execbuffer_relocate(struct eb_vmas *eb)
548 {
549 #ifndef __NetBSD__
550 	struct drm_i915_gem_object *obj;
551 	struct i915_vma *vma;
552 #endif
553 	int ret = 0;
554 
555 #ifdef __NetBSD__              /* XXX atomic GEM reloc fast path */
556         ret = -EFAULT;
557 #else
558 	/* This is the fast path and we cannot handle a pagefault whilst
559 	 * holding the struct mutex lest the user pass in the relocations
560 	 * contained within a mmaped bo. For in such a case we, the page
561 	 * fault handler would call i915_gem_fault() and we would try to
562 	 * acquire the struct mutex again. Obviously this is bad and so
563 	 * lockdep complains vehemently.
564 	 */
565 	pagefault_disable();
566 	list_for_each_entry(vma, &eb->vmas, exec_list) {
567 		ret = i915_gem_execbuffer_relocate_vma(vma, eb);
568 		if (ret)
569 			break;
570 	}
571 	pagefault_enable();
572 #endif
573 
574 	return ret;
575 }
576 
577 static int
need_reloc_mappable(struct i915_vma * vma)578 need_reloc_mappable(struct i915_vma *vma)
579 {
580 	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
581 	return entry->relocation_count && !use_cpu_reloc(vma->obj) &&
582 		i915_is_ggtt(vma->vm);
583 }
584 
585 static int
i915_gem_execbuffer_reserve_vma(struct i915_vma * vma,struct intel_ring_buffer * ring,bool * need_reloc)586 i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
587 				struct intel_ring_buffer *ring,
588 				bool *need_reloc)
589 {
590 	struct drm_i915_gem_object *obj = vma->obj;
591 	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
592 	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
593 	bool need_fence;
594 	uint64_t flags;
595 	int ret;
596 
597 	flags = 0;
598 
599 	need_fence =
600 		has_fenced_gpu_access &&
601 		entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
602 		obj->tiling_mode != I915_TILING_NONE;
603 	if (need_fence || need_reloc_mappable(vma))
604 		flags |= PIN_MAPPABLE;
605 
606 	if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
607 		flags |= PIN_GLOBAL;
608 	if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS)
609 		flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
610 
611 	ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags);
612 	if (ret)
613 		return ret;
614 
615 	entry->flags |= __EXEC_OBJECT_HAS_PIN;
616 
617 	if (has_fenced_gpu_access) {
618 		if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
619 			ret = i915_gem_object_get_fence(obj);
620 			if (ret)
621 				return ret;
622 
623 			if (i915_gem_object_pin_fence(obj))
624 				entry->flags |= __EXEC_OBJECT_HAS_FENCE;
625 
626 			obj->pending_fenced_gpu_access = true;
627 		}
628 	}
629 
630 	if (entry->offset != vma->node.start) {
631 		entry->offset = vma->node.start;
632 		*need_reloc = true;
633 	}
634 
635 	if (entry->flags & EXEC_OBJECT_WRITE) {
636 		obj->base.pending_read_domains = I915_GEM_DOMAIN_RENDER;
637 		obj->base.pending_write_domain = I915_GEM_DOMAIN_RENDER;
638 	}
639 
640 	return 0;
641 }
642 
643 static bool
eb_vma_misplaced(struct i915_vma * vma,bool has_fenced_gpu_access)644 eb_vma_misplaced(struct i915_vma *vma, bool has_fenced_gpu_access)
645 {
646 	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
647 	struct drm_i915_gem_object *obj = vma->obj;
648 	bool need_fence, need_mappable;
649 
650 	need_fence =
651 		has_fenced_gpu_access &&
652 		entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
653 		obj->tiling_mode != I915_TILING_NONE;
654 	need_mappable = need_fence || need_reloc_mappable(vma);
655 
656 	WARN_ON((need_mappable || need_fence) &&
657 	       !i915_is_ggtt(vma->vm));
658 
659 	if (entry->alignment &&
660 	    vma->node.start & (entry->alignment - 1))
661 		return true;
662 
663 	if (need_mappable && !obj->map_and_fenceable)
664 		return true;
665 
666 	if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
667 	    vma->node.start < BATCH_OFFSET_BIAS)
668 		return true;
669 
670 	return false;
671 }
672 
673 static int
i915_gem_execbuffer_reserve(struct intel_ring_buffer * ring,struct list_head * vmas,bool * need_relocs)674 i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
675 			    struct list_head *vmas,
676 			    bool *need_relocs)
677 {
678 	struct drm_i915_gem_object *obj;
679 	struct i915_vma *vma;
680 	struct i915_address_space *vm;
681 	struct list_head ordered_vmas;
682 	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
683 	int retry;
684 
685 	if (list_empty(vmas))
686 		return 0;
687 
688 	vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm;
689 
690 	INIT_LIST_HEAD(&ordered_vmas);
691 	while (!list_empty(vmas)) {
692 		struct drm_i915_gem_exec_object2 *entry;
693 		bool need_fence, need_mappable;
694 
695 		vma = list_first_entry(vmas, struct i915_vma, exec_list);
696 		obj = vma->obj;
697 		entry = vma->exec_entry;
698 
699 		need_fence =
700 			has_fenced_gpu_access &&
701 			entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
702 			obj->tiling_mode != I915_TILING_NONE;
703 		need_mappable = need_fence || need_reloc_mappable(vma);
704 
705 		if (need_mappable)
706 			list_move(&vma->exec_list, &ordered_vmas);
707 		else
708 			list_move_tail(&vma->exec_list, &ordered_vmas);
709 
710 		obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND;
711 		obj->base.pending_write_domain = 0;
712 		obj->pending_fenced_gpu_access = false;
713 	}
714 	list_splice(&ordered_vmas, vmas);
715 
716 	/* Attempt to pin all of the buffers into the GTT.
717 	 * This is done in 3 phases:
718 	 *
719 	 * 1a. Unbind all objects that do not match the GTT constraints for
720 	 *     the execbuffer (fenceable, mappable, alignment etc).
721 	 * 1b. Increment pin count for already bound objects.
722 	 * 2.  Bind new objects.
723 	 * 3.  Decrement pin count.
724 	 *
725 	 * This avoid unnecessary unbinding of later objects in order to make
726 	 * room for the earlier objects *unless* we need to defragment.
727 	 */
728 	retry = 0;
729 	do {
730 		int ret = 0;
731 
732 		/* Unbind any ill-fitting objects or pin. */
733 		list_for_each_entry(vma, vmas, exec_list) {
734 			if (!drm_mm_node_allocated(&vma->node))
735 				continue;
736 
737 			if (eb_vma_misplaced(vma, has_fenced_gpu_access))
738 				ret = i915_vma_unbind(vma);
739 			else
740 				ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs);
741 			if (ret)
742 				goto err;
743 		}
744 
745 		/* Bind fresh objects */
746 		list_for_each_entry(vma, vmas, exec_list) {
747 			if (drm_mm_node_allocated(&vma->node))
748 				continue;
749 
750 			ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs);
751 			if (ret)
752 				goto err;
753 		}
754 
755 err:
756 		if (ret != -ENOSPC || retry++)
757 			return ret;
758 
759 		/* Decrement pin count for bound objects */
760 		list_for_each_entry(vma, vmas, exec_list)
761 			i915_gem_execbuffer_unreserve_vma(vma);
762 
763 		ret = i915_gem_evict_vm(vm, true);
764 		if (ret)
765 			return ret;
766 	} while (1);
767 }
768 
769 static int
i915_gem_execbuffer_relocate_slow(struct drm_device * dev,struct drm_i915_gem_execbuffer2 * args,struct drm_file * file,struct intel_ring_buffer * ring,struct eb_vmas * eb,struct drm_i915_gem_exec_object2 * exec)770 i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
771 				  struct drm_i915_gem_execbuffer2 *args,
772 				  struct drm_file *file,
773 				  struct intel_ring_buffer *ring,
774 				  struct eb_vmas *eb,
775 				  struct drm_i915_gem_exec_object2 *exec)
776 {
777 	struct drm_i915_gem_relocation_entry *reloc;
778 	struct i915_address_space *vm;
779 	struct i915_vma *vma;
780 	bool need_relocs;
781 	int *reloc_offset;
782 	int i, total, ret;
783 	unsigned count = args->buffer_count;
784 
785 	if (WARN_ON(list_empty(&eb->vmas)))
786 		return 0;
787 
788 	vm = list_first_entry(&eb->vmas, struct i915_vma, exec_list)->vm;
789 
790 	/* We may process another execbuffer during the unlock... */
791 	while (!list_empty(&eb->vmas)) {
792 		vma = list_first_entry(&eb->vmas, struct i915_vma, exec_list);
793 		list_del_init(&vma->exec_list);
794 		i915_gem_execbuffer_unreserve_vma(vma);
795 		drm_gem_object_unreference(&vma->obj->base);
796 	}
797 
798 	mutex_unlock(&dev->struct_mutex);
799 
800 	total = 0;
801 	for (i = 0; i < count; i++)
802 		total += exec[i].relocation_count;
803 
804 	reloc_offset = drm_malloc_ab(count, sizeof(*reloc_offset));
805 	reloc = drm_malloc_ab(total, sizeof(*reloc));
806 	if (reloc == NULL || reloc_offset == NULL) {
807 		drm_free_large(reloc);
808 		drm_free_large(reloc_offset);
809 		mutex_lock(&dev->struct_mutex);
810 		return -ENOMEM;
811 	}
812 
813 	total = 0;
814 	for (i = 0; i < count; i++) {
815 		struct drm_i915_gem_relocation_entry __user *user_relocs;
816 		u64 invalid_offset = (u64)-1;
817 		int j;
818 
819 		user_relocs = to_user_ptr(exec[i].relocs_ptr);
820 
821 		if (copy_from_user(reloc+total, user_relocs,
822 				   exec[i].relocation_count * sizeof(*reloc))) {
823 			ret = -EFAULT;
824 			mutex_lock(&dev->struct_mutex);
825 			goto err;
826 		}
827 
828 		/* As we do not update the known relocation offsets after
829 		 * relocating (due to the complexities in lock handling),
830 		 * we need to mark them as invalid now so that we force the
831 		 * relocation processing next time. Just in case the target
832 		 * object is evicted and then rebound into its old
833 		 * presumed_offset before the next execbuffer - if that
834 		 * happened we would make the mistake of assuming that the
835 		 * relocations were valid.
836 		 */
837 		for (j = 0; j < exec[i].relocation_count; j++) {
838 			if (__copy_to_user(&user_relocs[j].presumed_offset,
839 					   &invalid_offset,
840 					   sizeof(invalid_offset))) {
841 				ret = -EFAULT;
842 				mutex_lock(&dev->struct_mutex);
843 				goto err;
844 			}
845 		}
846 
847 		reloc_offset[i] = total;
848 		total += exec[i].relocation_count;
849 	}
850 
851 	ret = i915_mutex_lock_interruptible(dev);
852 	if (ret) {
853 		mutex_lock(&dev->struct_mutex);
854 		goto err;
855 	}
856 
857 	/* reacquire the objects */
858 	eb_reset(eb);
859 	ret = eb_lookup_vmas(eb, exec, args, vm, file);
860 	if (ret)
861 		goto err;
862 
863 	need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
864 	ret = i915_gem_execbuffer_reserve(ring, &eb->vmas, &need_relocs);
865 	if (ret)
866 		goto err;
867 
868 	list_for_each_entry(vma, &eb->vmas, exec_list) {
869 		int offset = vma->exec_entry - exec;
870 		ret = i915_gem_execbuffer_relocate_vma_slow(vma, eb,
871 							    reloc + reloc_offset[offset]);
872 		if (ret)
873 			goto err;
874 	}
875 
876 	/* Leave the user relocations as are, this is the painfully slow path,
877 	 * and we want to avoid the complication of dropping the lock whilst
878 	 * having buffers reserved in the aperture and so causing spurious
879 	 * ENOSPC for random operations.
880 	 */
881 
882 err:
883 	drm_free_large(reloc);
884 	drm_free_large(reloc_offset);
885 	return ret;
886 }
887 
888 static int
i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer * ring,struct list_head * vmas)889 i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
890 				struct list_head *vmas)
891 {
892 	struct i915_vma *vma;
893 	uint32_t flush_domains = 0;
894 	bool flush_chipset = false;
895 	int ret;
896 
897 	list_for_each_entry(vma, vmas, exec_list) {
898 		struct drm_i915_gem_object *obj = vma->obj;
899 		ret = i915_gem_object_sync(obj, ring);
900 		if (ret)
901 			return ret;
902 
903 		if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
904 			flush_chipset |= i915_gem_clflush_object(obj, false);
905 
906 		flush_domains |= obj->base.write_domain;
907 	}
908 
909 	if (flush_chipset)
910 		i915_gem_chipset_flush(ring->dev);
911 
912 	if (flush_domains & I915_GEM_DOMAIN_GTT)
913 		wmb();
914 
915 	/* Unconditionally invalidate gpu caches and ensure that we do flush
916 	 * any residual writes from the previous batch.
917 	 */
918 	return intel_ring_invalidate_all_caches(ring);
919 }
920 
921 static bool
i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 * exec)922 i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
923 {
924 	if (exec->flags & __I915_EXEC_UNKNOWN_FLAGS)
925 		return false;
926 
927 	return ((exec->batch_start_offset | exec->batch_len) & 0x7) == 0;
928 }
929 
930 static int
validate_exec_list(struct drm_i915_gem_exec_object2 * exec,int count)931 validate_exec_list(struct drm_i915_gem_exec_object2 *exec,
932 		   int count)
933 {
934 	int i;
935 	unsigned relocs_total = 0;
936 	unsigned relocs_max = UINT_MAX / sizeof(struct drm_i915_gem_relocation_entry);
937 
938 	for (i = 0; i < count; i++) {
939 		char __user *ptr = to_user_ptr(exec[i].relocs_ptr);
940 		int length; /* limited by fault_in_pages_readable() */
941 
942 		if (exec[i].flags & __EXEC_OBJECT_UNKNOWN_FLAGS)
943 			return -EINVAL;
944 
945 		/* First check for malicious input causing overflow in
946 		 * the worst case where we need to allocate the entire
947 		 * relocation tree as a single array.
948 		 */
949 		if (exec[i].relocation_count > relocs_max - relocs_total)
950 			return -EINVAL;
951 		relocs_total += exec[i].relocation_count;
952 
953 		length = exec[i].relocation_count *
954 			sizeof(struct drm_i915_gem_relocation_entry);
955 		/*
956 		 * We must check that the entire relocation array is safe
957 		 * to read, but since we may need to update the presumed
958 		 * offsets during execution, check for full write access.
959 		 */
960 		if (!access_ok(VERIFY_WRITE, ptr, length))
961 			return -EFAULT;
962 
963 		if (likely(!i915.prefault_disable)) {
964 			if (fault_in_multipages_readable(ptr, length))
965 				return -EFAULT;
966 		}
967 	}
968 
969 	return 0;
970 }
971 
972 static struct i915_hw_context *
i915_gem_validate_context(struct drm_device * dev,struct drm_file * file,struct intel_ring_buffer * ring,const u32 ctx_id)973 i915_gem_validate_context(struct drm_device *dev, struct drm_file *file,
974 			  struct intel_ring_buffer *ring, const u32 ctx_id)
975 {
976 	struct i915_hw_context *ctx = NULL;
977 	struct i915_ctx_hang_stats *hs;
978 
979 	if (ring->id != RCS && ctx_id != DEFAULT_CONTEXT_ID)
980 		return ERR_PTR(-EINVAL);
981 
982 	ctx = i915_gem_context_get(file->driver_priv, ctx_id);
983 	if (IS_ERR(ctx))
984 		return ctx;
985 
986 	hs = &ctx->hang_stats;
987 	if (hs->banned) {
988 		DRM_DEBUG("Context %u tried to submit while banned\n", ctx_id);
989 		return ERR_PTR(-EIO);
990 	}
991 
992 	return ctx;
993 }
994 
995 static void
i915_gem_execbuffer_move_to_active(struct list_head * vmas,struct intel_ring_buffer * ring)996 i915_gem_execbuffer_move_to_active(struct list_head *vmas,
997 				   struct intel_ring_buffer *ring)
998 {
999 	struct i915_vma *vma;
1000 
1001 	list_for_each_entry(vma, vmas, exec_list) {
1002 		struct drm_i915_gem_object *obj = vma->obj;
1003 		u32 old_read = obj->base.read_domains;
1004 		u32 old_write = obj->base.write_domain;
1005 
1006 		obj->base.write_domain = obj->base.pending_write_domain;
1007 		if (obj->base.write_domain == 0)
1008 			obj->base.pending_read_domains |= obj->base.read_domains;
1009 		obj->base.read_domains = obj->base.pending_read_domains;
1010 		obj->fenced_gpu_access = obj->pending_fenced_gpu_access;
1011 
1012 		i915_vma_move_to_active(vma, ring);
1013 		if (obj->base.write_domain) {
1014 			obj->dirty = 1;
1015 			obj->last_write_seqno = intel_ring_get_seqno(ring);
1016 			/* check for potential scanout */
1017 			if (i915_gem_obj_ggtt_bound(obj) &&
1018 			    i915_gem_obj_to_ggtt(obj)->pin_count)
1019 				intel_mark_fb_busy(obj, ring);
1020 		}
1021 
1022 		trace_i915_gem_object_change_domain(obj, old_read, old_write);
1023 	}
1024 }
1025 
1026 static void
i915_gem_execbuffer_retire_commands(struct drm_device * dev,struct drm_file * file,struct intel_ring_buffer * ring,struct drm_i915_gem_object * obj)1027 i915_gem_execbuffer_retire_commands(struct drm_device *dev,
1028 				    struct drm_file *file,
1029 				    struct intel_ring_buffer *ring,
1030 				    struct drm_i915_gem_object *obj)
1031 {
1032 	/* Unconditionally force add_request to emit a full flush. */
1033 	ring->gpu_caches_dirty = true;
1034 
1035 	/* Add a breadcrumb for the completion of the batch buffer */
1036 	(void)__i915_add_request(ring, file, obj, NULL);
1037 }
1038 
1039 static int
i915_reset_gen7_sol_offsets(struct drm_device * dev,struct intel_ring_buffer * ring)1040 i915_reset_gen7_sol_offsets(struct drm_device *dev,
1041 			    struct intel_ring_buffer *ring)
1042 {
1043 	struct drm_i915_private *dev_priv = dev->dev_private;
1044 	int ret, i;
1045 
1046 	if (!IS_GEN7(dev) || ring != &dev_priv->ring[RCS])
1047 		return 0;
1048 
1049 	ret = intel_ring_begin(ring, 4 * 3);
1050 	if (ret)
1051 		return ret;
1052 
1053 	for (i = 0; i < 4; i++) {
1054 		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
1055 		intel_ring_emit(ring, GEN7_SO_WRITE_OFFSET(i));
1056 		intel_ring_emit(ring, 0);
1057 	}
1058 
1059 	intel_ring_advance(ring);
1060 
1061 	return 0;
1062 }
1063 
1064 static struct drm_i915_gem_object *
eb_get_batch(struct eb_vmas * eb)1065 eb_get_batch(struct eb_vmas *eb)
1066 {
1067 	struct i915_vma *vma = list_entry(eb->vmas.prev, typeof(*vma), exec_list);
1068 
1069 	/*
1070 	 * SNA is doing fancy tricks with compressing batch buffers, which leads
1071 	 * to negative relocation deltas. Usually that works out ok since the
1072 	 * relocate address is still positive, except when the batch is placed
1073 	 * very low in the GTT. Ensure this doesn't happen.
1074 	 *
1075 	 * Note that actual hangs have only been observed on gen7, but for
1076 	 * paranoia do it everywhere.
1077 	 */
1078 	vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
1079 
1080 	return vma->obj;
1081 }
1082 
1083 static int
i915_gem_do_execbuffer(struct drm_device * dev,void * data,struct drm_file * file,struct drm_i915_gem_execbuffer2 * args,struct drm_i915_gem_exec_object2 * exec)1084 i915_gem_do_execbuffer(struct drm_device *dev, void *data,
1085 		       struct drm_file *file,
1086 		       struct drm_i915_gem_execbuffer2 *args,
1087 		       struct drm_i915_gem_exec_object2 *exec)
1088 {
1089 	struct drm_i915_private *dev_priv = dev->dev_private;
1090 	struct eb_vmas *eb;
1091 	struct drm_i915_gem_object *batch_obj;
1092 	struct drm_clip_rect *cliprects = NULL;
1093 	struct intel_ring_buffer *ring;
1094 	struct i915_hw_context *ctx;
1095 	struct i915_address_space *vm;
1096 	const u32 ctx_id = i915_execbuffer2_get_context_id(*args);
1097 	u32 exec_start = args->batch_start_offset, exec_len;
1098 	u32 mask, flags;
1099 	int ret, mode, i;
1100 	bool need_relocs;
1101 
1102 	if (!i915_gem_check_execbuffer(args))
1103 		return -EINVAL;
1104 
1105 	ret = validate_exec_list(exec, args->buffer_count);
1106 	if (ret)
1107 		return ret;
1108 
1109 	flags = 0;
1110 	if (args->flags & I915_EXEC_SECURE) {
1111 #ifdef __NetBSD__
1112 		if (!file->is_master || !DRM_SUSER())
1113 		    return -EPERM;
1114 #else
1115 		if (!file->is_master || !capable(CAP_SYS_ADMIN))
1116 		    return -EPERM;
1117 #endif
1118 
1119 		flags |= I915_DISPATCH_SECURE;
1120 	}
1121 	if (args->flags & I915_EXEC_IS_PINNED)
1122 		flags |= I915_DISPATCH_PINNED;
1123 
1124 	if ((args->flags & I915_EXEC_RING_MASK) > I915_NUM_RINGS) {
1125 		DRM_DEBUG("execbuf with unknown ring: %d\n",
1126 			  (int)(args->flags & I915_EXEC_RING_MASK));
1127 		return -EINVAL;
1128 	}
1129 
1130 	if ((args->flags & I915_EXEC_RING_MASK) == I915_EXEC_DEFAULT)
1131 		ring = &dev_priv->ring[RCS];
1132 	else
1133 		ring = &dev_priv->ring[(args->flags & I915_EXEC_RING_MASK) - 1];
1134 
1135 	if (!intel_ring_initialized(ring)) {
1136 		DRM_DEBUG("execbuf with invalid ring: %d\n",
1137 			  (int)(args->flags & I915_EXEC_RING_MASK));
1138 		return -EINVAL;
1139 	}
1140 
1141 	mode = args->flags & I915_EXEC_CONSTANTS_MASK;
1142 	mask = I915_EXEC_CONSTANTS_MASK;
1143 	switch (mode) {
1144 	case I915_EXEC_CONSTANTS_REL_GENERAL:
1145 	case I915_EXEC_CONSTANTS_ABSOLUTE:
1146 	case I915_EXEC_CONSTANTS_REL_SURFACE:
1147 		if (ring == &dev_priv->ring[RCS] &&
1148 		    mode != dev_priv->relative_constants_mode) {
1149 			if (INTEL_INFO(dev)->gen < 4)
1150 				return -EINVAL;
1151 
1152 			if (INTEL_INFO(dev)->gen > 5 &&
1153 			    mode == I915_EXEC_CONSTANTS_REL_SURFACE)
1154 				return -EINVAL;
1155 
1156 			/* The HW changed the meaning on this bit on gen6 */
1157 			if (INTEL_INFO(dev)->gen >= 6)
1158 				mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE;
1159 		}
1160 		break;
1161 	default:
1162 		DRM_DEBUG("execbuf with unknown constants: %d\n", mode);
1163 		return -EINVAL;
1164 	}
1165 
1166 	if (args->buffer_count < 1) {
1167 		DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
1168 		return -EINVAL;
1169 	}
1170 
1171 	if (args->num_cliprects != 0) {
1172 		if (ring != &dev_priv->ring[RCS]) {
1173 			DRM_DEBUG("clip rectangles are only valid with the render ring\n");
1174 			return -EINVAL;
1175 		}
1176 
1177 		if (INTEL_INFO(dev)->gen >= 5) {
1178 			DRM_DEBUG("clip rectangles are only valid on pre-gen5\n");
1179 			return -EINVAL;
1180 		}
1181 
1182 		if (args->num_cliprects > UINT_MAX / sizeof(*cliprects)) {
1183 			DRM_DEBUG("execbuf with %u cliprects\n",
1184 				  args->num_cliprects);
1185 			return -EINVAL;
1186 		}
1187 
1188 		cliprects = kcalloc(args->num_cliprects,
1189 				    sizeof(*cliprects),
1190 				    GFP_KERNEL);
1191 		if (cliprects == NULL) {
1192 			ret = -ENOMEM;
1193 			goto pre_mutex_err;
1194 		}
1195 
1196 		if (copy_from_user(cliprects,
1197 				   to_user_ptr(args->cliprects_ptr),
1198 				   sizeof(*cliprects)*args->num_cliprects)) {
1199 			ret = -EFAULT;
1200 			goto pre_mutex_err;
1201 		}
1202 	}
1203 
1204 	intel_runtime_pm_get(dev_priv);
1205 
1206 	ret = i915_mutex_lock_interruptible(dev);
1207 	if (ret)
1208 		goto pre_mutex_err;
1209 
1210 	if (dev_priv->ums.mm_suspended) {
1211 		mutex_unlock(&dev->struct_mutex);
1212 		ret = -EBUSY;
1213 		goto pre_mutex_err;
1214 	}
1215 
1216 	ctx = i915_gem_validate_context(dev, file, ring, ctx_id);
1217 	if (IS_ERR(ctx)) {
1218 		mutex_unlock(&dev->struct_mutex);
1219 		ret = PTR_ERR(ctx);
1220 		goto pre_mutex_err;
1221 	}
1222 
1223 	i915_gem_context_reference(ctx);
1224 
1225 	vm = ctx->vm;
1226 	if (!USES_FULL_PPGTT(dev))
1227 		vm = &dev_priv->gtt.base;
1228 
1229 	eb = eb_create(args);
1230 	if (eb == NULL) {
1231 		mutex_unlock(&dev->struct_mutex);
1232 		ret = -ENOMEM;
1233 		goto pre_mutex_err;
1234 	}
1235 
1236 	/* Look up object handles */
1237 	ret = eb_lookup_vmas(eb, exec, args, vm, file);
1238 	if (ret)
1239 		goto err;
1240 
1241 	/* take note of the batch buffer before we might reorder the lists */
1242 	batch_obj = eb_get_batch(eb);
1243 
1244 	/* Move the objects en-masse into the GTT, evicting if necessary. */
1245 	need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
1246 	ret = i915_gem_execbuffer_reserve(ring, &eb->vmas, &need_relocs);
1247 	if (ret)
1248 		goto err;
1249 
1250 	/* The objects are in their final locations, apply the relocations. */
1251 	if (need_relocs)
1252 		ret = i915_gem_execbuffer_relocate(eb);
1253 	if (ret) {
1254 		if (ret == -EFAULT) {
1255 			ret = i915_gem_execbuffer_relocate_slow(dev, args, file, ring,
1256 								eb, exec);
1257 			BUG_ON(!mutex_is_locked(&dev->struct_mutex));
1258 		}
1259 		if (ret)
1260 			goto err;
1261 	}
1262 
1263 	/* Set the pending read domains for the batch buffer to COMMAND */
1264 	if (batch_obj->base.pending_write_domain) {
1265 		DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
1266 		ret = -EINVAL;
1267 		goto err;
1268 	}
1269 	batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
1270 
1271 	if (i915_needs_cmd_parser(ring)) {
1272 		ret = i915_parse_cmds(ring,
1273 				      batch_obj,
1274 				      args->batch_start_offset,
1275 				      file->is_master);
1276 		if (ret)
1277 			goto err;
1278 
1279 		/*
1280 		 * XXX: Actually do this when enabling batch copy...
1281 		 *
1282 		 * Set the DISPATCH_SECURE bit to remove the NON_SECURE bit
1283 		 * from MI_BATCH_BUFFER_START commands issued in the
1284 		 * dispatch_execbuffer implementations. We specifically don't
1285 		 * want that set when the command parser is enabled.
1286 		 */
1287 	}
1288 
1289 	/* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
1290 	 * batch" bit. Hence we need to pin secure batches into the global gtt.
1291 	 * hsw should have this fixed, but bdw mucks it up again. */
1292 	if (flags & I915_DISPATCH_SECURE &&
1293 	    !batch_obj->has_global_gtt_mapping) {
1294 		/* When we have multiple VMs, we'll need to make sure that we
1295 		 * allocate space first */
1296 		struct i915_vma *vma = i915_gem_obj_to_ggtt(batch_obj);
1297 		BUG_ON(!vma);
1298 		vma->bind_vma(vma, batch_obj->cache_level, GLOBAL_BIND);
1299 	}
1300 
1301 	if (flags & I915_DISPATCH_SECURE)
1302 		exec_start += i915_gem_obj_ggtt_offset(batch_obj);
1303 	else
1304 		exec_start += i915_gem_obj_offset(batch_obj, vm);
1305 
1306 	ret = i915_gem_execbuffer_move_to_gpu(ring, &eb->vmas);
1307 	if (ret)
1308 		goto err;
1309 
1310 	ret = i915_switch_context(ring, ctx);
1311 	if (ret)
1312 		goto err;
1313 
1314 	if (ring == &dev_priv->ring[RCS] &&
1315 	    mode != dev_priv->relative_constants_mode) {
1316 		ret = intel_ring_begin(ring, 4);
1317 		if (ret)
1318 				goto err;
1319 
1320 		intel_ring_emit(ring, MI_NOOP);
1321 		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
1322 		intel_ring_emit(ring, INSTPM);
1323 		intel_ring_emit(ring, mask << 16 | mode);
1324 		intel_ring_advance(ring);
1325 
1326 		dev_priv->relative_constants_mode = mode;
1327 	}
1328 
1329 	if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
1330 		ret = i915_reset_gen7_sol_offsets(dev, ring);
1331 		if (ret)
1332 			goto err;
1333 	}
1334 
1335 
1336 	exec_len = args->batch_len;
1337 	if (cliprects) {
1338 		for (i = 0; i < args->num_cliprects; i++) {
1339 			ret = i915_emit_box(dev, &cliprects[i],
1340 					    args->DR1, args->DR4);
1341 			if (ret)
1342 				goto err;
1343 
1344 			ret = ring->dispatch_execbuffer(ring,
1345 							exec_start, exec_len,
1346 							flags);
1347 			if (ret)
1348 				goto err;
1349 		}
1350 	} else {
1351 		ret = ring->dispatch_execbuffer(ring,
1352 						exec_start, exec_len,
1353 						flags);
1354 		if (ret)
1355 			goto err;
1356 	}
1357 
1358 	trace_i915_gem_ring_dispatch(ring, intel_ring_get_seqno(ring), flags);
1359 
1360 	i915_gem_execbuffer_move_to_active(&eb->vmas, ring);
1361 	i915_gem_execbuffer_retire_commands(dev, file, ring, batch_obj);
1362 
1363 err:
1364 	/* the request owns the ref now */
1365 	i915_gem_context_unreference(ctx);
1366 	eb_destroy(eb);
1367 
1368 	mutex_unlock(&dev->struct_mutex);
1369 
1370 pre_mutex_err:
1371 	kfree(cliprects);
1372 
1373 	/* intel_gpu_busy should also get a ref, so it will free when the device
1374 	 * is really idle. */
1375 	intel_runtime_pm_put(dev_priv);
1376 	return ret;
1377 }
1378 
1379 /*
1380  * Legacy execbuffer just creates an exec2 list from the original exec object
1381  * list array and passes it to the real function.
1382  */
1383 int
i915_gem_execbuffer(struct drm_device * dev,void * data,struct drm_file * file)1384 i915_gem_execbuffer(struct drm_device *dev, void *data,
1385 		    struct drm_file *file)
1386 {
1387 	struct drm_i915_gem_execbuffer *args = data;
1388 	struct drm_i915_gem_execbuffer2 exec2;
1389 	struct drm_i915_gem_exec_object *exec_list = NULL;
1390 	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
1391 	int ret, i;
1392 
1393 	if (args->buffer_count < 1) {
1394 		DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
1395 		return -EINVAL;
1396 	}
1397 
1398 	/* Copy in the exec list from userland */
1399 	exec_list = drm_malloc_ab(sizeof(*exec_list), args->buffer_count);
1400 	exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count);
1401 	if (exec_list == NULL || exec2_list == NULL) {
1402 		DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
1403 			  args->buffer_count);
1404 		drm_free_large(exec_list);
1405 		drm_free_large(exec2_list);
1406 		return -ENOMEM;
1407 	}
1408 	ret = copy_from_user(exec_list,
1409 			     to_user_ptr(args->buffers_ptr),
1410 			     sizeof(*exec_list) * args->buffer_count);
1411 	if (ret != 0) {
1412 		DRM_DEBUG("copy %d exec entries failed %d\n",
1413 			  args->buffer_count, ret);
1414 		drm_free_large(exec_list);
1415 		drm_free_large(exec2_list);
1416 		return -EFAULT;
1417 	}
1418 
1419 	for (i = 0; i < args->buffer_count; i++) {
1420 		exec2_list[i].handle = exec_list[i].handle;
1421 		exec2_list[i].relocation_count = exec_list[i].relocation_count;
1422 		exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
1423 		exec2_list[i].alignment = exec_list[i].alignment;
1424 		exec2_list[i].offset = exec_list[i].offset;
1425 		if (INTEL_INFO(dev)->gen < 4)
1426 			exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE;
1427 		else
1428 			exec2_list[i].flags = 0;
1429 	}
1430 
1431 	exec2.buffers_ptr = args->buffers_ptr;
1432 	exec2.buffer_count = args->buffer_count;
1433 	exec2.batch_start_offset = args->batch_start_offset;
1434 	exec2.batch_len = args->batch_len;
1435 	exec2.DR1 = args->DR1;
1436 	exec2.DR4 = args->DR4;
1437 	exec2.num_cliprects = args->num_cliprects;
1438 	exec2.cliprects_ptr = args->cliprects_ptr;
1439 	exec2.flags = I915_EXEC_RENDER;
1440 	i915_execbuffer2_set_context_id(exec2, 0);
1441 
1442 	ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list);
1443 	if (!ret) {
1444 		struct drm_i915_gem_exec_object __user *user_exec_list =
1445 			to_user_ptr(args->buffers_ptr);
1446 
1447 		/* Copy the new buffer offsets back to the user's exec list. */
1448 		for (i = 0; i < args->buffer_count; i++) {
1449 			ret = __copy_to_user(&user_exec_list[i].offset,
1450 					     &exec2_list[i].offset,
1451 					     sizeof(user_exec_list[i].offset));
1452 			if (ret) {
1453 				ret = -EFAULT;
1454 				DRM_DEBUG("failed to copy %d exec entries "
1455 					  "back to user (%d)\n",
1456 					  args->buffer_count, ret);
1457 				break;
1458 			}
1459 		}
1460 	}
1461 
1462 	drm_free_large(exec_list);
1463 	drm_free_large(exec2_list);
1464 	return ret;
1465 }
1466 
1467 int
i915_gem_execbuffer2(struct drm_device * dev,void * data,struct drm_file * file)1468 i915_gem_execbuffer2(struct drm_device *dev, void *data,
1469 		     struct drm_file *file)
1470 {
1471 	struct drm_i915_gem_execbuffer2 *args = data;
1472 	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
1473 	int ret;
1474 
1475 	if (args->buffer_count < 1 ||
1476 	    args->buffer_count > UINT_MAX / sizeof(*exec2_list)) {
1477 		DRM_DEBUG("execbuf2 with %d buffers\n", args->buffer_count);
1478 		return -EINVAL;
1479 	}
1480 
1481 	exec2_list = kmalloc(sizeof(*exec2_list)*args->buffer_count,
1482 			     GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
1483 	if (exec2_list == NULL)
1484 		exec2_list = drm_malloc_ab(sizeof(*exec2_list),
1485 					   args->buffer_count);
1486 	if (exec2_list == NULL) {
1487 		DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
1488 			  args->buffer_count);
1489 		return -ENOMEM;
1490 	}
1491 	ret = copy_from_user(exec2_list,
1492 			     to_user_ptr(args->buffers_ptr),
1493 			     sizeof(*exec2_list) * args->buffer_count);
1494 	if (ret != 0) {
1495 		DRM_DEBUG("copy %d exec entries failed %d\n",
1496 			  args->buffer_count, ret);
1497 		drm_free_large(exec2_list);
1498 		return -EFAULT;
1499 	}
1500 
1501 	ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list);
1502 	if (!ret) {
1503 		/* Copy the new buffer offsets back to the user's exec list. */
1504 		struct drm_i915_gem_exec_object2 *user_exec_list =
1505 				   to_user_ptr(args->buffers_ptr);
1506 		int i;
1507 
1508 		for (i = 0; i < args->buffer_count; i++) {
1509 			ret = __copy_to_user(&user_exec_list[i].offset,
1510 					     &exec2_list[i].offset,
1511 					     sizeof(user_exec_list[i].offset));
1512 			if (ret) {
1513 				ret = -EFAULT;
1514 				DRM_DEBUG("failed to copy %d exec entries "
1515 					  "back to user\n",
1516 					  args->buffer_count);
1517 				break;
1518 			}
1519 		}
1520 	}
1521 
1522 	drm_free_large(exec2_list);
1523 	return ret;
1524 }
1525