1 /*
2  * Copyright © 2008 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *
26  */
27 
28 #ifdef __NetBSD__
29 #if 0				/* XXX uvmhist option?  */
30 #include "opt_uvmhist.h"
31 #endif
32 
33 #include <sys/types.h>
34 #include <sys/param.h>
35 
36 #include <uvm/uvm.h>
37 #include <uvm/uvm_extern.h>
38 #include <uvm/uvm_fault.h>
39 #include <uvm/uvm_page.h>
40 #include <uvm/uvm_pmap.h>
41 #include <uvm/uvm_prot.h>
42 
43 #include <drm/bus_dma_hacks.h>
44 #endif
45 
46 #include <drm/drmP.h>
47 #include <drm/drm_vma_manager.h>
48 #include <drm/i915_drm.h>
49 #include "i915_drv.h"
50 #include "i915_trace.h"
51 #include "intel_drv.h"
52 #include <linux/shmem_fs.h>
53 #include <linux/slab.h>
54 #include <linux/swap.h>
55 #include <linux/pci.h>
56 #include <linux/dma-buf.h>
57 #include <linux/errno.h>
58 #include <linux/time.h>
59 #include <linux/err.h>
60 #include <linux/bitops.h>
61 #include <linux/printk.h>
62 #include <asm/param.h>
63 #include <asm/page.h>
64 
65 static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
66 static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj,
67 						   bool force);
68 static __must_check int
69 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
70 			       bool readonly);
71 
72 static void i915_gem_write_fence(struct drm_device *dev, int reg,
73 				 struct drm_i915_gem_object *obj);
74 static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
75 					 struct drm_i915_fence_reg *fence,
76 					 bool enable);
77 
78 static unsigned long i915_gem_inactive_count(struct shrinker *shrinker,
79 					     struct shrink_control *sc);
80 static unsigned long i915_gem_inactive_scan(struct shrinker *shrinker,
81 					    struct shrink_control *sc);
82 static unsigned long i915_gem_purge(struct drm_i915_private *dev_priv, long target);
83 static unsigned long i915_gem_shrink_all(struct drm_i915_private *dev_priv);
84 static void i915_gem_object_truncate(struct drm_i915_gem_object *obj);
85 static void i915_gem_retire_requests_ring(struct intel_ring_buffer *ring);
86 
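/* CPU access to an object is coherent with the GPU when the platform has a
 * shared last-level cache or the object uses a snooped (non-NONE) cache
 * level; only then can we skip explicit clflushes.
 */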
87 static bool cpu_cache_is_coherent(struct drm_device *dev,
88 				  enum i915_cache_level level)
89 {
90 	return HAS_LLC(dev) || level != I915_CACHE_NONE;
91 }
92 
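/* CPU writes must be clflushed when the object is not CPU-coherent, or when
 * it is pinned for display scanout, which bypasses the CPU cache.
 */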
93 static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
94 {
95 	if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
96 		return true;
97 
98 	return obj->pin_display;
99 }
100 
101 static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
102 {
103 	if (obj->tiling_mode)
104 		i915_gem_release_mmap(obj);
105 
106 	/* As we do not have an associated fence register, we will force
107 	 * a tiling change if we ever need to acquire one.
108 	 */
109 	obj->fence_dirty = false;
110 	obj->fence_reg = I915_FENCE_REG_NONE;
111 }
112 
113 /* some bookkeeping */
114 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
115 				  size_t size)
116 {
117 	spin_lock(&dev_priv->mm.object_stat_lock);
118 	dev_priv->mm.object_count++;
119 	dev_priv->mm.object_memory += size;
120 	spin_unlock(&dev_priv->mm.object_stat_lock);
121 }
122 
123 static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
124 				     size_t size)
125 {
126 	spin_lock(&dev_priv->mm.object_stat_lock);
127 	dev_priv->mm.object_count--;
128 	dev_priv->mm.object_memory -= size;
129 	spin_unlock(&dev_priv->mm.object_stat_lock);
130 }
131 
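/* Wait (up to 10 seconds) for any pending GPU reset to finish.  Returns 0
 * once no reset is in progress (or the GPU is terminally wedged), -EIO if
 * the reset never completes, or a negative errno if the wait is interrupted.
 */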
132 static int
133 i915_gem_wait_for_error(struct i915_gpu_error *error)
134 {
135 	int ret;
136 
137 #define EXIT_COND (!i915_reset_in_progress(error) || \
138 		   i915_terminally_wedged(error))
139 	if (EXIT_COND)
140 		return 0;
141 
142 	/*
143 	 * Only wait 10 seconds for the gpu reset to complete to avoid hanging
144 	 * userspace. If it takes that long something really bad is going on and
145 	 * we should simply try to bail out and fail as gracefully as possible.
146 	 */
147 #ifdef __NetBSD__
148 	spin_lock(&error->reset_lock);
149 	DRM_SPIN_TIMED_WAIT_UNTIL(ret, &error->reset_queue, &error->reset_lock,
150 	    10*HZ, EXIT_COND);
151 	spin_unlock(&error->reset_lock);
152 #else
153 	ret = wait_event_interruptible_timeout(error->reset_queue,
154 					       EXIT_COND,
155 					       10*HZ);
156 #endif
157 	if (ret == 0) {
158 		DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
159 		return -EIO;
160 	} else if (ret < 0) {
161 		return ret;
162 	}
163 #undef EXIT_COND
164 
165 	return 0;
166 }
167 
168 int i915_mutex_lock_interruptible(struct drm_device *dev)
169 {
170 	struct drm_i915_private *dev_priv = dev->dev_private;
171 	int ret;
172 
173 	ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
174 	if (ret)
175 		return ret;
176 
177 	ret = mutex_lock_interruptible(&dev->struct_mutex);
178 	if (ret)
179 		return ret;
180 
181 	WARN_ON(i915_verify_lists(dev));
182 	return 0;
183 }
184 
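/* An object is inactive when it is bound into some address space but has no
 * outstanding work on the GPU.
 */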
185 static inline bool
186 i915_gem_object_is_inactive(struct drm_i915_gem_object *obj)
187 {
188 	return i915_gem_obj_bound_any(obj) && !obj->active;
189 }
190 
191 int
192 i915_gem_init_ioctl(struct drm_device *dev, void *data,
193 		    struct drm_file *file)
194 {
195 	struct drm_i915_private *dev_priv = dev->dev_private;
196 	struct drm_i915_gem_init *args = data;
197 
198 	if (drm_core_check_feature(dev, DRIVER_MODESET))
199 		return -ENODEV;
200 
201 	if (args->gtt_start >= args->gtt_end ||
202 	    (args->gtt_end | args->gtt_start) & (PAGE_SIZE - 1))
203 		return -EINVAL;
204 
205 	/* GEM with user mode setting was never supported on ilk and later. */
206 	if (INTEL_INFO(dev)->gen >= 5)
207 		return -ENODEV;
208 
209 	mutex_lock(&dev->struct_mutex);
210 	i915_gem_setup_global_gtt(dev, args->gtt_start, args->gtt_end,
211 				  args->gtt_end);
212 	dev_priv->gtt.mappable_end = args->gtt_end;
213 	mutex_unlock(&dev->struct_mutex);
214 
215 	return 0;
216 }
217 
218 int
219 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
220 			    struct drm_file *file)
221 {
222 	struct drm_i915_private *dev_priv = dev->dev_private;
223 	struct drm_i915_gem_get_aperture *args = data;
224 	struct drm_i915_gem_object *obj;
225 	size_t pinned;
226 
227 	pinned = 0;
228 	mutex_lock(&dev->struct_mutex);
229 	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list)
230 		if (i915_gem_obj_is_pinned(obj))
231 			pinned += i915_gem_obj_ggtt_size(obj);
232 	mutex_unlock(&dev->struct_mutex);
233 
234 	args->aper_size = dev_priv->gtt.base.total;
235 	args->aper_available_size = args->aper_size - pinned;
236 
237 	return 0;
238 }
239 
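/* Drop an object's contiguous DMA ("phys") backing.  If the object is still
 * WILLNEED, its contents are copied back into the shmem/uao pages (and
 * clflushed) before the DMA memory is freed.
 */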
240 static void i915_gem_object_detach_phys(struct drm_i915_gem_object *obj)
241 {
242 	drm_dma_handle_t *phys = obj->phys_handle;
243 
244 	if (!phys)
245 		return;
246 
247 	if (obj->madv == I915_MADV_WILLNEED) {
248 #ifdef __NetBSD__
249 		const char *vaddr = phys->vaddr;
250 		unsigned i;
251 
252 		for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
253 			struct pglist pages;
254 			int error;
255 
256 			TAILQ_INIT(&pages);
257 			error = uvm_obj_wirepages(obj->base.gemo_shm_uao,
258 			    i*PAGE_SIZE, (i+1)*PAGE_SIZE, &pages);
259 			if (error)
260 				continue;
261 
262 			struct vm_page *const vm_page = TAILQ_FIRST(&pages);
263 			struct page *const page = container_of(vm_page,
264 			    struct page, p_vmp);
265 			char *const dst = kmap_atomic(page);
266 			(void)memcpy(dst, vaddr + (i*PAGE_SIZE), PAGE_SIZE);
267 			drm_clflush_virt_range(dst, PAGE_SIZE);
268 			kunmap_atomic(dst);
269 
270 			vm_page->flags &= ~PG_CLEAN;
271 			/* XXX mark page accessed */
272 			uvm_obj_unwirepages(obj->base.gemo_shm_uao,
273 			    i*PAGE_SIZE, (i+1)*PAGE_SIZE);
274 		}
275 #else
276 		struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
277 		char *vaddr = phys->vaddr;
278 		int i;
279 
280 		for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
281 			struct page *page = shmem_read_mapping_page(mapping, i);
282 			if (!IS_ERR(page)) {
283 				char *dst = kmap_atomic(page);
284 				memcpy(dst, vaddr, PAGE_SIZE);
285 				drm_clflush_virt_range(dst, PAGE_SIZE);
286 				kunmap_atomic(dst);
287 
288 				set_page_dirty(page);
289 				mark_page_accessed(page);
290 				page_cache_release(page);
291 			}
292 			vaddr += PAGE_SIZE;
293 		}
294 #endif
295 		i915_gem_chipset_flush(obj->base.dev);
296 	}
297 
298 #ifndef __NetBSD__
299 #ifdef CONFIG_X86
300 	set_memory_wb((unsigned long)phys->vaddr, phys->size / PAGE_SIZE);
301 #endif
302 #endif
303 	drm_pci_free(obj->base.dev, phys);
304 	obj->phys_handle = NULL;
305 }
306 
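/* Back an object with physically contiguous DMA memory, copying its current
 * page contents into the new allocation.  Returns -EBUSY if an existing
 * phys handle does not satisfy the requested alignment.
 */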
307 int
308 i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
309 			    int align)
310 {
311 	drm_dma_handle_t *phys;
312 #ifndef __NetBSD__
313 	struct address_space *mapping;
314 #endif
315 	char *vaddr;
316 	int i;
317 
318 	if (obj->phys_handle) {
319 		if ((unsigned long)obj->phys_handle->vaddr & (align - 1))
320 			return -EBUSY;
321 
322 		return 0;
323 	}
324 
325 	if (obj->madv != I915_MADV_WILLNEED)
326 		return -EFAULT;
327 
328 #ifdef __NetBSD__
329 	if (obj->base.gemo_shm_uao == NULL)
330 		return -EINVAL;
331 #else
332 	if (obj->base.filp == NULL)
333 		return -EINVAL;
334 #endif
335 
336 	/* create a new object */
337 	phys = drm_pci_alloc(obj->base.dev, obj->base.size, align);
338 	if (!phys)
339 		return -ENOMEM;
340 
341 	vaddr = phys->vaddr;
342 #ifndef __NetBSD__
343 #ifdef CONFIG_X86
344 	set_memory_wc((unsigned long)vaddr, phys->size / PAGE_SIZE);
345 #endif
346 	mapping = file_inode(obj->base.filp)->i_mapping;
347 #endif
348 	for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
349 		struct page *page;
350 		char *src;
351 
352 #ifdef __NetBSD__
353 		struct pglist pages;
354 		int ret;
355 
356 		TAILQ_INIT(&pages);
357 
358 		/* XXX errno NetBSD->Linux */
359 		ret = -uvm_obj_wirepages(obj->base.gemo_shm_uao, i*PAGE_SIZE,
360 		    (i+1)*PAGE_SIZE, &pages);
361 		if (ret) {
362 			drm_pci_free(obj->base.dev, phys);
363 			return ret;
364 		}
365 		KASSERT(!TAILQ_EMPTY(&pages));
366 		page = container_of(TAILQ_FIRST(&pages), struct page, p_vmp);
367 #else
368 		page = shmem_read_mapping_page(mapping, i);
369 		if (IS_ERR(page)) {
370 #ifdef CONFIG_X86
371 			set_memory_wb((unsigned long)phys->vaddr, phys->size / PAGE_SIZE);
372 #endif
373 			drm_pci_free(obj->base.dev, phys);
374 			return PTR_ERR(page);
375 		}
376 #endif	/* defined(__NetBSD__) */
377 
378 		src = kmap_atomic(page);
379 		memcpy(vaddr, src, PAGE_SIZE);
380 		kunmap_atomic(src);
381 
382 #ifdef __NetBSD__
383 		/* XXX mark page accessed */
384 		uvm_obj_unwirepages(obj->base.gemo_shm_uao, i*PAGE_SIZE,
385 		    (i + 1)*PAGE_SIZE);
386 #else
387 		mark_page_accessed(page);
388 		page_cache_release(page);
389 #endif
390 
391 		vaddr += PAGE_SIZE;
392 	}
393 
394 	obj->phys_handle = phys;
395 	return 0;
396 }
397 
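/* pwrite fast path for objects with a phys backing: copy the user data
 * straight into the contiguous DMA buffer, dropping struct_mutex for a
 * faulting copy if the atomic copy fails, then flush the chipset.
 */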
398 static int
399 i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
400 		     struct drm_i915_gem_pwrite *args,
401 		     struct drm_file *file_priv)
402 {
403 	struct drm_device *dev = obj->base.dev;
404 	void *vaddr = (char *)obj->phys_handle->vaddr + args->offset;
405 	char __user *user_data = to_user_ptr(args->data_ptr);
406 
407 	if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) {
408 		unsigned long unwritten;
409 
410 		/* The physical object once assigned is fixed for the lifetime
411 		 * of the obj, so we can safely drop the lock and continue
412 		 * to access vaddr.
413 		 */
414 		mutex_unlock(&dev->struct_mutex);
415 		unwritten = copy_from_user(vaddr, user_data, args->size);
416 		mutex_lock(&dev->struct_mutex);
417 		if (unwritten)
418 			return -EFAULT;
419 	}
420 
421 	i915_gem_chipset_flush(dev);
422 	return 0;
423 }
424 
425 void *i915_gem_object_alloc(struct drm_device *dev)
426 {
427 	struct drm_i915_private *dev_priv = dev->dev_private;
428 	return kmem_cache_zalloc(dev_priv->slab, GFP_KERNEL);
429 }
430 
431 void i915_gem_object_free(struct drm_i915_gem_object *obj)
432 {
433 	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
434 	kmem_cache_free(dev_priv->slab, obj);
435 }
436 
437 static int
438 i915_gem_create(struct drm_file *file,
439 		struct drm_device *dev,
440 		uint64_t size,
441 		uint32_t *handle_p)
442 {
443 	struct drm_i915_gem_object *obj;
444 	int ret;
445 	u32 handle;
446 
447 	size = roundup(size, PAGE_SIZE);
448 	if (size == 0)
449 		return -EINVAL;
450 
451 	/* Allocate the new object */
452 	obj = i915_gem_alloc_object(dev, size);
453 	if (obj == NULL)
454 		return -ENOMEM;
455 
456 	ret = drm_gem_handle_create(file, &obj->base, &handle);
457 	/* drop reference from allocate - handle holds it now */
458 	drm_gem_object_unreference_unlocked(&obj->base);
459 	if (ret)
460 		return ret;
461 
462 	*handle_p = handle;
463 	return 0;
464 }
465 
466 int
467 i915_gem_dumb_create(struct drm_file *file,
468 		     struct drm_device *dev,
469 		     struct drm_mode_create_dumb *args)
470 {
471 	/* have to work out size/pitch and return them */
472 #ifdef __NetBSD__		/* ALIGN means something else.  */
473 	args->pitch = round_up(args->width * DIV_ROUND_UP(args->bpp, 8), 64);
474 #else
475 	args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64);
476 #endif
477 	args->size = args->pitch * args->height;
478 	return i915_gem_create(file, dev,
479 			       args->size, &args->handle);
480 }
481 
482 /**
483  * Creates a new mm object and returns a handle to it.
484  */
485 int
486 i915_gem_create_ioctl(struct drm_device *dev, void *data,
487 		      struct drm_file *file)
488 {
489 	struct drm_i915_gem_create *args = data;
490 
491 	return i915_gem_create(file, dev,
492 			       args->size, &args->handle);
493 }
494 
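/* Copy helpers that undo bit-17 swizzling: the copy is done in chunks that
 * never cross a 64-byte cacheline, XORing 64 into the GPU offset of each
 * chunk to address the swizzled channel.
 */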
495 static inline int
496 __copy_to_user_swizzled(char __user *cpu_vaddr,
497 			const char *gpu_vaddr, int gpu_offset,
498 			int length)
499 {
500 	int ret, cpu_offset = 0;
501 
502 	while (length > 0) {
503 #ifdef __NetBSD__		/* XXX ALIGN means something else.  */
504 		int cacheline_end = round_up(gpu_offset + 1, 64);
505 #else
506 		int cacheline_end = ALIGN(gpu_offset + 1, 64);
507 #endif
508 		int this_length = min(cacheline_end - gpu_offset, length);
509 		int swizzled_gpu_offset = gpu_offset ^ 64;
510 
511 		ret = __copy_to_user(cpu_vaddr + cpu_offset,
512 				     gpu_vaddr + swizzled_gpu_offset,
513 				     this_length);
514 		if (ret)
515 			return ret + length;
516 
517 		cpu_offset += this_length;
518 		gpu_offset += this_length;
519 		length -= this_length;
520 	}
521 
522 	return 0;
523 }
524 
525 static inline int
526 __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
527 			  const char __user *cpu_vaddr,
528 			  int length)
529 {
530 	int ret, cpu_offset = 0;
531 
532 	while (length > 0) {
533 #ifdef __NetBSD__		/* XXX ALIGN means something else.  */
534 		int cacheline_end = round_up(gpu_offset + 1, 64);
535 #else
536 		int cacheline_end = ALIGN(gpu_offset + 1, 64);
537 #endif
538 		int this_length = min(cacheline_end - gpu_offset, length);
539 		int swizzled_gpu_offset = gpu_offset ^ 64;
540 
541 		ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
542 				       cpu_vaddr + cpu_offset,
543 				       this_length);
544 		if (ret)
545 			return ret + length;
546 
547 		cpu_offset += this_length;
548 		gpu_offset += this_length;
549 		length -= this_length;
550 	}
551 
552 	return 0;
553 }
554 
555 /*
556  * Pins the specified object's pages and synchronizes the object with
557  * GPU accesses. Sets needs_clflush to non-zero if the caller should
558  * flush the object from the CPU cache.
559  */
560 int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
561 				    int *needs_clflush)
562 {
563 	int ret;
564 
565 	*needs_clflush = 0;
566 
567 #ifdef __NetBSD__
568 	if (obj->base.gemo_shm_uao == NULL)
569 		return -EINVAL;
570 #else
571 	if (!obj->base.filp)
572 		return -EINVAL;
573 #endif
574 
575 	if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) {
576 		/* If we're not in the cpu read domain, set ourself into the gtt
577 		 * read domain and manually flush cachelines (if required). This
578 		 * optimizes for the case when the gpu will dirty the data
579 		 * anyway again before the next pread happens. */
580 		*needs_clflush = !cpu_cache_is_coherent(obj->base.dev,
581 							obj->cache_level);
582 		ret = i915_gem_object_wait_rendering(obj, true);
583 		if (ret)
584 			return ret;
585 	}
586 
587 	ret = i915_gem_object_get_pages(obj);
588 	if (ret)
589 		return ret;
590 
591 	i915_gem_object_pin_pages(obj);
592 
593 	return ret;
594 }
595 
596 /* Per-page copy function for the shmem pread fastpath.
597  * Flushes invalid cachelines before reading the target if
598  * needs_clflush is set. */
599 static int
600 shmem_pread_fast(struct page *page, int shmem_page_offset, int page_length,
601 		 char __user *user_data,
602 		 bool page_do_bit17_swizzling, bool needs_clflush)
603 {
604 #ifdef __NetBSD__		/* XXX atomic shmem fast path */
605 	return -EFAULT;
606 #else
607 	char *vaddr;
608 	int ret;
609 
610 	if (unlikely(page_do_bit17_swizzling))
611 		return -EINVAL;
612 
613 	vaddr = kmap_atomic(page);
614 	if (needs_clflush)
615 		drm_clflush_virt_range(vaddr + shmem_page_offset,
616 				       page_length);
617 	ret = __copy_to_user_inatomic(user_data,
618 				      vaddr + shmem_page_offset,
619 				      page_length);
620 	kunmap_atomic(vaddr);
621 
622 	return ret ? -EFAULT : 0;
623 #endif
624 }
625 
626 static void
627 shmem_clflush_swizzled_range(char *addr, unsigned long length,
628 			     bool swizzled)
629 {
630 	if (unlikely(swizzled)) {
631 		unsigned long start = (unsigned long) addr;
632 		unsigned long end = (unsigned long) addr + length;
633 
634 		/* For swizzling simply ensure that we always flush both
635 		 * channels. Lame, but simple and it works. Swizzled
636 		 * pwrite/pread is far from a hotpath - current userspace
637 		 * doesn't use it at all. */
638 		start = round_down(start, 128);
639 		end = round_up(end, 128);
640 
641 		drm_clflush_virt_range((void *)start, end - start);
642 	} else {
643 		drm_clflush_virt_range(addr, length);
644 	}
645 
646 }
647 
648 /* Only difference to the fast-path function is that this can handle bit17
649  * and uses non-atomic copy and kmap functions. */
650 static int
651 shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length,
652 		 char __user *user_data,
653 		 bool page_do_bit17_swizzling, bool needs_clflush)
654 {
655 	char *vaddr;
656 	int ret;
657 
658 	vaddr = kmap(page);
659 	if (needs_clflush)
660 		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
661 					     page_length,
662 					     page_do_bit17_swizzling);
663 
664 	if (page_do_bit17_swizzling)
665 		ret = __copy_to_user_swizzled(user_data,
666 					      vaddr, shmem_page_offset,
667 					      page_length);
668 	else
669 		ret = __copy_to_user(user_data,
670 				     vaddr + shmem_page_offset,
671 				     page_length);
672 	kunmap(page);
673 
674 	return ret ? -EFAULT : 0;
675 }
676 
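/* Read out of an object's backing pages, trying the atomic per-page fast
 * path first and falling back to the slow path (with struct_mutex dropped)
 * on a fault or when bit-17 swizzling is needed.
 */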
677 static int
678 i915_gem_shmem_pread(struct drm_device *dev,
679 		     struct drm_i915_gem_object *obj,
680 		     struct drm_i915_gem_pread *args,
681 		     struct drm_file *file)
682 {
683 	char __user *user_data;
684 	ssize_t remain;
685 	loff_t offset;
686 	int shmem_page_offset, page_length, ret = 0;
687 	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
688 #ifndef __NetBSD__		/* XXX */
689 	int prefaulted = 0;
690 #endif
691 	int needs_clflush = 0;
692 #ifndef __NetBSD__
693 	struct sg_page_iter sg_iter;
694 #endif
695 
696 	user_data = to_user_ptr(args->data_ptr);
697 	remain = args->size;
698 
699 	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
700 
701 	ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
702 	if (ret)
703 		return ret;
704 
705 	offset = args->offset;
706 
707 #ifdef __NetBSD__
708 	while (0 < remain)
709 #else
710 	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
711 			 offset >> PAGE_SHIFT)
712 #endif
713 	{
714 #ifdef __NetBSD__
715 		struct page *const page = i915_gem_object_get_page(obj,
716 		    atop(offset));
717 #else
718 		struct page *page = sg_page_iter_page(&sg_iter);
719 
720 		if (remain <= 0)
721 			break;
722 #endif
723 
724 		/* Operation in this page
725 		 *
726 		 * shmem_page_offset = offset within page in shmem file
727 		 * page_length = bytes to copy for this page
728 		 */
729 		shmem_page_offset = offset_in_page(offset);
730 		page_length = remain;
731 		if ((shmem_page_offset + page_length) > PAGE_SIZE)
732 			page_length = PAGE_SIZE - shmem_page_offset;
733 
734 		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
735 			(page_to_phys(page) & (1 << 17)) != 0;
736 
737 		ret = shmem_pread_fast(page, shmem_page_offset, page_length,
738 				       user_data, page_do_bit17_swizzling,
739 				       needs_clflush);
740 		if (ret == 0)
741 			goto next_page;
742 
743 		mutex_unlock(&dev->struct_mutex);
744 #ifndef __NetBSD__
745 		if (likely(!i915.prefault_disable) && !prefaulted) {
746 			ret = fault_in_multipages_writeable(user_data, remain);
747 			/* Userspace is tricking us, but we've already clobbered
748 			 * its pages with the prefault and promised to write the
749 			 * data up to the first fault. Hence ignore any errors
750 			 * and just continue. */
751 			(void)ret;
752 			prefaulted = 1;
753 		}
754 #endif
755 		ret = shmem_pread_slow(page, shmem_page_offset, page_length,
756 				       user_data, page_do_bit17_swizzling,
757 				       needs_clflush);
758 
759 		mutex_lock(&dev->struct_mutex);
760 
761 		if (ret)
762 			goto out;
763 
764 next_page:
765 		remain -= page_length;
766 		user_data += page_length;
767 		offset += page_length;
768 	}
769 
770 out:
771 	i915_gem_object_unpin_pages(obj);
772 
773 	return ret;
774 }
775 
776 /**
777  * Reads data from the object referenced by handle.
778  *
779  * On error, the contents of *data are undefined.
780  */
781 int
782 i915_gem_pread_ioctl(struct drm_device *dev, void *data,
783 		     struct drm_file *file)
784 {
785 	struct drm_i915_gem_pread *args = data;
786 	struct drm_gem_object *gobj;
787 	struct drm_i915_gem_object *obj;
788 	int ret = 0;
789 
790 	if (args->size == 0)
791 		return 0;
792 
793 	if (!access_ok(VERIFY_WRITE,
794 		       to_user_ptr(args->data_ptr),
795 		       args->size))
796 		return -EFAULT;
797 
798 	ret = i915_mutex_lock_interruptible(dev);
799 	if (ret)
800 		return ret;
801 
802 	gobj = drm_gem_object_lookup(dev, file, args->handle);
803 	if (gobj == NULL) {
804 		ret = -ENOENT;
805 		goto unlock;
806 	}
807 	obj = to_intel_bo(gobj);
808 
809 	/* Bounds check source.  */
810 	if (args->offset > obj->base.size ||
811 	    args->size > obj->base.size - args->offset) {
812 		ret = -EINVAL;
813 		goto out;
814 	}
815 
816 	/* prime objects have no backing filp to GEM pread/pwrite
817 	 * pages from.
818 	 */
819 #ifdef __NetBSD__
820 	/* Also stolen objects.  */
821 	if (obj->base.gemo_shm_uao == NULL) {
822 		ret = -EINVAL;
823 		goto out;
824 	}
825 #else
826 	if (!obj->base.filp) {
827 		ret = -EINVAL;
828 		goto out;
829 	}
830 #endif
831 
832 	trace_i915_gem_object_pread(obj, args->offset, args->size);
833 
834 	ret = i915_gem_shmem_pread(dev, obj, args, file);
835 
836 out:
837 	drm_gem_object_unreference(&obj->base);
838 unlock:
839 	mutex_unlock(&dev->struct_mutex);
840 	return ret;
841 }
842 
843 /* This is the fast write path which cannot handle
844  * page faults in the source data
845  */
846 
847 static inline int
848 fast_user_write(struct io_mapping *mapping,
849 		loff_t page_base, int page_offset,
850 		char __user *user_data,
851 		int length)
852 {
853 #ifdef __NetBSD__		/* XXX atomic shmem fast path */
854 	return -EFAULT;
855 #else
856 	void __iomem *vaddr_atomic;
857 	void *vaddr;
858 	unsigned long unwritten;
859 
860 	vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
861 	/* We can use the cpu mem copy function because this is X86. */
862 	vaddr = (void __force*)vaddr_atomic + page_offset;
863 	unwritten = __copy_from_user_inatomic_nocache(vaddr,
864 						      user_data, length);
865 	io_mapping_unmap_atomic(vaddr_atomic);
866 	return unwritten;
867 #endif
868 }
869 
870 /**
871  * This is the fast pwrite path, where we copy the data directly from the
872  * user into the GTT, uncached.
873  */
874 static int
875 i915_gem_gtt_pwrite_fast(struct drm_device *dev,
876 			 struct drm_i915_gem_object *obj,
877 			 struct drm_i915_gem_pwrite *args,
878 			 struct drm_file *file)
879 {
880 	struct drm_i915_private *dev_priv = dev->dev_private;
881 	ssize_t remain;
882 	loff_t offset, page_base;
883 	char __user *user_data;
884 	int page_offset, page_length, ret;
885 
886 	ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK);
887 	if (ret)
888 		goto out;
889 
890 	ret = i915_gem_object_set_to_gtt_domain(obj, true);
891 	if (ret)
892 		goto out_unpin;
893 
894 	ret = i915_gem_object_put_fence(obj);
895 	if (ret)
896 		goto out_unpin;
897 
898 	user_data = to_user_ptr(args->data_ptr);
899 	remain = args->size;
900 
901 	offset = i915_gem_obj_ggtt_offset(obj) + args->offset;
902 
903 	while (remain > 0) {
904 		/* Operation in this page
905 		 *
906 		 * page_base = page offset within aperture
907 		 * page_offset = offset within page
908 		 * page_length = bytes to copy for this page
909 		 */
910 		page_base = offset & PAGE_MASK;
911 		page_offset = offset_in_page(offset);
912 		page_length = remain;
913 		if ((page_offset + remain) > PAGE_SIZE)
914 			page_length = PAGE_SIZE - page_offset;
915 
916 		/* If we get a fault while copying data, then (presumably) our
917 		 * source page isn't available.  Return the error and we'll
918 		 * retry in the slow path.
919 		 */
920 		if (fast_user_write(dev_priv->gtt.mappable, page_base,
921 				    page_offset, user_data, page_length)) {
922 			ret = -EFAULT;
923 			goto out_unpin;
924 		}
925 
926 		remain -= page_length;
927 		user_data += page_length;
928 		offset += page_length;
929 	}
930 
931 out_unpin:
932 	i915_gem_object_ggtt_unpin(obj);
933 out:
934 	return ret;
935 }
936 
937 /* Per-page copy function for the shmem pwrite fastpath.
938  * Flushes invalid cachelines before writing to the target if
939  * needs_clflush_before is set and flushes out any written cachelines after
940  * writing if needs_clflush is set. */
941 static int
942 shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length,
943 		  char __user *user_data,
944 		  bool page_do_bit17_swizzling,
945 		  bool needs_clflush_before,
946 		  bool needs_clflush_after)
947 {
948 #ifdef __NetBSD__
949 	return -EFAULT;
950 #else
951 	char *vaddr;
952 	int ret;
953 
954 	if (unlikely(page_do_bit17_swizzling))
955 		return -EINVAL;
956 
957 	vaddr = kmap_atomic(page);
958 	if (needs_clflush_before)
959 		drm_clflush_virt_range(vaddr + shmem_page_offset,
960 				       page_length);
961 	ret = __copy_from_user_inatomic(vaddr + shmem_page_offset,
962 					user_data, page_length);
963 	if (needs_clflush_after)
964 		drm_clflush_virt_range(vaddr + shmem_page_offset,
965 				       page_length);
966 	kunmap_atomic(vaddr);
967 
968 	return ret ? -EFAULT : 0;
969 #endif
970 }
971 
972 /* Only difference to the fast-path function is that this can handle bit17
973  * and uses non-atomic copy and kmap functions. */
974 static int
975 shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
976 		  char __user *user_data,
977 		  bool page_do_bit17_swizzling,
978 		  bool needs_clflush_before,
979 		  bool needs_clflush_after)
980 {
981 	char *vaddr;
982 	int ret;
983 
984 	vaddr = kmap(page);
985 	if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
986 		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
987 					     page_length,
988 					     page_do_bit17_swizzling);
989 	if (page_do_bit17_swizzling)
990 		ret = __copy_from_user_swizzled(vaddr, shmem_page_offset,
991 						user_data,
992 						page_length);
993 	else
994 		ret = __copy_from_user(vaddr + shmem_page_offset,
995 				       user_data,
996 				       page_length);
997 	if (needs_clflush_after)
998 		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
999 					     page_length,
1000 					     page_do_bit17_swizzling);
1001 	kunmap(page);
1002 
1003 	return ret ? -EFAULT : 0;
1004 }
1005 
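/* Write user data into an object's backing pages, clflushing before and/or
 * after the copy as the object's cache domain requires, with the same
 * fast/slow path split as the pread path above.
 */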
1006 static int
1007 i915_gem_shmem_pwrite(struct drm_device *dev,
1008 		      struct drm_i915_gem_object *obj,
1009 		      struct drm_i915_gem_pwrite *args,
1010 		      struct drm_file *file)
1011 {
1012 	ssize_t remain;
1013 	loff_t offset;
1014 	char __user *user_data;
1015 	int shmem_page_offset, page_length, ret = 0;
1016 	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
1017 	int hit_slowpath = 0;
1018 	int needs_clflush_after = 0;
1019 	int needs_clflush_before = 0;
1020 #ifndef __NetBSD__
1021 	struct sg_page_iter sg_iter;
1022 	int flush_mask = boot_cpu_data.x86_clflush_size - 1;
1023 #else
1024 	int flush_mask = cpu_info_primary.ci_cflush_lsize - 1;
1025 #endif
1026 
1027 	user_data = to_user_ptr(args->data_ptr);
1028 	remain = args->size;
1029 
1030 	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
1031 
1032 	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
1033 		/* If we're not in the cpu write domain, set ourself into the gtt
1034 		 * write domain and manually flush cachelines (if required). This
1035 		 * optimizes for the case when the gpu will use the data
1036 		 * right away and we therefore have to clflush anyway. */
1037 		needs_clflush_after = cpu_write_needs_clflush(obj);
1038 		ret = i915_gem_object_wait_rendering(obj, false);
1039 		if (ret)
1040 			return ret;
1041 	}
1042 	/* Same trick applies to invalidate partially written cachelines read
1043 	 * before writing. */
1044 	if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0)
1045 		needs_clflush_before =
1046 			!cpu_cache_is_coherent(dev, obj->cache_level);
1047 
1048 	ret = i915_gem_object_get_pages(obj);
1049 	if (ret)
1050 		return ret;
1051 
1052 	i915_gem_object_pin_pages(obj);
1053 
1054 	offset = args->offset;
1055 	obj->dirty = 1;
1056 
1057 #ifdef __NetBSD__
1058 	while (0 < remain)
1059 #else
1060 	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
1061 			 offset >> PAGE_SHIFT)
1062 #endif
1063 	{
1064 #ifdef __NetBSD__
1065 		struct page *const page = i915_gem_object_get_page(obj,
1066 		    atop(offset));
1067 #else
1068 		struct page *page = sg_page_iter_page(&sg_iter);
1069 #endif
1070 
1071 		if (remain <= 0)
1072 			break;
1073 
1074 		/* Operation in this page
1075 		 *
1076 		 * shmem_page_offset = offset within page in shmem file
1077 		 * page_length = bytes to copy for this page
1078 		 */
1079 		shmem_page_offset = offset_in_page(offset);
1080 
1081 		page_length = remain;
1082 		if ((shmem_page_offset + page_length) > PAGE_SIZE)
1083 			page_length = PAGE_SIZE - shmem_page_offset;
1084 
1085 		/* If we don't overwrite a cacheline completely we need to be
1086 		 * careful to have up-to-date data by first clflushing. Don't
1087 		 * overcomplicate things and flush the entire write range. */
1088 		const int partial_cacheline_write = needs_clflush_before &&
1089 			((shmem_page_offset | page_length) & flush_mask);
1090 
1091 		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
1092 			(page_to_phys(page) & (1 << 17)) != 0;
1093 
1094 		ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
1095 					user_data, page_do_bit17_swizzling,
1096 					partial_cacheline_write,
1097 					needs_clflush_after);
1098 		if (ret == 0)
1099 			goto next_page;
1100 
1101 		hit_slowpath = 1;
1102 		mutex_unlock(&dev->struct_mutex);
1103 		ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
1104 					user_data, page_do_bit17_swizzling,
1105 					partial_cacheline_write,
1106 					needs_clflush_after);
1107 
1108 		mutex_lock(&dev->struct_mutex);
1109 
1110 		if (ret)
1111 			goto out;
1112 
1113 next_page:
1114 		remain -= page_length;
1115 		user_data += page_length;
1116 		offset += page_length;
1117 	}
1118 
1119 out:
1120 	i915_gem_object_unpin_pages(obj);
1121 
1122 	if (hit_slowpath) {
1123 		/*
1124 		 * Fixup: Flush cpu caches in case we didn't flush the dirty
1125 		 * cachelines in-line while writing and the object moved
1126 		 * out of the cpu write domain while we've dropped the lock.
1127 		 */
1128 		if (!needs_clflush_after &&
1129 		    obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
1130 			if (i915_gem_clflush_object(obj, obj->pin_display))
1131 				i915_gem_chipset_flush(dev);
1132 		}
1133 	}
1134 
1135 	if (needs_clflush_after)
1136 		i915_gem_chipset_flush(dev);
1137 
1138 	return ret;
1139 }
1140 
1141 /**
1142  * Writes data to the object referenced by handle.
1143  *
1144  * On error, the contents of the buffer that were to be modified are undefined.
1145  */
1146 int
1147 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
1148 		      struct drm_file *file)
1149 {
1150 	struct drm_i915_gem_pwrite *args = data;
1151 	struct drm_gem_object *gobj;
1152 	struct drm_i915_gem_object *obj;
1153 	int ret;
1154 
1155 	if (args->size == 0)
1156 		return 0;
1157 
1158 	if (!access_ok(VERIFY_READ,
1159 		       to_user_ptr(args->data_ptr),
1160 		       args->size))
1161 		return -EFAULT;
1162 
1163 #ifndef __NetBSD__		/* XXX prefault */
1164 	if (likely(!i915.prefault_disable)) {
1165 		ret = fault_in_multipages_readable(to_user_ptr(args->data_ptr),
1166 						   args->size);
1167 		if (ret)
1168 			return -EFAULT;
1169 	}
1170 #endif
1171 
1172 	ret = i915_mutex_lock_interruptible(dev);
1173 	if (ret)
1174 		return ret;
1175 
1176 	gobj = drm_gem_object_lookup(dev, file, args->handle);
1177 	if (gobj == NULL) {
1178 		ret = -ENOENT;
1179 		goto unlock;
1180 	}
1181 	obj = to_intel_bo(gobj);
1182 
1183 	/* Bounds check destination. */
1184 	if (args->offset > obj->base.size ||
1185 	    args->size > obj->base.size - args->offset) {
1186 		ret = -EINVAL;
1187 		goto out;
1188 	}
1189 
1190 	/* prime objects have no backing filp to GEM pread/pwrite
1191 	 * pages from.
1192 	 */
1193 #ifdef __NetBSD__
1194 	/* Also stolen objects.  */
1195 	if (obj->base.gemo_shm_uao == NULL) {
1196 		ret = -EINVAL;
1197 		goto out;
1198 	}
1199 #else
1200 	if (!obj->base.filp) {
1201 		ret = -EINVAL;
1202 		goto out;
1203 	}
1204 #endif
1205 
1206 	trace_i915_gem_object_pwrite(obj, args->offset, args->size);
1207 
1208 	ret = -EFAULT;
1209 	/* We can only do the GTT pwrite on untiled buffers, as otherwise
1210 	 * it would end up going through the fenced access, and we'll get
1211 	 * different detiling behavior between reading and writing.
1212 	 * pread/pwrite currently are reading and writing from the CPU
1213 	 * perspective, requiring manual detiling by the client.
1214 	 */
1215 	if (obj->phys_handle) {
1216 		ret = i915_gem_phys_pwrite(obj, args, file);
1217 		goto out;
1218 	}
1219 
1220 	if (obj->tiling_mode == I915_TILING_NONE &&
1221 	    obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
1222 	    cpu_write_needs_clflush(obj)) {
1223 		ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
1224 		/* Note that the gtt paths might fail with non-page-backed user
1225 		 * pointers (e.g. gtt mappings when moving data between
1226 		 * textures). Fallback to the shmem path in that case. */
1227 	}
1228 
1229 	if (ret == -EFAULT || ret == -ENOSPC)
1230 		ret = i915_gem_shmem_pwrite(dev, obj, args, file);
1231 
1232 out:
1233 	drm_gem_object_unreference(&obj->base);
1234 unlock:
1235 	mutex_unlock(&dev->struct_mutex);
1236 	return ret;
1237 }
1238 
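/* Translate the current GPU reset state into an errno: -EAGAIN while a
 * reset is in progress (-EIO for non-interruptible callers or a terminally
 * wedged GPU), 0 otherwise.
 */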
1239 int
1240 i915_gem_check_wedge(struct i915_gpu_error *error,
1241 		     bool interruptible)
1242 {
1243 	if (i915_reset_in_progress(error)) {
1244 		/* Non-interruptible callers can't handle -EAGAIN, hence return
1245 		 * -EIO unconditionally for these. */
1246 		if (!interruptible)
1247 			return -EIO;
1248 
1249 		/* Recovery complete, but the reset failed ... */
1250 		if (i915_terminally_wedged(error))
1251 			return -EIO;
1252 
1253 		return -EAGAIN;
1254 	}
1255 
1256 	return 0;
1257 }
1258 
1259 /*
1260  * Compare seqno against outstanding lazy request. Emit a request if they are
1261  * equal.
1262  */
1263 static int
1264 i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno)
1265 {
1266 	int ret;
1267 
1268 	BUG_ON(!mutex_is_locked(&ring->dev->struct_mutex));
1269 
1270 	ret = 0;
1271 	if (seqno == ring->outstanding_lazy_seqno)
1272 		ret = i915_add_request(ring, NULL);
1273 
1274 	return ret;
1275 }
1276 
1277 #ifndef __NetBSD__
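/* Timer callback used to kick the waiter out of io_schedule() when we are
 * polling for completion instead of trusting ring interrupts.
 */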
1278 static void fake_irq(unsigned long data)
1279 {
1280 	wake_up_process((struct task_struct *)data);
1281 }
1282 #endif
1283 
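/* True if interrupts from this ring have been seen to go missing, in which
 * case waiters fall back to polling on a short timer.
 */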
1284 static bool missed_irq(struct drm_i915_private *dev_priv,
1285 		       struct intel_ring_buffer *ring)
1286 {
1287 	return test_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings);
1288 }
1289 
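/* Each file may hold at most one outstanding RPS wait-boost; the NULL
 * (kernel-internal) client is always allowed to boost.
 */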
1290 static bool can_wait_boost(struct drm_i915_file_private *file_priv)
1291 {
1292 	if (file_priv == NULL)
1293 		return true;
1294 
1295 	return !atomic_xchg(&file_priv->rps_wait_boost, true);
1296 }
1297 
1298 /**
1299  * __wait_seqno - wait until execution of seqno has finished
1300  * @ring: the ring expected to report seqno
1301  * @seqno: duh!
1302  * @reset_counter: reset sequence associated with the given seqno
1303  * @interruptible: do an interruptible wait (normally yes)
1304  * @timeout: in - how long to wait (NULL forever); out - how much time remaining
1305  *
1306  * Note: It is of utmost importance that the passed in seqno and reset_counter
1307  * values have been read by the caller in an smp safe manner. Where read-side
1308  * locks are involved, it is sufficient to read the reset_counter before
1309  * unlocking the lock that protects the seqno. For lockless tricks, the
1310  * reset_counter _must_ be read before, and an appropriate smp_rmb must be
1311  * inserted.
1312  *
1313  * Returns 0 if the seqno was found within the allotted time. Otherwise returns
1314  * the errno, with the remaining time filled into the timeout argument.
1315  */
1316 #ifdef __NetBSD__
1317 static int
1318 __wait_seqno(struct intel_ring_buffer *ring, u32 seqno, unsigned reset_counter,
1319     bool interruptible, struct timespec *timeout,
1320     struct drm_i915_file_private *file_priv)
1321 {
1322 	struct drm_device *dev = ring->dev;
1323 	struct drm_i915_private *dev_priv = dev->dev_private;
1324 	bool irq_test_in_progress;
1325 	struct timespec before, after;
1326 	int ticks;
1327 	bool wedged;
1328 	int ret;
1329 
1330 	irq_test_in_progress = (dev_priv->gpu_error.test_irq_rings &
1331 	    intel_ring_flag(ring));
1332 	__insn_barrier();
1333 
1334 	if (i915_seqno_passed(ring->get_seqno(ring, true), seqno))
1335 		return 0;
1336 
1337 	if (timeout)
1338 		ticks = mstohz(timespec_to_ns(timeout) / 1000000);
1339 	else
1340 		ticks = 1;
1341 
1342 	if (INTEL_INFO(dev)->gen >= 6 && can_wait_boost(file_priv)) {
1343 		gen6_rps_boost(dev_priv);
1344 		if (file_priv)
1345 			mod_delayed_work(dev_priv->wq,
1346 					 &file_priv->mm.idle_work,
1347 					 msecs_to_jiffies(100));
1348 	}
1349 
1350 	if (!irq_test_in_progress && WARN_ON(!ring->irq_get(ring)))
1351 		return -ENODEV;
1352 
1353 	nanotime(&before);
1354 	spin_lock(&dev_priv->irq_lock);
1355 #define	EXIT_COND							      \
1356 	((wedged = (reset_counter !=					      \
1357 		atomic_read(&dev_priv->gpu_error.reset_counter))) ||	      \
1358 	    i915_seqno_passed(ring->get_seqno(ring, false),		      \
1359 		seqno))
1360 
1361 	if (timeout) {
1362 		/*
1363 		 * XXX This missed_irq business smells like unlocked
1364 		 * Linux waitqueue nonsense.
1365 		 */
1366 		if (missed_irq(dev_priv, ring))
1367 			ticks = 1;
1368 		if (interruptible)
1369 			DRM_SPIN_TIMED_WAIT_UNTIL(ret, &ring->irq_queue,
1370 			    &dev_priv->irq_lock, ticks, EXIT_COND);
1371 		else
1372 			DRM_SPIN_TIMED_WAIT_NOINTR_UNTIL(ret, &ring->irq_queue,
1373 			    &dev_priv->irq_lock, ticks, EXIT_COND);
1374 		if (ret < 0)	/* Failure: return negative error as is.  */
1375 			;
1376 		else if (ret == 0) /* Timed out: return -ETIME.  */
1377 			ret = -ETIME;
1378 		else		/* Succeeded (ret > 0): return 0.  */
1379 			ret = 0;
1380 	} else {
1381 		if (interruptible)
1382 			DRM_SPIN_WAIT_UNTIL(ret, &ring->irq_queue,
1383 			    &dev_priv->irq_lock, EXIT_COND);
1384 		else
1385 			DRM_SPIN_WAIT_NOINTR_UNTIL(ret, &ring->irq_queue,
1386 			    &dev_priv->irq_lock, EXIT_COND);
1387 		/* ret is negative on failure or zero on success.  */
1388 	}
1389 #undef	EXIT_COND
1390 	spin_unlock(&dev_priv->irq_lock);
1391 	nanotime(&after);
1392 
1393 	if (!irq_test_in_progress)
1394 		ring->irq_put(ring);
1395 	if (timeout) {
1396 		struct timespec slept;
1397 
1398 		/* Compute slept = after - before.  */
1399 		timespecsub(&after, &before, &slept);
1400 
1401 		/*
1402 		 * Return the time remaining, timeout - slept, if we
1403 		 * slept for less time than the timeout; or zero if we
1404 		 * timed out.
1405 		 */
1406 		if (timespeccmp(&slept, timeout, <))
1407 			timespecsub(timeout, &slept, timeout);
1408 		else
1409 			timespecclear(timeout);
1410 	}
1411 	if (wedged) {		/* GPU reset while we were waiting.  */
1412 		ret = i915_gem_check_wedge(&dev_priv->gpu_error,
1413 		    interruptible);
1414 		if (ret == 0)
1415 			ret = -EAGAIN;
1416 	}
1417 	return ret;
1418 }
1419 #else
1420 static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
1421 			unsigned reset_counter,
1422 			bool interruptible,
1423 			struct timespec *timeout,
1424 			struct drm_i915_file_private *file_priv)
1425 {
1426 	struct drm_device *dev = ring->dev;
1427 	struct drm_i915_private *dev_priv = dev->dev_private;
1428 	const bool irq_test_in_progress =
1429 		ACCESS_ONCE(dev_priv->gpu_error.test_irq_rings) & intel_ring_flag(ring);
1430 	struct timespec before, now;
1431 	DEFINE_WAIT(wait);
1432 	unsigned long timeout_expire;
1433 	int ret;
1434 
1435 	WARN(dev_priv->pm.irqs_disabled, "IRQs disabled\n");
1436 
1437 	if (i915_seqno_passed(ring->get_seqno(ring, true), seqno))
1438 		return 0;
1439 
1440 	timeout_expire = timeout ? jiffies + timespec_to_jiffies_timeout(timeout) : 0;
1441 
1442 	if (INTEL_INFO(dev)->gen >= 6 && can_wait_boost(file_priv)) {
1443 		gen6_rps_boost(dev_priv);
1444 		if (file_priv)
1445 			mod_delayed_work(dev_priv->wq,
1446 					 &file_priv->mm.idle_work,
1447 					 msecs_to_jiffies(100));
1448 	}
1449 
1450 	if (!irq_test_in_progress && WARN_ON(!ring->irq_get(ring)))
1451 		return -ENODEV;
1452 
1453 	/* Record current time in case interrupted by signal, or wedged */
1454 	trace_i915_gem_request_wait_begin(ring, seqno);
1455 	getrawmonotonic(&before);
1456 	for (;;) {
1457 		struct timer_list timer;
1458 
1459 		prepare_to_wait(&ring->irq_queue, &wait,
1460 				interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE);
1461 
1462 		/* We need to check whether any gpu reset happened in between
1463 		 * the caller grabbing the seqno and now ... */
1464 		if (reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter)) {
1465 			/* ... but upgrade the -EAGAIN to an -EIO if the gpu
1466 			 * is truly gone. */
1467 			ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
1468 			if (ret == 0)
1469 				ret = -EAGAIN;
1470 			break;
1471 		}
1472 
1473 		if (i915_seqno_passed(ring->get_seqno(ring, false), seqno)) {
1474 			ret = 0;
1475 			break;
1476 		}
1477 
1478 		if (interruptible && signal_pending(current)) {
1479 			ret = -ERESTARTSYS;
1480 			break;
1481 		}
1482 
1483 		if (timeout && time_after_eq(jiffies, timeout_expire)) {
1484 			ret = -ETIME;
1485 			break;
1486 		}
1487 
1488 		timer.function = NULL;
1489 		if (timeout || missed_irq(dev_priv, ring)) {
1490 			unsigned long expire;
1491 
1492 			setup_timer_on_stack(&timer, fake_irq, (unsigned long)current);
1493 			expire = missed_irq(dev_priv, ring) ? jiffies + 1 : timeout_expire;
1494 			mod_timer(&timer, expire);
1495 		}
1496 
1497 		io_schedule();
1498 
1499 		if (timer.function) {
1500 			del_singleshot_timer_sync(&timer);
1501 			destroy_timer_on_stack(&timer);
1502 		}
1503 	}
1504 	getrawmonotonic(&now);
1505 	trace_i915_gem_request_wait_end(ring, seqno);
1506 
1507 	if (!irq_test_in_progress)
1508 		ring->irq_put(ring);
1509 
1510 	finish_wait(&ring->irq_queue, &wait);
1511 
1512 	if (timeout) {
1513 		struct timespec sleep_time = timespec_sub(now, before);
1514 		*timeout = timespec_sub(*timeout, sleep_time);
1515 		if (!timespec_valid(timeout)) /* i.e. negative time remains */
1516 			set_normalized_timespec(timeout, 0, 0);
1517 	}
1518 
1519 	return ret;
1520 }
1521 #endif
1522 
1523 /**
1524  * Waits for a sequence number to be signaled, and cleans up the
1525  * request and object lists appropriately for that event.
1526  */
1527 int
1528 i915_wait_seqno(struct intel_ring_buffer *ring, uint32_t seqno)
1529 {
1530 	struct drm_device *dev = ring->dev;
1531 	struct drm_i915_private *dev_priv = dev->dev_private;
1532 	bool interruptible = dev_priv->mm.interruptible;
1533 	int ret;
1534 
1535 	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
1536 	BUG_ON(seqno == 0);
1537 
1538 	ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
1539 	if (ret)
1540 		return ret;
1541 
1542 	ret = i915_gem_check_olr(ring, seqno);
1543 	if (ret)
1544 		return ret;
1545 
1546 	return __wait_seqno(ring, seqno,
1547 			    atomic_read(&dev_priv->gpu_error.reset_counter),
1548 			    interruptible, NULL, NULL);
1549 }
1550 
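/* Common tail for the blocking and nonblocking wait paths: retire what has
 * completed and clear the object's now-stale GPU write tracking.
 */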
1551 static int
1552 i915_gem_object_wait_rendering__tail(struct drm_i915_gem_object *obj,
1553 				     struct intel_ring_buffer *ring)
1554 {
1555 	i915_gem_retire_requests_ring(ring);
1556 
1557 	/* Manually manage the write flush as we may have not yet
1558 	 * retired the buffer.
1559 	 *
1560 	 * Note that the last_write_seqno is always the earlier of
1561 	 * the two (read/write) seqno, so if we have successfully waited,
1562 	 * we know we have passed the last write.
1563 	 */
1564 	obj->last_write_seqno = 0;
1565 	obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
1566 
1567 	return 0;
1568 }
1569 
1570 /**
1571  * Ensures that all rendering to the object has completed and the object is
1572  * safe to unbind from the GTT or access from the CPU.
1573  */
1574 static __must_check int
1575 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
1576 			       bool readonly)
1577 {
1578 	struct intel_ring_buffer *ring = obj->ring;
1579 	u32 seqno;
1580 	int ret;
1581 
1582 	seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
1583 	if (seqno == 0)
1584 		return 0;
1585 
1586 	ret = i915_wait_seqno(ring, seqno);
1587 	if (ret)
1588 		return ret;
1589 
1590 	return i915_gem_object_wait_rendering__tail(obj, ring);
1591 }
1592 
1593 /* A nonblocking variant of the above wait. This is a highly dangerous routine
1594  * as the object state may change during this call.
1595  */
1596 static __must_check int
1597 i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
1598 					    struct drm_i915_file_private *file_priv,
1599 					    bool readonly)
1600 {
1601 	struct drm_device *dev = obj->base.dev;
1602 	struct drm_i915_private *dev_priv = dev->dev_private;
1603 	struct intel_ring_buffer *ring = obj->ring;
1604 	unsigned reset_counter;
1605 	u32 seqno;
1606 	int ret;
1607 
1608 	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
1609 	BUG_ON(!dev_priv->mm.interruptible);
1610 
1611 	seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
1612 	if (seqno == 0)
1613 		return 0;
1614 
1615 	ret = i915_gem_check_wedge(&dev_priv->gpu_error, true);
1616 	if (ret)
1617 		return ret;
1618 
1619 	ret = i915_gem_check_olr(ring, seqno);
1620 	if (ret)
1621 		return ret;
1622 
1623 	reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
1624 	mutex_unlock(&dev->struct_mutex);
1625 	ret = __wait_seqno(ring, seqno, reset_counter, true, NULL, file_priv);
1626 	mutex_lock(&dev->struct_mutex);
1627 	if (ret)
1628 		return ret;
1629 
1630 	return i915_gem_object_wait_rendering__tail(obj, ring);
1631 }
1632 
1633 /**
1634  * Called when user space prepares to use an object with the CPU, either
1635  * through the mmap ioctl's mapping or a GTT mapping.
1636  */
1637 int
1638 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
1639 			  struct drm_file *file)
1640 {
1641 	struct drm_i915_gem_set_domain *args = data;
1642 	struct drm_gem_object *gobj;
1643 	struct drm_i915_gem_object *obj;
1644 	uint32_t read_domains = args->read_domains;
1645 	uint32_t write_domain = args->write_domain;
1646 	int ret;
1647 
1648 	/* Only handle setting domains to types used by the CPU. */
1649 	if (write_domain & I915_GEM_GPU_DOMAINS)
1650 		return -EINVAL;
1651 
1652 	if (read_domains & I915_GEM_GPU_DOMAINS)
1653 		return -EINVAL;
1654 
1655 	/* Having something in the write domain implies it's in the read
1656 	 * domain, and only that read domain.  Enforce that in the request.
1657 	 */
1658 	if (write_domain != 0 && read_domains != write_domain)
1659 		return -EINVAL;
1660 
1661 	ret = i915_mutex_lock_interruptible(dev);
1662 	if (ret)
1663 		return ret;
1664 
1665 	gobj = drm_gem_object_lookup(dev, file, args->handle);
1666 	if (gobj == NULL) {
1667 		ret = -ENOENT;
1668 		goto unlock;
1669 	}
1670 	obj = to_intel_bo(gobj);
1671 
1672 	/* Try to flush the object off the GPU without holding the lock.
1673 	 * We will repeat the flush holding the lock in the normal manner
1674 	 * to catch cases where we are gazumped.
1675 	 */
1676 	ret = i915_gem_object_wait_rendering__nonblocking(obj,
1677 							  file->driver_priv,
1678 							  !write_domain);
1679 	if (ret)
1680 		goto unref;
1681 
1682 	if (read_domains & I915_GEM_DOMAIN_GTT) {
1683 		ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
1684 
1685 		/* Silently promote "you're not bound, there was nothing to do"
1686 		 * to success, since the client was just asking us to
1687 		 * make sure everything was done.
1688 		 */
1689 		if (ret == -EINVAL)
1690 			ret = 0;
1691 	} else {
1692 		ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
1693 	}
1694 
1695 unref:
1696 	drm_gem_object_unreference(&obj->base);
1697 unlock:
1698 	mutex_unlock(&dev->struct_mutex);
1699 	return ret;
1700 }
1701 
1702 /**
1703  * Called when user space has done writes to this buffer
1704  */
1705 int
1706 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
1707 			 struct drm_file *file)
1708 {
1709 	struct drm_i915_gem_sw_finish *args = data;
1710 	struct drm_gem_object *gobj;
1711 	struct drm_i915_gem_object *obj;
1712 	int ret = 0;
1713 
1714 	ret = i915_mutex_lock_interruptible(dev);
1715 	if (ret)
1716 		return ret;
1717 
1718 	gobj = drm_gem_object_lookup(dev, file, args->handle);
1719 	if (gobj == NULL) {
1720 		ret = -ENOENT;
1721 		goto unlock;
1722 	}
1723 	obj = to_intel_bo(gobj);
1724 
1725 	/* Pinned buffers may be scanout, so flush the cache */
1726 	if (obj->pin_display)
1727 		i915_gem_object_flush_cpu_write_domain(obj, true);
1728 
1729 	drm_gem_object_unreference(&obj->base);
1730 unlock:
1731 	mutex_unlock(&dev->struct_mutex);
1732 	return ret;
1733 }
1734 
1735 /**
1736  * Maps the contents of an object, returning the address it is mapped
1737  * into.
1738  *
1739  * While the mapping holds a reference on the contents of the object, it doesn't
1740  * imply a ref on the object itself.
1741  */
1742 int
1743 i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
1744 		    struct drm_file *file)
1745 {
1746 	struct drm_i915_gem_mmap *args = data;
1747 	struct drm_gem_object *obj;
1748 	unsigned long addr;
1749 #ifdef __NetBSD__
1750 	int ret;
1751 #endif
1752 
1753 	obj = drm_gem_object_lookup(dev, file, args->handle);
1754 	if (obj == NULL)
1755 		return -ENOENT;
1756 
1757 	/* prime objects have no backing filp to GEM mmap
1758 	 * pages from.
1759 	 */
1760 #ifdef __NetBSD__
1761 	/* Also stolen objects (XXX can we get them here?)  */
1762 	if (obj->gemo_shm_uao == NULL) {
1763 		drm_gem_object_unreference_unlocked(obj);
1764 		return -EINVAL;
1765 	}
1766 #else
1767 	if (!obj->filp) {
1768 		drm_gem_object_unreference_unlocked(obj);
1769 		return -EINVAL;
1770 	}
1771 #endif
1772 
1773 #ifdef __NetBSD__
1774 	addr = (*curproc->p_emul->e_vm_default_addr)(curproc,
1775 	    (vaddr_t)curproc->p_vmspace->vm_daddr, args->size,
1776 	    curproc->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN);
1777 	/* XXX errno NetBSD->Linux */
1778 	ret = -uvm_map(&curproc->p_vmspace->vm_map, &addr, args->size,
1779 	    obj->gemo_shm_uao, args->offset, 0,
1780 	    UVM_MAPFLAG((VM_PROT_READ | VM_PROT_WRITE),
1781 		(VM_PROT_READ | VM_PROT_WRITE), UVM_INH_COPY, UVM_ADV_NORMAL,
1782 		0));
1783 	if (ret) {
1784 		drm_gem_object_unreference_unlocked(obj);
1785 		return ret;
1786 	}
1787 	uao_reference(obj->gemo_shm_uao);
1788 	drm_gem_object_unreference_unlocked(obj);
1789 #else
1790 	addr = vm_mmap(obj->filp, 0, args->size,
1791 		       PROT_READ | PROT_WRITE, MAP_SHARED,
1792 		       args->offset);
1793 	drm_gem_object_unreference_unlocked(obj);
1794 	if (IS_ERR((void *)addr))
1795 		return addr;
1796 #endif
1797 
1798 	args->addr_ptr = (uint64_t) addr;
1799 
1800 	return 0;
1801 }
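
/*
 * Illustrative only: a hypothetical userspace caller of the mmap ioctl
 * above.  The structure fields (handle, offset, size, addr_ptr) are the
 * ones consumed by the handler; drmIoctl() from libdrm and the helper
 * name are assumptions.  Never compiled here.
 */
#if 0
static void *
example_gem_mmap(int fd, uint32_t handle, uint64_t size)
{
	struct drm_i915_gem_mmap arg;

	memset(&arg, 0, sizeof(arg));
	arg.handle = handle;
	arg.offset = 0;		/* map from the start of the object */
	arg.size = size;	/* length of the mapping, in bytes */
	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP, &arg) != 0)
		return NULL;

	/* The kernel hands back the CPU virtual address in addr_ptr. */
	return (void *)(uintptr_t)arg.addr_ptr;
}
#endif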
1802 
1803 #ifdef __NetBSD__		/* XXX gem gtt fault */
1804 static int	i915_udv_fault(struct uvm_faultinfo *, vaddr_t,
1805 		    struct vm_page **, int, int, vm_prot_t, int, paddr_t);
1806 
1807 int
1808 i915_gem_fault(struct uvm_faultinfo *ufi, vaddr_t vaddr, struct vm_page **pps,
1809     int npages, int centeridx, vm_prot_t access_type, int flags)
1810 {
1811 	struct uvm_object *uobj = ufi->entry->object.uvm_obj;
1812 	struct drm_gem_object *gem_obj =
1813 	    container_of(uobj, struct drm_gem_object, gemo_uvmobj);
1814 	struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
1815 	struct drm_device *dev = obj->base.dev;
1816 	struct drm_i915_private *dev_priv = dev->dev_private;
1817 	voff_t byte_offset;
1818 	pgoff_t page_offset;
1819 	int ret = 0;
1820 	bool write = ISSET(access_type, VM_PROT_WRITE) ? 1 : 0;
1821 
1822 	byte_offset = (ufi->entry->offset + (vaddr - ufi->entry->start));
1823 	KASSERT(byte_offset <= obj->base.size);
1824 	page_offset = (byte_offset >> PAGE_SHIFT);
1825 
1826 	intel_runtime_pm_get(dev_priv);
1827 
1828 	/* Thanks, uvm, but we don't need this lock.  */
1829 	mutex_exit(uobj->vmobjlock);
1830 
1831 	ret = i915_mutex_lock_interruptible(dev);
1832 	if (ret)
1833 		goto out;
1834 
1835 	trace_i915_gem_object_fault(obj, page_offset, true, write);
1836 
1837 	ret = i915_gem_object_wait_rendering__nonblocking(obj, NULL, !write);
1838 	if (ret)
1839 		goto unlock;
1840 
1841 	if ((obj->cache_level != I915_CACHE_NONE) && !HAS_LLC(dev)) {
1842 		ret = -EINVAL;
1843 		goto unlock;
1844 	}
1845 
1846 	ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE);
1847 	if (ret)
1848 		goto unlock;
1849 
1850 	ret = i915_gem_object_set_to_gtt_domain(obj, write);
1851 	if (ret)
1852 		goto unpin;
1853 
1854 	ret = i915_gem_object_get_fence(obj);
1855 	if (ret)
1856 		goto unpin;
1857 
1858 	obj->fault_mappable = true;
1859 
1860 	/* XXX errno NetBSD->Linux */
1861 	ret = -i915_udv_fault(ufi, vaddr, pps, npages, centeridx, access_type,
1862 	    flags,
1863 	    (dev_priv->gtt.mappable_base + i915_gem_obj_ggtt_offset(obj)));
1864 unpin:
1865 	i915_gem_object_ggtt_unpin(obj);
1866 unlock:
1867 	mutex_unlock(&dev->struct_mutex);
1868 out:
1869 	mutex_enter(uobj->vmobjlock);
1870 	uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, uobj);
1871 	if (ret == -ERESTART)
1872 		uvm_wait("i915flt");
1873 
1874 	/*
1875 	 * Remap EINTR to success, so that we return to userland.
1876 	 * On the way out, we'll deliver the signal, and if the signal
1877 	 * is not fatal then the user code which faulted will most likely
1878 	 * fault again, and we'll come back here for another try.
1879 	 */
1880 	if (ret == -EINTR)
1881 		ret = 0;
1882 	/* XXX Deal with GPU hangs here...  */
1883 	intel_runtime_pm_put(dev_priv);
1884 	/* XXX errno Linux->NetBSD */
1885 	return -ret;
1886 }
1887 
1888 /*
1889  * XXX i915_udv_fault is copypasta of udv_fault from uvm_device.c.
1890  *
1891  * XXX pmap_enter_default instead of pmap_enter because of a problem
1892  * with using weak aliases in kernel modules or something.
1893  */
1894 int	pmap_enter_default(pmap_t, vaddr_t, paddr_t, vm_prot_t, unsigned);
1895 
1896 static int
1897 i915_udv_fault(struct uvm_faultinfo *ufi, vaddr_t vaddr, struct vm_page **pps,
1898     int npages, int centeridx, vm_prot_t access_type, int flags,
1899     paddr_t gtt_paddr)
1900 {
1901 	struct vm_map_entry *entry = ufi->entry;
1902 	vaddr_t curr_va;
1903 	off_t curr_offset;
1904 	paddr_t paddr;
1905 	u_int mmapflags;
1906 	int lcv, retval;
1907 	vm_prot_t mapprot;
1908 	UVMHIST_FUNC("i915_udv_fault"); UVMHIST_CALLED(maphist);
1909 	UVMHIST_LOG(maphist,"  flags=%d", flags,0,0,0);
1910 
1911 	/*
1912 	 * we do not allow device mappings to be mapped copy-on-write
1913 	 * so we kill any attempt to do so here.
1914 	 */
1915 
1916 	if (UVM_ET_ISCOPYONWRITE(entry)) {
1917 		UVMHIST_LOG(maphist, "<- failed -- COW entry (etype=0x%x)",
1918 		entry->etype, 0,0,0);
1919 		return(EIO);
1920 	}
1921 
1922 	/*
1923 	 * now we must determine the offset in udv to use and the VA to
1924 	 * use for pmap_enter.  note that we always use orig_map's pmap
1925 	 * for pmap_enter (even if we have a submap).   since virtual
1926 	 * addresses in a submap must match the main map, this is ok.
1927 	 */
1928 
1929 	/* udv offset = (offset from start of entry) + entry's offset */
1930 	curr_offset = entry->offset + (vaddr - entry->start);
1931 	/* pmap va = vaddr (virtual address of pps[0]) */
1932 	curr_va = vaddr;
1933 
1934 	/*
1935 	 * loop over the page range entering in as needed
1936 	 */
1937 
1938 	retval = 0;
1939 	for (lcv = 0 ; lcv < npages ; lcv++, curr_offset += PAGE_SIZE,
1940 	    curr_va += PAGE_SIZE) {
1941 		if ((flags & PGO_ALLPAGES) == 0 && lcv != centeridx)
1942 			continue;
1943 
1944 		if (pps[lcv] == PGO_DONTCARE)
1945 			continue;
1946 
1947 		paddr = (gtt_paddr + curr_offset);
1948 		mmapflags = 0;
1949 		mapprot = ufi->entry->protection;
1950 		UVMHIST_LOG(maphist,
1951 		    "  MAPPING: device: pm=0x%x, va=0x%x, pa=0x%lx, at=%d",
1952 		    ufi->orig_map->pmap, curr_va, paddr, mapprot);
1953 		if (pmap_enter_default(ufi->orig_map->pmap, curr_va, paddr, mapprot,
1954 		    PMAP_CANFAIL | mapprot | mmapflags) != 0) {
1955 			/*
1956 			 * pmap_enter() didn't have the resource to
1957 			 * enter this mapping.  Unlock everything,
1958 			 * wait for the pagedaemon to free up some
1959 			 * pages, and then tell uvm_fault() to start
1960 			 * the fault again.
1961 			 *
1962 			 * XXX Needs some rethinking for the PGO_ALLPAGES
1963 			 * XXX case.
1964 			 */
1965 			pmap_update(ufi->orig_map->pmap);	/* sync what we have so far */
1966 			return (ERESTART);
1967 		}
1968 	}
1969 
1970 	pmap_update(ufi->orig_map->pmap);
1971 	return (retval);
1972 }
1973 #else
1974 /**
1975  * i915_gem_fault - fault a page into the GTT
1976  * vma: VMA in question
1977  * vmf: fault info
1978  *
1979  * The fault handler is set up by drm_gem_mmap() when an object is GTT mapped
1980  * from userspace.  The fault handler takes care of binding the object to
1981  * the GTT (if needed), allocating and programming a fence register (again,
1982  * only if needed based on whether the old reg is still valid or the object
1983  * is tiled) and inserting a new PTE into the faulting process.
1984  *
1985  * Note that the faulting process may involve evicting existing objects
1986  * from the GTT and/or fence registers to make room.  So performance may
1987  * suffer if the GTT working set is large or there are few fence registers
1988  * left.
1989  */
1990 int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1991 {
1992 	struct drm_i915_gem_object *obj = to_intel_bo(vma->vm_private_data);
1993 	struct drm_device *dev = obj->base.dev;
1994 	struct drm_i915_private *dev_priv = dev->dev_private;
1995 	pgoff_t page_offset;
1996 	unsigned long pfn;
1997 	int ret = 0;
1998 	bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
1999 
2000 	intel_runtime_pm_get(dev_priv);
2001 
2002 	/* We don't use vmf->pgoff since that has the fake offset */
2003 	page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
2004 		PAGE_SHIFT;
2005 
2006 	ret = i915_mutex_lock_interruptible(dev);
2007 	if (ret)
2008 		goto out;
2009 
2010 	trace_i915_gem_object_fault(obj, page_offset, true, write);
2011 
2012 	/* Try to flush the object off the GPU first without holding the lock.
2013 	 * Upon reacquiring the lock, we will perform our sanity checks and then
2014 	 * repeat the flush holding the lock in the normal manner to catch cases
2015 	 * where we are gazumped.
2016 	 */
2017 	ret = i915_gem_object_wait_rendering__nonblocking(obj, NULL, !write);
2018 	if (ret)
2019 		goto unlock;
2020 
2021 	/* Access to snoopable pages through the GTT is incoherent. */
2022 	if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) {
2023 		ret = -EINVAL;
2024 		goto unlock;
2025 	}
2026 
2027 	/* Now bind it into the GTT if needed */
2028 	ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE);
2029 	if (ret)
2030 		goto unlock;
2031 
2032 	ret = i915_gem_object_set_to_gtt_domain(obj, write);
2033 	if (ret)
2034 		goto unpin;
2035 
2036 	ret = i915_gem_object_get_fence(obj);
2037 	if (ret)
2038 		goto unpin;
2039 
2040 	obj->fault_mappable = true;
2041 
2042 	pfn = dev_priv->gtt.mappable_base + i915_gem_obj_ggtt_offset(obj);
2043 	pfn >>= PAGE_SHIFT;
2044 	pfn += page_offset;
2045 
2046 	/* Finally, remap it using the new GTT offset */
2047 	ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);
2048 unpin:
2049 	i915_gem_object_ggtt_unpin(obj);
2050 unlock:
2051 	mutex_unlock(&dev->struct_mutex);
2052 out:
2053 	switch (ret) {
2054 	case -EIO:
2055 		/* If this -EIO is due to a gpu hang, give the reset code a
2056 		 * chance to clean up the mess. Otherwise return the proper
2057 		 * SIGBUS. */
2058 		if (i915_terminally_wedged(&dev_priv->gpu_error)) {
2059 			ret = VM_FAULT_SIGBUS;
2060 			break;
2061 		}
2062 	case -EAGAIN:
2063 		/*
2064 		 * EAGAIN means the gpu is hung and we'll wait for the error
2065 		 * handler to reset everything when re-faulting in
2066 		 * i915_mutex_lock_interruptible.
2067 		 */
2068 	case 0:
2069 	case -ERESTARTSYS:
2070 	case -EINTR:
2071 	case -EBUSY:
2072 		/*
2073 		 * EBUSY is ok: this just means that another thread
2074 		 * already did the job.
2075 		 */
2076 		ret = VM_FAULT_NOPAGE;
2077 		break;
2078 	case -ENOMEM:
2079 		ret = VM_FAULT_OOM;
2080 		break;
2081 	case -ENOSPC:
2082 	case -EFAULT:
2083 		ret = VM_FAULT_SIGBUS;
2084 		break;
2085 	default:
2086 		WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret);
2087 		ret = VM_FAULT_SIGBUS;
2088 		break;
2089 	}
2090 
2091 	intel_runtime_pm_put(dev_priv);
2092 	return ret;
2093 }
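
/*
 * For context: i915_gem_fault() is not called directly.  It is installed
 * as the .fault hook of the vm_operations_struct used for GTT mmaps (the
 * wiring lives in the driver setup code, i915_drv.c in upstream Linux).
 * A sketch of that wiring, for reference only:
 */
#if 0
static const struct vm_operations_struct example_i915_gem_vm_ops = {
	.fault = i915_gem_fault,	/* the handler above */
	.open = drm_gem_vm_open,	/* takes a reference on the object */
	.close = drm_gem_vm_close,
};
#endif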
2094 
2095 void i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv)
2096 {
2097 	struct i915_vma *vma;
2098 
2099 	/*
2100 	 * Only the global gtt is relevant for gtt memory mappings, so restrict
2101 	 * list traversal to objects bound into the global address space. Note
2102 	 * that the active list should be empty, but better safe than sorry.
2103 	 */
2104 	WARN_ON(!list_empty(&dev_priv->gtt.base.active_list));
2105 	list_for_each_entry(vma, &dev_priv->gtt.base.active_list, mm_list)
2106 		i915_gem_release_mmap(vma->obj);
2107 	list_for_each_entry(vma, &dev_priv->gtt.base.inactive_list, mm_list)
2108 		i915_gem_release_mmap(vma->obj);
2109 }
2110 #endif
2111 
2112 /**
2113  * i915_gem_release_mmap - remove physical page mappings
2114  * @obj: obj in question
2115  *
2116  * Preserve the reservation of the mmapping with the DRM core code, but
2117  * relinquish ownership of the pages back to the system.
2118  *
2119  * It is vital that we remove the page mapping if we have mapped a tiled
2120  * object through the GTT and then lose the fence register due to
2121  * resource pressure. Similarly if the object has been moved out of the
2122  * aperture, then pages mapped into userspace must be revoked. Removing the
2123  * mapping will then trigger a page fault on the next user access, allowing
2124  * fixup by i915_gem_fault().
2125  */
2126 void
2127 i915_gem_release_mmap(struct drm_i915_gem_object *obj)
2128 {
2129 	if (!obj->fault_mappable)
2130 		return;
2131 
2132 #ifdef __NetBSD__		/* XXX gem gtt fault */
2133 	{
2134 		struct drm_device *const dev = obj->base.dev;
2135 		struct drm_i915_private *const dev_priv = dev->dev_private;
2136 		const paddr_t start = dev_priv->gtt.mappable_base +
2137 		    i915_gem_obj_ggtt_offset(obj);
2138 		const size_t size = obj->base.size;
2139 		const paddr_t end = start + size;
2140 		paddr_t pa;
2141 
2142 		KASSERT((start & (PAGE_SIZE - 1)) == 0);
2143 		KASSERT((size & (PAGE_SIZE - 1)) == 0);
2144 
2145 		for (pa = start; pa < end; pa += PAGE_SIZE)
2146 			pmap_pv_protect(pa, VM_PROT_NONE);
2147 	}
2148 #else
2149 	drm_vma_node_unmap(&obj->base.vma_node,
2150 			   obj->base.dev->anon_inode->i_mapping);
2151 #endif
2152 	obj->fault_mappable = false;
2153 }
2154 
2155 uint32_t
2156 i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
2157 {
2158 	uint32_t gtt_size;
2159 
2160 	if (INTEL_INFO(dev)->gen >= 4 ||
2161 	    tiling_mode == I915_TILING_NONE)
2162 		return size;
2163 
2164 	/* Previous chips need a power-of-two fence region when tiling */
2165 	if (INTEL_INFO(dev)->gen == 3)
2166 		gtt_size = 1024*1024;
2167 	else
2168 		gtt_size = 512*1024;
2169 
2170 	while (gtt_size < size)
2171 		gtt_size <<= 1;
2172 
2173 	return gtt_size;
2174 }
2175 
2176 /**
2177  * i915_gem_get_gtt_alignment - return required GTT alignment for an object
2178  * @obj: object to check
2179  *
2180  * Return the required GTT alignment for an object, taking into account
2181  * potential fence register mapping.
2182  */
2183 uint32_t
2184 i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size,
2185 			   int tiling_mode, bool fenced)
2186 {
2187 	/*
2188 	 * Minimum alignment is 4k (GTT page size), but might be greater
2189 	 * if a fence register is needed for the object.
2190 	 */
2191 	if (INTEL_INFO(dev)->gen >= 4 || (!fenced && IS_G33(dev)) ||
2192 	    tiling_mode == I915_TILING_NONE)
2193 		return 4096;
2194 
2195 	/*
2196 	 * Previous chips need to be aligned to the size of the smallest
2197 	 * fence register that can contain the object.
2198 	 */
2199 	return i915_gem_get_gtt_size(dev, size, tiling_mode);
2200 }
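
/*
 * Worked example for the two helpers above: on gen3, a 300 KiB X-tiled
 * object starts from the 1 MiB minimum fence size, which already covers
 * it, so i915_gem_get_gtt_size() -- and through it
 * i915_gem_get_gtt_alignment() -- returns 1 MiB.  On gen2 the base is
 * 512 KiB, so a 700 KiB tiled object doubles once to a 1 MiB fence
 * region.  On gen4+ or for untiled objects the size passes through
 * unchanged and the alignment is just the 4 KiB GTT page size.
 */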
2201 
2202 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
2203 {
2204 	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
2205 	int ret;
2206 
2207 	if (drm_vma_node_has_offset(&obj->base.vma_node))
2208 		return 0;
2209 
2210 	dev_priv->mm.shrinker_no_lock_stealing = true;
2211 
2212 	ret = drm_gem_create_mmap_offset(&obj->base);
2213 	if (ret != -ENOSPC)
2214 		goto out;
2215 
2216 	/* Badly fragmented mmap space? The only way we can recover
2217 	 * space is by destroying unwanted objects. We can't randomly release
2218 	 * mmap_offsets as userspace expects them to be persistent for the
2219 	 * lifetime of the objects. The closest we can do is to release the
2220 	 * offsets on purgeable objects by truncating them and marking them
2221 	 * purged, which prevents userspace from ever using those objects again.
2222 	 */
2223 	i915_gem_purge(dev_priv, obj->base.size >> PAGE_SHIFT);
2224 	ret = drm_gem_create_mmap_offset(&obj->base);
2225 	if (ret != -ENOSPC)
2226 		goto out;
2227 
2228 	i915_gem_shrink_all(dev_priv);
2229 	ret = drm_gem_create_mmap_offset(&obj->base);
2230 out:
2231 	dev_priv->mm.shrinker_no_lock_stealing = false;
2232 
2233 	return ret;
2234 }
2235 
2236 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
2237 {
2238 	drm_gem_free_mmap_offset(&obj->base);
2239 }
2240 
2241 int
2242 i915_gem_mmap_gtt(struct drm_file *file,
2243 		  struct drm_device *dev,
2244 		  uint32_t handle,
2245 		  uint64_t *offset)
2246 {
2247 	struct drm_i915_private *dev_priv = dev->dev_private;
2248 	struct drm_gem_object *gobj;
2249 	struct drm_i915_gem_object *obj;
2250 	int ret;
2251 
2252 	ret = i915_mutex_lock_interruptible(dev);
2253 	if (ret)
2254 		return ret;
2255 
2256 	gobj = drm_gem_object_lookup(dev, file, handle);
2257 	if (gobj == NULL) {
2258 		ret = -ENOENT;
2259 		goto unlock;
2260 	}
2261 	obj = to_intel_bo(gobj);
2262 
2263 	if (obj->base.size > dev_priv->gtt.mappable_end) {
2264 		ret = -E2BIG;
2265 		goto out;
2266 	}
2267 
2268 	if (obj->madv != I915_MADV_WILLNEED) {
2269 		DRM_DEBUG("Attempting to mmap a purgeable buffer\n");
2270 		ret = -EFAULT;
2271 		goto out;
2272 	}
2273 
2274 	ret = i915_gem_object_create_mmap_offset(obj);
2275 	if (ret)
2276 		goto out;
2277 
2278 	*offset = drm_vma_node_offset_addr(&obj->base.vma_node);
2279 
2280 out:
2281 	drm_gem_object_unreference(&obj->base);
2282 unlock:
2283 	mutex_unlock(&dev->struct_mutex);
2284 	return ret;
2285 }
2286 
2287 /**
2288  * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
2289  * @dev: DRM device
2290  * @data: GTT mapping ioctl data
2291  * @file: GEM object info
2292  *
2293  * Simply returns the fake offset to userspace so it can mmap it.
2294  * The mmap call will end up in drm_gem_mmap(), which will set things
2295  * up so we can get faults in the handler above.
2296  *
2297  * The fault handler will take care of binding the object into the GTT
2298  * (since it may have been evicted to make room for something), allocating
2299  * a fence register, and mapping the appropriate aperture address into
2300  * userspace.
2301  */
2302 int
2303 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
2304 			struct drm_file *file)
2305 {
2306 	struct drm_i915_gem_mmap_gtt *args = data;
2307 
2308 	return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
2309 }
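
/*
 * Illustrative only: the userspace half of the GTT mapping path described
 * above.  A hypothetical helper fetches the fake offset with the ioctl
 * and passes it to mmap(2) on the DRM fd; drmIoctl() from libdrm is
 * assumed.  Never compiled here.
 */
#if 0
static void *
example_gem_mmap_gtt(int fd, uint32_t handle, size_t size)
{
	struct drm_i915_gem_mmap_gtt arg;
	void *ptr;

	memset(&arg, 0, sizeof(arg));
	arg.handle = handle;
	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &arg) != 0)
		return NULL;

	/* arg.offset is the fake offset; faults land in i915_gem_fault(). */
	ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
	    fd, arg.offset);
	return (ptr == MAP_FAILED) ? NULL : ptr;
}
#endif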
2310 
2311 /* Immediately discard the backing storage */
2312 static void
2313 i915_gem_object_truncate(struct drm_i915_gem_object *obj)
2314 {
2315 #ifndef __NetBSD__
2316 	struct inode *inode;
2317 #endif
2318 
2319 	i915_gem_object_free_mmap_offset(obj);
2320 
2321 #ifdef __NetBSD__
2322 	if (obj->base.gemo_shm_uao == NULL)
2323 		return;
2324 
2325 	{
2326 		struct uvm_object *const uobj = obj->base.gemo_shm_uao;
2327 
2328 		if (uobj != NULL) {
2329 			/* XXX Calling pgo_put like this is bogus.  */
2330 			mutex_enter(uobj->vmobjlock);
2331 			(*uobj->pgops->pgo_put)(uobj, 0, obj->base.size,
2332 			    (PGO_ALLPAGES | PGO_FREE));
2333 		}
2334 	}
2335 #else
2336 	if (obj->base.filp == NULL)
2337 		return;
2338 
2339 	/* Our goal here is to return as much of the memory as
2340 	 * possible back to the system, as we are called from OOM.
2341 	 * To do this we must instruct the shmfs to drop all of its
2342 	 * backing pages, *now*.
2343 	 */
2344 	inode = file_inode(obj->base.filp);
2345 	shmem_truncate_range(inode, 0, (loff_t)-1);
2346 #endif
2347 
2348 	obj->madv = __I915_MADV_PURGED;
2349 }
2350 
2351 static inline int
2352 i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj)
2353 {
2354 	return obj->madv == I915_MADV_DONTNEED;
2355 }
2356 
2357 #ifdef __NetBSD__
2358 static void
2359 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
2360 {
2361 	struct drm_device *const dev = obj->base.dev;
2362 	struct vm_page *page;
2363 	int ret;
2364 
2365 	/* XXX Cargo-culted from the Linux code.  */
2366 	BUG_ON(obj->madv == __I915_MADV_PURGED);
2367 
2368 	ret = i915_gem_object_set_to_cpu_domain(obj, true);
2369 	if (ret) {
2370 		WARN_ON(ret != -EIO);
2371 		i915_gem_clflush_object(obj, true);
2372 		obj->base.read_domains = obj->base.write_domain =
2373 		    I915_GEM_DOMAIN_CPU;
2374 	}
2375 
2376 	if (i915_gem_object_needs_bit17_swizzle(obj))
2377 		i915_gem_object_save_bit_17_swizzle(obj);
2378 
2379 	if (obj->madv == I915_MADV_DONTNEED)
2380 		obj->dirty = 0;
2381 
2382 	if (obj->dirty) {
2383 		TAILQ_FOREACH(page, &obj->igo_pageq, pageq.queue) {
2384 			page->flags &= ~PG_CLEAN;
2385 			/* XXX mark page accessed */
2386 		}
2387 	}
2388 
2389 	bus_dmamap_destroy(dev->dmat, obj->igo_dmamap);
2390 	bus_dmamem_unwire_uvm_object(dev->dmat, obj->base.gemo_shm_uao, 0,
2391 	    obj->base.size, obj->pages, obj->igo_nsegs);
2392 
2393 	kfree(obj->pages);
2394 }
2395 #else
2396 static void
2397 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
2398 {
2399 	struct sg_page_iter sg_iter;
2400 	int ret;
2401 
2402 	BUG_ON(obj->madv == __I915_MADV_PURGED);
2403 
2404 	ret = i915_gem_object_set_to_cpu_domain(obj, true);
2405 	if (ret) {
2406 		/* In the event of a disaster, abandon all caches and
2407 		 * hope for the best.
2408 		 */
2409 		WARN_ON(ret != -EIO);
2410 		i915_gem_clflush_object(obj, true);
2411 		obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
2412 	}
2413 
2414 	if (i915_gem_object_needs_bit17_swizzle(obj))
2415 		i915_gem_object_save_bit_17_swizzle(obj);
2416 
2417 	if (obj->madv == I915_MADV_DONTNEED)
2418 		obj->dirty = 0;
2419 
2420 	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
2421 		struct page *page = sg_page_iter_page(&sg_iter);
2422 
2423 		if (obj->dirty)
2424 			set_page_dirty(page);
2425 
2426 		if (obj->madv == I915_MADV_WILLNEED)
2427 			mark_page_accessed(page);
2428 
2429 		page_cache_release(page);
2430 	}
2431 	obj->dirty = 0;
2432 
2433 	sg_free_table(obj->pages);
2434 	kfree(obj->pages);
2435 }
2436 #endif
2437 
2438 int
2439 i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
2440 {
2441 	const struct drm_i915_gem_object_ops *ops = obj->ops;
2442 
2443 	if (obj->pages == NULL)
2444 		return 0;
2445 
2446 	if (obj->pages_pin_count)
2447 		return -EBUSY;
2448 
2449 	BUG_ON(i915_gem_obj_bound_any(obj));
2450 
2451 	/* ->put_pages might need to allocate memory for the bit17 swizzle
2452 	 * array, hence protect them from being reaped by removing them from gtt
2453 	 * lists early. */
2454 	list_del(&obj->global_list);
2455 
2456 	ops->put_pages(obj);
2457 	obj->pages = NULL;
2458 
2459 	if (i915_gem_object_is_purgeable(obj))
2460 		i915_gem_object_truncate(obj);
2461 
2462 	return 0;
2463 }
2464 
2465 static unsigned long
2466 __i915_gem_shrink(struct drm_i915_private *dev_priv, long target,
2467 		  bool purgeable_only)
2468 {
2469 	struct list_head still_bound_list;
2470 	struct drm_i915_gem_object *obj, *next;
2471 	unsigned long count = 0;
2472 
2473 	list_for_each_entry_safe(obj, next,
2474 				 &dev_priv->mm.unbound_list,
2475 				 global_list) {
2476 		if ((i915_gem_object_is_purgeable(obj) || !purgeable_only) &&
2477 		    i915_gem_object_put_pages(obj) == 0) {
2478 			count += obj->base.size >> PAGE_SHIFT;
2479 			if (count >= target)
2480 				return count;
2481 		}
2482 	}
2483 
2484 	/*
2485 	 * As we may completely rewrite the bound list whilst unbinding
2486 	 * (due to retiring requests) we have to strictly process only
2487 	 * one element of the list at a time, and recheck the list
2488 	 * on every iteration.
2489 	 */
2490 	INIT_LIST_HEAD(&still_bound_list);
2491 	while (count < target && !list_empty(&dev_priv->mm.bound_list)) {
2492 		struct i915_vma *vma, *v;
2493 
2494 		obj = list_first_entry(&dev_priv->mm.bound_list,
2495 				       typeof(*obj), global_list);
2496 		list_move_tail(&obj->global_list, &still_bound_list);
2497 
2498 		if (!i915_gem_object_is_purgeable(obj) && purgeable_only)
2499 			continue;
2500 
2501 		/*
2502 		 * Hold a reference whilst we unbind this object, as we may
2503 		 * end up waiting for and retiring requests. This might
2504 		 * release the final reference (held by the active list)
2505 		 * and result in the object being freed from under us.
2507 		 *
2508 		 * Note 1: Shrinking the bound list is special since only active
2509 		 * (and hence bound) objects can contain such limbo objects, so
2510 		 * we don't need special tricks for shrinking the unbound list.
2511 		 * The only other place where we have to be careful with active
2512 		 * objects suddenly disappearing due to retiring requests is the
2513 		 * eviction code.
2514 		 *
2515 		 * Note 2: Even though the bound list doesn't hold a reference
2516 		 * to the object we can safely grab one here: The final object
2517 		 * unreferencing and the bound_list are both protected by the
2518 		 * dev->struct_mutex and so we won't ever be able to observe an
2519 		 * object on the bound_list with a reference count equal to 0.
2520 		 */
2521 		drm_gem_object_reference(&obj->base);
2522 
2523 		list_for_each_entry_safe(vma, v, &obj->vma_list, vma_link)
2524 			if (i915_vma_unbind(vma))
2525 				break;
2526 
2527 		if (i915_gem_object_put_pages(obj) == 0)
2528 			count += obj->base.size >> PAGE_SHIFT;
2529 
2530 		drm_gem_object_unreference(&obj->base);
2531 	}
2532 	list_splice(&still_bound_list, &dev_priv->mm.bound_list);
2533 
2534 	return count;
2535 }
2536 
2537 static unsigned long
2538 i915_gem_purge(struct drm_i915_private *dev_priv, long target)
2539 {
2540 	return __i915_gem_shrink(dev_priv, target, true);
2541 }
2542 
2543 static unsigned long
2544 i915_gem_shrink_all(struct drm_i915_private *dev_priv)
2545 {
2546 	struct drm_i915_gem_object *obj, *next;
2547 	long freed = 0;
2548 
2549 	i915_gem_evict_everything(dev_priv->dev);
2550 
2551 	list_for_each_entry_safe(obj, next, &dev_priv->mm.unbound_list,
2552 				 global_list) {
2553 		if (i915_gem_object_put_pages(obj) == 0)
2554 			freed += obj->base.size >> PAGE_SHIFT;
2555 	}
2556 	return freed;
2557 }
2558 
2559 #ifdef __NetBSD__
2560 static int
2561 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
2562 {
2563 	struct drm_device *const dev = obj->base.dev;
2564 	struct vm_page *page;
2565 	int error;
2566 
2567 	/* XXX Cargo-culted from the Linux code.  */
2568 	BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
2569 	BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
2570 
2571 	KASSERT(obj->pages == NULL);
2572 	TAILQ_INIT(&obj->igo_pageq);
2573 	obj->pages = kcalloc((obj->base.size / PAGE_SIZE),
2574 	    sizeof(obj->pages[0]), GFP_KERNEL);
2575 	if (obj->pages == NULL) {
2576 		error = -ENOMEM;
2577 		goto fail0;
2578 	}
2579 
2580 	/* XXX errno NetBSD->Linux */
2581 	error = -bus_dmamem_wire_uvm_object(dev->dmat, obj->base.gemo_shm_uao,
2582 	    0, obj->base.size, &obj->igo_pageq, PAGE_SIZE, 0, obj->pages,
2583 	    (obj->base.size / PAGE_SIZE), &obj->igo_nsegs, BUS_DMA_NOWAIT);
2584 	if (error)
2585 		/* XXX Try i915_gem_purge, i915_gem_shrink_all.  */
2586 		goto fail1;
2587 	KASSERT(0 < obj->igo_nsegs);
2588 	KASSERT(obj->igo_nsegs <= (obj->base.size / PAGE_SIZE));
2589 
2590 	/*
2591 	 * Check that the paddrs will fit in 40 bits, or 32 bits on i965.
2592 	 *
2593 	 * XXX This is wrong; we ought to pass this constraint to
2594 	 * bus_dmamem_wire_uvm_object instead.
2595 	 */
2596 	TAILQ_FOREACH(page, &obj->igo_pageq, pageq.queue) {
2597 		const uint64_t mask =
2598 		    (IS_BROADWATER(dev) || IS_CRESTLINE(dev)?
2599 			0xffffffffULL : 0xffffffffffULL);
2600 		if (VM_PAGE_TO_PHYS(page) & ~mask) {
2601 			DRM_ERROR("GEM physical address exceeds %u bits"
2602 			    ": %"PRIxMAX"\n",
2603 			    popcount64(mask),
2604 			    (uintmax_t)VM_PAGE_TO_PHYS(page));
2605 			error = -EIO;
2606 			goto fail2;
2607 		}
2608 	}
2609 
2610 	/* XXX Should create the DMA map when creating the object.  */
2611 
2612 	/* XXX errno NetBSD->Linux */
2613 	error = -bus_dmamap_create(dev->dmat, obj->base.size, obj->igo_nsegs,
2614 	    PAGE_SIZE, 0, BUS_DMA_NOWAIT, &obj->igo_dmamap);
2615 	if (error)
2616 		goto fail2;
2617 
2618 	/* XXX Cargo-culted from the Linux code.  */
2619 	if (i915_gem_object_needs_bit17_swizzle(obj))
2620 		i915_gem_object_do_bit_17_swizzle(obj);
2621 
2622 	/* Success!  */
2623 	return 0;
2624 
2625 fail2:	bus_dmamem_unwire_uvm_object(dev->dmat, obj->base.gemo_shm_uao, 0,
2626 	    obj->base.size, obj->pages, (obj->base.size / PAGE_SIZE));
2627 fail1:	kfree(obj->pages);
2628 	obj->pages = NULL;
2629 fail0:	KASSERT(error);
2630 	return error;
2631 }
2632 #else
2633 static int
2634 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
2635 {
2636 	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
2637 	int page_count, i;
2638 	struct address_space *mapping;
2639 	struct sg_table *st;
2640 	struct scatterlist *sg;
2641 	struct sg_page_iter sg_iter;
2642 	struct page *page;
2643 	unsigned long last_pfn = 0;	/* suppress gcc warning */
2644 	gfp_t gfp;
2645 
2646 	/* Assert that the object is not currently in any GPU domain. As it
2647 	 * wasn't in the GTT, there shouldn't be any way it could have been in
2648 	 * a GPU cache
2649 	 */
2650 	BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
2651 	BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
2652 
2653 	st = kmalloc(sizeof(*st), GFP_KERNEL);
2654 	if (st == NULL)
2655 		return -ENOMEM;
2656 
2657 	page_count = obj->base.size / PAGE_SIZE;
2658 	if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
2659 		kfree(st);
2660 		return -ENOMEM;
2661 	}
2662 
2663 	/* Get the list of pages out of our struct file.  They'll be pinned
2664 	 * at this point until we release them.
2665 	 *
2666 	 * Fail silently without starting the shrinker
2667 	 */
2668 	mapping = file_inode(obj->base.filp)->i_mapping;
2669 	gfp = mapping_gfp_mask(mapping);
2670 	gfp |= __GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD;
2671 	gfp &= ~(__GFP_IO | __GFP_WAIT);
2672 	sg = st->sgl;
2673 	st->nents = 0;
2674 	for (i = 0; i < page_count; i++) {
2675 		page = shmem_read_mapping_page_gfp(mapping, i, gfp);
2676 		if (IS_ERR(page)) {
2677 			i915_gem_purge(dev_priv, page_count);
2678 			page = shmem_read_mapping_page_gfp(mapping, i, gfp);
2679 		}
2680 		if (IS_ERR(page)) {
2681 			/* We've tried hard to allocate the memory by reaping
2682 			 * our own buffer, now let the real VM do its job and
2683 			 * go down in flames if truly OOM.
2684 			 */
2685 			gfp &= ~(__GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD);
2686 			gfp |= __GFP_IO | __GFP_WAIT;
2687 
2688 			i915_gem_shrink_all(dev_priv);
2689 			page = shmem_read_mapping_page_gfp(mapping, i, gfp);
2690 			if (IS_ERR(page))
2691 				goto err_pages;
2692 
2693 			gfp |= __GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD;
2694 			gfp &= ~(__GFP_IO | __GFP_WAIT);
2695 		}
2696 #ifdef CONFIG_SWIOTLB
2697 		if (swiotlb_nr_tbl()) {
2698 			st->nents++;
2699 			sg_set_page(sg, page, PAGE_SIZE, 0);
2700 			sg = sg_next(sg);
2701 			continue;
2702 		}
2703 #endif
2704 		if (!i || page_to_pfn(page) != last_pfn + 1) {
2705 			if (i)
2706 				sg = sg_next(sg);
2707 			st->nents++;
2708 			sg_set_page(sg, page, PAGE_SIZE, 0);
2709 		} else {
2710 			sg->length += PAGE_SIZE;
2711 		}
2712 		last_pfn = page_to_pfn(page);
2713 
2714 		/* Check that the i965g/gm workaround works. */
2715 		WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL));
2716 	}
2717 #ifdef CONFIG_SWIOTLB
2718 	if (!swiotlb_nr_tbl())
2719 #endif
2720 		sg_mark_end(sg);
2721 	obj->pages = st;
2722 
2723 	if (i915_gem_object_needs_bit17_swizzle(obj))
2724 		i915_gem_object_do_bit_17_swizzle(obj);
2725 
2726 	return 0;
2727 
2728 err_pages:
2729 	sg_mark_end(sg);
2730 	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0)
2731 		page_cache_release(sg_page_iter_page(&sg_iter));
2732 	sg_free_table(st);
2733 	kfree(st);
2734 	return PTR_ERR(page);
2735 }
2736 #endif
2737 
2738 /* Ensure that the associated pages are gathered from the backing storage
2739  * and pinned into our object. i915_gem_object_get_pages() may be called
2740  * multiple times before they are released by a single call to
2741  * i915_gem_object_put_pages() - once the pages are no longer referenced
2742  * either as a result of memory pressure (reaping pages under the shrinker)
2743  * or as the object is itself released.
2744  */
2745 int
2746 i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
2747 {
2748 	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
2749 	const struct drm_i915_gem_object_ops *ops = obj->ops;
2750 	int ret;
2751 
2752 	if (obj->pages)
2753 		return 0;
2754 
2755 	if (obj->madv != I915_MADV_WILLNEED) {
2756 		DRM_DEBUG("Attempting to obtain a purgeable object\n");
2757 		return -EFAULT;
2758 	}
2759 
2760 	BUG_ON(obj->pages_pin_count);
2761 
2762 	ret = ops->get_pages(obj);
2763 	if (ret)
2764 		return ret;
2765 
2766 	list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list);
2767 	return 0;
2768 }
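
/*
 * Sketch of the expected pairing described above (not a real caller in
 * this file): code that needs the backing pages gathers them with
 * i915_gem_object_get_pages() and holds a pin for as long as it uses
 * them, so the shrinker cannot reap them in between.  The pin helpers
 * are assumed to be the pages_pin_count wrappers from i915_drv.h.
 */
#if 0
static int
example_use_pages(struct drm_i915_gem_object *obj)
{
	int ret;

	ret = i915_gem_object_get_pages(obj);
	if (ret)
		return ret;
	i915_gem_object_pin_pages(obj);	/* keep put_pages() at bay */

	/* ... safely use obj->pages here ... */

	i915_gem_object_unpin_pages(obj);
	return 0;
}
#endif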
2769 
2770 static void
2771 i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
2772 			       struct intel_ring_buffer *ring)
2773 {
2774 	struct drm_device *dev = obj->base.dev;
2775 	struct drm_i915_private *dev_priv = dev->dev_private;
2776 	u32 seqno = intel_ring_get_seqno(ring);
2777 
2778 	BUG_ON(ring == NULL);
2779 	if (obj->ring != ring && obj->last_write_seqno) {
2780 		/* Keep the seqno relative to the current ring */
2781 		obj->last_write_seqno = seqno;
2782 	}
2783 	obj->ring = ring;
2784 
2785 	/* Add a reference if we're newly entering the active list. */
2786 	if (!obj->active) {
2787 		drm_gem_object_reference(&obj->base);
2788 		obj->active = 1;
2789 	}
2790 
2791 	list_move_tail(&obj->ring_list, &ring->active_list);
2792 
2793 	obj->last_read_seqno = seqno;
2794 
2795 	if (obj->fenced_gpu_access) {
2796 		obj->last_fenced_seqno = seqno;
2797 
2798 		/* Bump MRU to take account of the delayed flush */
2799 		if (obj->fence_reg != I915_FENCE_REG_NONE) {
2800 			struct drm_i915_fence_reg *reg;
2801 
2802 			reg = &dev_priv->fence_regs[obj->fence_reg];
2803 			list_move_tail(&reg->lru_list,
2804 				       &dev_priv->mm.fence_list);
2805 		}
2806 	}
2807 }
2808 
2809 void i915_vma_move_to_active(struct i915_vma *vma,
2810 			     struct intel_ring_buffer *ring)
2811 {
2812 	list_move_tail(&vma->mm_list, &vma->vm->active_list);
2813 	return i915_gem_object_move_to_active(vma->obj, ring);
2814 }
2815 
2816 static void
2817 i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
2818 {
2819 	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
2820 	struct i915_address_space *vm;
2821 	struct i915_vma *vma;
2822 
2823 	BUG_ON(obj->base.write_domain & ~I915_GEM_GPU_DOMAINS);
2824 	BUG_ON(!obj->active);
2825 
2826 	list_for_each_entry(vm, &dev_priv->vm_list, global_link) {
2827 		vma = i915_gem_obj_to_vma(obj, vm);
2828 		if (vma && !list_empty(&vma->mm_list))
2829 			list_move_tail(&vma->mm_list, &vm->inactive_list);
2830 	}
2831 
2832 	list_del_init(&obj->ring_list);
2833 	obj->ring = NULL;
2834 
2835 	obj->last_read_seqno = 0;
2836 	obj->last_write_seqno = 0;
2837 	obj->base.write_domain = 0;
2838 
2839 	obj->last_fenced_seqno = 0;
2840 	obj->fenced_gpu_access = false;
2841 
2842 	obj->active = 0;
2843 	drm_gem_object_unreference(&obj->base);
2844 
2845 	WARN_ON(i915_verify_lists(dev));
2846 }
2847 
2848 static int
2849 i915_gem_init_seqno(struct drm_device *dev, u32 seqno)
2850 {
2851 	struct drm_i915_private *dev_priv = dev->dev_private;
2852 	struct intel_ring_buffer *ring;
2853 	int ret, i, j;
2854 
2855 	/* Carefully retire all requests without writing to the rings */
2856 	for_each_ring(ring, dev_priv, i) {
2857 		ret = intel_ring_idle(ring);
2858 		if (ret)
2859 			return ret;
2860 	}
2861 	i915_gem_retire_requests(dev);
2862 
2863 	/* Finally reset hw state */
2864 	for_each_ring(ring, dev_priv, i) {
2865 		intel_ring_init_seqno(ring, seqno);
2866 
2867 		for (j = 0; j < ARRAY_SIZE(ring->sync_seqno); j++)
2868 			ring->sync_seqno[j] = 0;
2869 	}
2870 
2871 	return 0;
2872 }
2873 
2874 int i915_gem_set_seqno(struct drm_device *dev, u32 seqno)
2875 {
2876 	struct drm_i915_private *dev_priv = dev->dev_private;
2877 	int ret;
2878 
2879 	if (seqno == 0)
2880 		return -EINVAL;
2881 
2882 	/* HWS page needs to be set less than what we
2883 	 * will inject to ring
2884 	 */
2885 	ret = i915_gem_init_seqno(dev, seqno - 1);
2886 	if (ret)
2887 		return ret;
2888 
2889 	/* Carefully set the last_seqno value so that wrap
2890 	 * detection still works
2891 	 */
2892 	dev_priv->next_seqno = seqno;
2893 	dev_priv->last_seqno = seqno - 1;
2894 	if (dev_priv->last_seqno == 0)
2895 		dev_priv->last_seqno--;
2896 
2897 	return 0;
2898 }
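
/*
 * Example of the wrap handling above: i915_gem_set_seqno(dev, 1) first
 * idles the rings and programs them with seqno 0 via
 * i915_gem_init_seqno(dev, 0), then sets next_seqno to 1 and last_seqno
 * to 0; the final check decrements last_seqno to 0xffffffff so that
 * wrap detection still sees seqno 1 as following last_seqno.
 */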
2899 
2900 int
2901 i915_gem_get_seqno(struct drm_device *dev, u32 *seqno)
2902 {
2903 	struct drm_i915_private *dev_priv = dev->dev_private;
2904 
2905 	/* reserve 0 for non-seqno */
2906 	if (dev_priv->next_seqno == 0) {
2907 		int ret = i915_gem_init_seqno(dev, 0);
2908 		if (ret)
2909 			return ret;
2910 
2911 		dev_priv->next_seqno = 1;
2912 	}
2913 
2914 	*seqno = dev_priv->last_seqno = dev_priv->next_seqno++;
2915 	return 0;
2916 }
2917 
2918 int __i915_add_request(struct intel_ring_buffer *ring,
2919 		       struct drm_file *file,
2920 		       struct drm_i915_gem_object *obj,
2921 		       u32 *out_seqno)
2922 {
2923 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
2924 	struct drm_i915_gem_request *request;
2925 	u32 request_ring_position, request_start;
2926 	int ret;
2927 
2928 	request_start = intel_ring_get_tail(ring);
2929 	/*
2930 	 * Emit any outstanding flushes - execbuf can fail to emit the flush
2931 	 * after having emitted the batchbuffer command. Hence we need to fix
2932 	 * things up similar to emitting the lazy request. The difference here
2933 	 * is that the flush _must_ happen before the next request, no matter
2934 	 * what.
2935 	 */
2936 	ret = intel_ring_flush_all_caches(ring);
2937 	if (ret)
2938 		return ret;
2939 
2940 	request = ring->preallocated_lazy_request;
2941 	if (WARN_ON(request == NULL))
2942 		return -ENOMEM;
2943 
2944 	/* Record the position of the start of the request so that
2945 	 * should we detect the updated seqno part-way through the
2946 	 * GPU processing the request, we never over-estimate the
2947 	 * position of the head.
2948 	 */
2949 	request_ring_position = intel_ring_get_tail(ring);
2950 
2951 	ret = ring->add_request(ring);
2952 	if (ret)
2953 		return ret;
2954 
2955 	request->seqno = intel_ring_get_seqno(ring);
2956 	request->ring = ring;
2957 	request->head = request_start;
2958 	request->tail = request_ring_position;
2959 
2960 	/* Whilst this request exists, batch_obj will be on the
2961 	 * active_list, and so will hold the active reference. Only when this
2962 	 * request is retired will the batch_obj be moved onto the
2963 	 * inactive_list and lose its active reference. Hence we do not need
2964 	 * to explicitly hold another reference here.
2965 	 */
2966 	request->batch_obj = obj;
2967 
2968 	/* Hold a reference to the current context so that we can inspect
2969 	 * it later in case a hangcheck error event fires.
2970 	 */
2971 	request->ctx = ring->last_context;
2972 	if (request->ctx)
2973 		i915_gem_context_reference(request->ctx);
2974 
2975 	request->emitted_jiffies = jiffies;
2976 	list_add_tail(&request->list, &ring->request_list);
2977 	request->file_priv = NULL;
2978 
2979 	if (file) {
2980 		struct drm_i915_file_private *file_priv = file->driver_priv;
2981 
2982 		spin_lock(&file_priv->mm.lock);
2983 		request->file_priv = file_priv;
2984 		list_add_tail(&request->client_list,
2985 			      &file_priv->mm.request_list);
2986 		spin_unlock(&file_priv->mm.lock);
2987 	}
2988 
2989 	trace_i915_gem_request_add(ring, request->seqno);
2990 	ring->outstanding_lazy_seqno = 0;
2991 	ring->preallocated_lazy_request = NULL;
2992 
2993 	if (!dev_priv->ums.mm_suspended) {
2994 		i915_queue_hangcheck(ring->dev);
2995 
2996 		cancel_delayed_work_sync(&dev_priv->mm.idle_work);
2997 		queue_delayed_work(dev_priv->wq,
2998 				   &dev_priv->mm.retire_work,
2999 				   round_jiffies_up_relative(HZ));
3000 		intel_mark_busy(dev_priv->dev);
3001 	}
3002 
3003 	if (out_seqno)
3004 		*out_seqno = request->seqno;
3005 	return 0;
3006 }
3007 
3008 static inline void
3009 i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
3010 {
3011 	struct drm_i915_file_private *file_priv = request->file_priv;
3012 
3013 	if (!file_priv)
3014 		return;
3015 
3016 	spin_lock(&file_priv->mm.lock);
3017 	list_del(&request->client_list);
3018 	request->file_priv = NULL;
3019 	spin_unlock(&file_priv->mm.lock);
3020 }
3021 
3022 static bool i915_context_is_banned(struct drm_i915_private *dev_priv,
3023 				   const struct i915_hw_context *ctx)
3024 {
3025 	unsigned long elapsed;
3026 
3027 	elapsed = get_seconds() - ctx->hang_stats.guilty_ts;
3028 
3029 	if (ctx->hang_stats.banned)
3030 		return true;
3031 
3032 	if (elapsed <= DRM_I915_CTX_BAN_PERIOD) {
3033 		if (!i915_gem_context_is_default(ctx)) {
3034 			DRM_DEBUG("context hanging too fast, banning!\n");
3035 			return true;
3036 		} else if (dev_priv->gpu_error.stop_rings == 0) {
3037 			DRM_ERROR("gpu hanging too fast, banning!\n");
3038 			return true;
3039 		}
3040 	}
3041 
3042 	return false;
3043 }
3044 
3045 static void i915_set_reset_status(struct drm_i915_private *dev_priv,
3046 				  struct i915_hw_context *ctx,
3047 				  const bool guilty)
3048 {
3049 	struct i915_ctx_hang_stats *hs;
3050 
3051 	if (WARN_ON(!ctx))
3052 		return;
3053 
3054 	hs = &ctx->hang_stats;
3055 
3056 	if (guilty) {
3057 		hs->banned = i915_context_is_banned(dev_priv, ctx);
3058 		hs->batch_active++;
3059 		hs->guilty_ts = get_seconds();
3060 	} else {
3061 		hs->batch_pending++;
3062 	}
3063 }
3064 
3065 static void i915_gem_free_request(struct drm_i915_gem_request *request)
3066 {
3067 	list_del(&request->list);
3068 	i915_gem_request_remove_from_client(request);
3069 
3070 	if (request->ctx)
3071 		i915_gem_context_unreference(request->ctx);
3072 
3073 	kfree(request);
3074 }
3075 
3076 struct drm_i915_gem_request *
3077 i915_gem_find_active_request(struct intel_ring_buffer *ring)
3078 {
3079 	struct drm_i915_gem_request *request;
3080 	u32 completed_seqno;
3081 
3082 	completed_seqno = ring->get_seqno(ring, false);
3083 
3084 	list_for_each_entry(request, &ring->request_list, list) {
3085 		if (i915_seqno_passed(completed_seqno, request->seqno))
3086 			continue;
3087 
3088 		return request;
3089 	}
3090 
3091 	return NULL;
3092 }
3093 
3094 static void i915_gem_reset_ring_status(struct drm_i915_private *dev_priv,
3095 				       struct intel_ring_buffer *ring)
3096 {
3097 	struct drm_i915_gem_request *request;
3098 	bool ring_hung;
3099 
3100 	request = i915_gem_find_active_request(ring);
3101 
3102 	if (request == NULL)
3103 		return;
3104 
3105 	ring_hung = ring->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG;
3106 
3107 	i915_set_reset_status(dev_priv, request->ctx, ring_hung);
3108 
3109 	list_for_each_entry_continue(request, &ring->request_list, list)
3110 		i915_set_reset_status(dev_priv, request->ctx, false);
3111 }
3112 
3113 static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
3114 					struct intel_ring_buffer *ring)
3115 {
3116 	while (!list_empty(&ring->active_list)) {
3117 		struct drm_i915_gem_object *obj;
3118 
3119 		obj = list_first_entry(&ring->active_list,
3120 				       struct drm_i915_gem_object,
3121 				       ring_list);
3122 
3123 		i915_gem_object_move_to_inactive(obj);
3124 	}
3125 
3126 	/*
3127 	 * We must free the requests after all the corresponding objects have
3128 	 * been moved off active lists, which is the same order as the normal
3129 	 * retire_requests function uses. This is important if objects hold
3130 	 * implicit references on things like e.g. ppgtt address spaces through
3131 	 * the request.
3132 	 */
3133 	while (!list_empty(&ring->request_list)) {
3134 		struct drm_i915_gem_request *request;
3135 
3136 		request = list_first_entry(&ring->request_list,
3137 					   struct drm_i915_gem_request,
3138 					   list);
3139 
3140 		i915_gem_free_request(request);
3141 	}
3142 }
3143 
3144 void i915_gem_restore_fences(struct drm_device *dev)
3145 {
3146 	struct drm_i915_private *dev_priv = dev->dev_private;
3147 	int i;
3148 
3149 	for (i = 0; i < dev_priv->num_fence_regs; i++) {
3150 		struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
3151 
3152 		/*
3153 		 * Commit delayed tiling changes if we have an object still
3154 		 * attached to the fence, otherwise just clear the fence.
3155 		 */
3156 		if (reg->obj) {
3157 			i915_gem_object_update_fence(reg->obj, reg,
3158 						     reg->obj->tiling_mode);
3159 		} else {
3160 			i915_gem_write_fence(dev, i, NULL);
3161 		}
3162 	}
3163 }
3164 
3165 void i915_gem_reset(struct drm_device *dev)
3166 {
3167 	struct drm_i915_private *dev_priv = dev->dev_private;
3168 	struct intel_ring_buffer *ring;
3169 	int i;
3170 
3171 	/*
3172 	 * Before we free the objects from the requests, we need to inspect
3173 	 * them for finding the guilty party. As the requests only borrow
3174 	 * their reference to the objects, the inspection must be done first.
3175 	 */
3176 	for_each_ring(ring, dev_priv, i)
3177 		i915_gem_reset_ring_status(dev_priv, ring);
3178 
3179 	for_each_ring(ring, dev_priv, i)
3180 		i915_gem_reset_ring_cleanup(dev_priv, ring);
3181 
3182 	i915_gem_cleanup_ringbuffer(dev);
3183 
3184 	i915_gem_context_reset(dev);
3185 
3186 	i915_gem_restore_fences(dev);
3187 }
3188 
3189 /**
3190  * This function clears the request list as sequence numbers are passed.
3191  */
3192 static void
3193 i915_gem_retire_requests_ring(struct intel_ring_buffer *ring)
3194 {
3195 	uint32_t seqno;
3196 
3197 	if (list_empty(&ring->request_list))
3198 		return;
3199 
3200 	WARN_ON(i915_verify_lists(ring->dev));
3201 
3202 	seqno = ring->get_seqno(ring, true);
3203 
3204 	/* Move any buffers on the active list that are no longer referenced
3205 	 * by the ringbuffer to the flushing/inactive lists as appropriate,
3206 	 * before we free the context associated with the requests.
3207 	 */
3208 	while (!list_empty(&ring->active_list)) {
3209 		struct drm_i915_gem_object *obj;
3210 
3211 		obj = list_first_entry(&ring->active_list,
3212 				      struct drm_i915_gem_object,
3213 				      ring_list);
3214 
3215 		if (!i915_seqno_passed(seqno, obj->last_read_seqno))
3216 			break;
3217 
3218 		i915_gem_object_move_to_inactive(obj);
3219 	}
3220 
3221 
3222 	while (!list_empty(&ring->request_list)) {
3223 		struct drm_i915_gem_request *request;
3224 
3225 		request = list_first_entry(&ring->request_list,
3226 					   struct drm_i915_gem_request,
3227 					   list);
3228 
3229 		if (!i915_seqno_passed(seqno, request->seqno))
3230 			break;
3231 
3232 		trace_i915_gem_request_retire(ring, request->seqno);
3233 		/* We know the GPU must have read the request to have
3234 		 * sent us the seqno + interrupt, so use the position
3235 		 * of tail of the request to update the last known position
3236 		 * of the GPU head.
3237 		 */
3238 		ring->last_retired_head = request->tail;
3239 
3240 		i915_gem_free_request(request);
3241 	}
3242 
3243 	if (unlikely(ring->trace_irq_seqno &&
3244 		     i915_seqno_passed(seqno, ring->trace_irq_seqno))) {
3245 		ring->irq_put(ring);
3246 		ring->trace_irq_seqno = 0;
3247 	}
3248 
3249 	WARN_ON(i915_verify_lists(ring->dev));
3250 }
3251 
3252 bool
3253 i915_gem_retire_requests(struct drm_device *dev)
3254 {
3255 	struct drm_i915_private *dev_priv = dev->dev_private;
3256 	struct intel_ring_buffer *ring;
3257 	bool idle = true;
3258 	int i;
3259 
3260 	for_each_ring(ring, dev_priv, i) {
3261 		i915_gem_retire_requests_ring(ring);
3262 		idle &= list_empty(&ring->request_list);
3263 	}
3264 
3265 	if (idle)
3266 		mod_delayed_work(dev_priv->wq,
3267 				   &dev_priv->mm.idle_work,
3268 				   msecs_to_jiffies(100));
3269 
3270 	return idle;
3271 }
3272 
3273 static void
3274 i915_gem_retire_work_handler(struct work_struct *work)
3275 {
3276 	struct drm_i915_private *dev_priv =
3277 		container_of(work, typeof(*dev_priv), mm.retire_work.work);
3278 	struct drm_device *dev = dev_priv->dev;
3279 	bool idle;
3280 
3281 	/* Come back later if the device is busy... */
3282 	idle = false;
3283 	if (mutex_trylock(&dev->struct_mutex)) {
3284 		idle = i915_gem_retire_requests(dev);
3285 		mutex_unlock(&dev->struct_mutex);
3286 	}
3287 	if (!idle)
3288 		queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work,
3289 				   round_jiffies_up_relative(HZ));
3290 }
3291 
3292 static void
3293 i915_gem_idle_work_handler(struct work_struct *work)
3294 {
3295 	struct drm_i915_private *dev_priv =
3296 		container_of(work, typeof(*dev_priv), mm.idle_work.work);
3297 
3298 	intel_mark_idle(dev_priv->dev);
3299 }
3300 
3301 /**
3302  * Ensures that an object will eventually get non-busy by flushing any required
3303  * write domains, emitting any outstanding lazy request and retiring any
3304  * completed requests.
3305  */
3306 static int
3307 i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
3308 {
3309 	int ret;
3310 
3311 	if (obj->active) {
3312 		ret = i915_gem_check_olr(obj->ring, obj->last_read_seqno);
3313 		if (ret)
3314 			return ret;
3315 
3316 		i915_gem_retire_requests_ring(obj->ring);
3317 	}
3318 
3319 	return 0;
3320 }
3321 
3322 /**
3323  * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
3324  * @DRM_IOCTL_ARGS: standard ioctl arguments
3325  *
3326  * Returns 0 if successful, else an error is returned with the remaining time in
3327  * the timeout parameter.
3328  *  -ETIME: object is still busy after timeout
3329  *  -ERESTARTSYS: signal interrupted the wait
3330  *  -ENOENT: object doesn't exist
3331  * Also possible, but rare:
3332  *  -EAGAIN: GPU wedged
3333  *  -ENOMEM: damn
3334  *  -ENODEV: Internal IRQ fail
3335  *  -E?: The add request failed
3336  *
3337  * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
3338  * non-zero timeout parameter the wait ioctl will wait for the given number of
3339  * nanoseconds on an object becoming unbusy. Since the wait itself does so
3340  * without holding struct_mutex the object may become re-busied before this
3341  * function completes. A similar but shorter race condition exists in the busy
3342  * ioctl.
3343  */
3344 int
3345 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
3346 {
3347 	struct drm_i915_private *dev_priv = dev->dev_private;
3348 	struct drm_i915_gem_wait *args = data;
3349 	struct drm_gem_object *gobj;
3350 	struct drm_i915_gem_object *obj;
3351 	struct intel_ring_buffer *ring = NULL;
3352 	struct timespec timeout_stack, *timeout = NULL;
3353 	unsigned reset_counter;
3354 	u32 seqno = 0;
3355 	int ret = 0;
3356 
3357 	if (args->timeout_ns >= 0) {
3358 		timeout_stack = ns_to_timespec(args->timeout_ns);
3359 		timeout = &timeout_stack;
3360 	}
3361 
3362 	ret = i915_mutex_lock_interruptible(dev);
3363 	if (ret)
3364 		return ret;
3365 
3366 	gobj = drm_gem_object_lookup(dev, file, args->bo_handle);
3367 	if (gobj == NULL) {
3368 		mutex_unlock(&dev->struct_mutex);
3369 		return -ENOENT;
3370 	}
3371 	obj = to_intel_bo(gobj);
3372 
3373 	/* Need to make sure the object gets inactive eventually. */
3374 	ret = i915_gem_object_flush_active(obj);
3375 	if (ret)
3376 		goto out;
3377 
3378 	if (obj->active) {
3379 		seqno = obj->last_read_seqno;
3380 		ring = obj->ring;
3381 	}
3382 
3383 	if (seqno == 0)
3384 		 goto out;
3385 
3386 	/* Do this after OLR check to make sure we make forward progress polling
3387 	 * on this IOCTL with a 0 timeout (like busy ioctl)
3388 	 */
3389 	if (!args->timeout_ns) {
3390 		ret = -ETIME;
3391 		goto out;
3392 	}
3393 
3394 	drm_gem_object_unreference(&obj->base);
3395 	reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
3396 	mutex_unlock(&dev->struct_mutex);
3397 
3398 	ret = __wait_seqno(ring, seqno, reset_counter, true, timeout, file->driver_priv);
3399 	if (timeout)
3400 		args->timeout_ns = timespec_to_ns(timeout);
3401 	return ret;
3402 
3403 out:
3404 	drm_gem_object_unreference(&obj->base);
3405 	mutex_unlock(&dev->struct_mutex);
3406 	return ret;
3407 }
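
/*
 * Illustrative only: a hypothetical userspace caller of the wait ioctl.
 * A timeout_ns of 0 just polls (the ioctl returns -ETIME while the object
 * is busy), a positive value waits and is updated with the remaining
 * time, and a negative value waits without a timeout in the code above.
 * drmIoctl() from libdrm is assumed.  Never compiled here.
 */
#if 0
static int
example_gem_wait(int fd, uint32_t handle, int64_t timeout_ns)
{
	struct drm_i915_gem_wait arg;

	memset(&arg, 0, sizeof(arg));
	arg.bo_handle = handle;
	arg.timeout_ns = timeout_ns;
	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_WAIT, &arg) != 0)
		return -errno;	/* e.g. ETIME if still busy */
	return 0;
}
#endif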
3408 
3409 /**
3410  * i915_gem_object_sync - sync an object to a ring.
3411  *
3412  * @obj: object which may be in use on another ring.
3413  * @to: ring we wish to use the object on. May be NULL.
3414  *
3415  * This code is meant to abstract object synchronization with the GPU.
3416  * Calling with NULL implies synchronizing the object with the CPU
3417  * rather than a particular GPU ring.
3418  *
3419  * Returns 0 if successful, else propagates up the lower layer error.
3420  */
3421 int
3422 i915_gem_object_sync(struct drm_i915_gem_object *obj,
3423 		     struct intel_ring_buffer *to)
3424 {
3425 	struct intel_ring_buffer *from = obj->ring;
3426 	u32 seqno;
3427 	int ret, idx;
3428 
3429 	if (from == NULL || to == from)
3430 		return 0;
3431 
3432 	if (to == NULL || !i915_semaphore_is_enabled(obj->base.dev))
3433 		return i915_gem_object_wait_rendering(obj, false);
3434 
3435 	idx = intel_ring_sync_index(from, to);
3436 
3437 	seqno = obj->last_read_seqno;
3438 	if (seqno <= from->sync_seqno[idx])
3439 		return 0;
3440 
3441 	ret = i915_gem_check_olr(obj->ring, seqno);
3442 	if (ret)
3443 		return ret;
3444 
3445 	trace_i915_gem_ring_sync_to(from, to, seqno);
3446 	ret = to->sync_to(to, from, seqno);
3447 	if (!ret)
3448 		/* We use last_read_seqno because sync_to()
3449 		 * might have just caused seqno wrap under
3450 		 * the radar.
3451 		 */
3452 		from->sync_seqno[idx] = obj->last_read_seqno;
3453 
3454 	return ret;
3455 }
3456 
3457 static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
3458 {
3459 	u32 old_write_domain, old_read_domains;
3460 
3461 	/* Force a pagefault for domain tracking on next user access */
3462 	i915_gem_release_mmap(obj);
3463 
3464 	if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
3465 		return;
3466 
3467 	/* Wait for any direct GTT access to complete */
3468 	mb();
3469 
3470 	old_read_domains = obj->base.read_domains;
3471 	old_write_domain = obj->base.write_domain;
3472 
3473 	obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT;
3474 	obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT;
3475 
3476 	trace_i915_gem_object_change_domain(obj,
3477 					    old_read_domains,
3478 					    old_write_domain);
3479 }
3480 
3481 int i915_vma_unbind(struct i915_vma *vma)
3482 {
3483 	struct drm_i915_gem_object *obj = vma->obj;
3484 	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
3485 	int ret;
3486 
3487 	if (list_empty(&vma->vma_link))
3488 		return 0;
3489 
3490 	if (!drm_mm_node_allocated(&vma->node)) {
3491 		i915_gem_vma_destroy(vma);
3492 		return 0;
3493 	}
3494 
3495 	if (vma->pin_count)
3496 		return -EBUSY;
3497 
3498 	BUG_ON(obj->pages == NULL);
3499 
3500 	ret = i915_gem_object_finish_gpu(obj);
3501 	if (ret)
3502 		return ret;
3503 	/* Continue on if we fail due to EIO: the GPU is hung, so we
3504 	 * should be safe, and we need to clean up or else we might
3505 	 * cause memory corruption through use-after-free.
3506 	 */
3507 
3508 	i915_gem_object_finish_gtt(obj);
3509 
3510 	/* release the fence reg _after_ flushing */
3511 	ret = i915_gem_object_put_fence(obj);
3512 	if (ret)
3513 		return ret;
3514 
3515 	trace_i915_vma_unbind(vma);
3516 
3517 	vma->unbind_vma(vma);
3518 
3519 	i915_gem_gtt_finish_object(obj);
3520 
3521 	list_del_init(&vma->mm_list);
3522 	/* Avoid an unnecessary call to unbind on rebind. */
3523 	if (i915_is_ggtt(vma->vm))
3524 		obj->map_and_fenceable = true;
3525 
3526 	drm_mm_remove_node(&vma->node);
3527 	i915_gem_vma_destroy(vma);
3528 
3529 	/* Since the unbound list is global, only move to that list if
3530 	 * no more VMAs exist. */
3531 	if (list_empty(&obj->vma_list))
3532 		list_move_tail(&obj->global_list, &dev_priv->mm.unbound_list);
3533 
3534 	/* And finally now the object is completely decoupled from this vma,
3535 	 * we can drop its hold on the backing storage and allow it to be
3536 	 * reaped by the shrinker.
3537 	 */
3538 	i915_gem_object_unpin_pages(obj);
3539 
3540 	return 0;
3541 }
3542 
3543 int i915_gpu_idle(struct drm_device *dev)
3544 {
3545 	struct drm_i915_private *dev_priv = dev->dev_private;
3546 	struct intel_ring_buffer *ring;
3547 	int ret, i;
3548 
3549 	/* Flush everything onto the inactive list. */
3550 	for_each_ring(ring, dev_priv, i) {
3551 		ret = i915_switch_context(ring, ring->default_context);
3552 		if (ret)
3553 			return ret;
3554 
3555 		ret = intel_ring_idle(ring);
3556 		if (ret)
3557 			return ret;
3558 	}
3559 
3560 	return 0;
3561 }
3562 
3563 static void i965_write_fence_reg(struct drm_device *dev, int reg,
3564 				 struct drm_i915_gem_object *obj)
3565 {
3566 	struct drm_i915_private *dev_priv = dev->dev_private;
3567 	int fence_reg;
3568 	int fence_pitch_shift;
3569 
3570 	if (INTEL_INFO(dev)->gen >= 6) {
3571 		fence_reg = FENCE_REG_SANDYBRIDGE_0;
3572 		fence_pitch_shift = SANDYBRIDGE_FENCE_PITCH_SHIFT;
3573 	} else {
3574 		fence_reg = FENCE_REG_965_0;
3575 		fence_pitch_shift = I965_FENCE_PITCH_SHIFT;
3576 	}
3577 
3578 	fence_reg += reg * 8;
3579 
3580 	/* To w/a incoherency with non-atomic 64-bit register updates,
3581 	 * we split the 64-bit update into two 32-bit writes. In order
3582 	 * for a partial fence not to be evaluated between writes, we
3583 	 * precede the update with write to turn off the fence register,
3584 	 * and only enable the fence as the last step.
3585 	 *
3586 	 * For extra levels of paranoia, we make sure each step lands
3587 	 * before applying the next step.
3588 	 */
3589 	I915_WRITE(fence_reg, 0);
3590 	POSTING_READ(fence_reg);
3591 
3592 	if (obj) {
3593 		u32 size = i915_gem_obj_ggtt_size(obj);
3594 		uint64_t val;
3595 
3596 		val = (uint64_t)((i915_gem_obj_ggtt_offset(obj) + size - 4096) &
3597 				 0xfffff000) << 32;
3598 		val |= i915_gem_obj_ggtt_offset(obj) & 0xfffff000;
3599 		val |= (uint64_t)((obj->stride / 128) - 1) << fence_pitch_shift;
3600 		if (obj->tiling_mode == I915_TILING_Y)
3601 			val |= 1 << I965_FENCE_TILING_Y_SHIFT;
3602 		val |= I965_FENCE_REG_VALID;
3603 
3604 		I915_WRITE(fence_reg + 4, val >> 32);
3605 		POSTING_READ(fence_reg + 4);
3606 
3607 		I915_WRITE(fence_reg + 0, val);
3608 		POSTING_READ(fence_reg);
3609 	} else {
3610 		I915_WRITE(fence_reg + 4, 0);
3611 		POSTING_READ(fence_reg + 4);
3612 	}
3613 }
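
/*
 * Worked example with hypothetical numbers: an X-tiled object bound at
 * GGTT offset 0x00100000 with size 0x00200000 and a 4096-byte stride
 * yields
 *
 *	end page:    (0x00100000 + 0x00200000 - 4096) & 0xfffff000 = 0x002ff000
 *	start page:   0x00100000 & 0xfffff000                      = 0x00100000
 *	pitch field:  4096 / 128 - 1                                = 0x1f
 *
 * assembled as (end page << 32) | start page |
 * (pitch field << fence_pitch_shift) | I965_FENCE_REG_VALID, and written
 * as two 32-bit halves with the register disabled in between.
 */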
3614 
3615 static void i915_write_fence_reg(struct drm_device *dev, int reg,
3616 				 struct drm_i915_gem_object *obj)
3617 {
3618 	struct drm_i915_private *dev_priv = dev->dev_private;
3619 	u32 val;
3620 
3621 	if (obj) {
3622 		u32 size = i915_gem_obj_ggtt_size(obj);
3623 		int pitch_val;
3624 		int tile_width;
3625 
3626 		WARN((i915_gem_obj_ggtt_offset(obj) & ~I915_FENCE_START_MASK) ||
3627 		     (size & -size) != size ||
3628 		     (i915_gem_obj_ggtt_offset(obj) & (size - 1)),
3629 		     "object 0x%08lx [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n",
3630 		     i915_gem_obj_ggtt_offset(obj), obj->map_and_fenceable, size);
3631 
3632 		if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))
3633 			tile_width = 128;
3634 		else
3635 			tile_width = 512;
3636 
3637 		/* Note: pitch better be a power of two tile widths */
3638 		pitch_val = obj->stride / tile_width;
3639 		pitch_val = ffs(pitch_val) - 1;
3640 
3641 		val = i915_gem_obj_ggtt_offset(obj);
3642 		if (obj->tiling_mode == I915_TILING_Y)
3643 			val |= 1 << I830_FENCE_TILING_Y_SHIFT;
3644 		val |= I915_FENCE_SIZE_BITS(size);
3645 		val |= pitch_val << I830_FENCE_PITCH_SHIFT;
3646 		val |= I830_FENCE_REG_VALID;
3647 	} else
3648 		val = 0;
3649 
3650 	if (reg < 8)
3651 		reg = FENCE_REG_830_0 + reg * 4;
3652 	else
3653 		reg = FENCE_REG_945_8 + (reg - 8) * 4;
3654 
3655 	I915_WRITE(reg, val);
3656 	POSTING_READ(reg);
3657 }
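
/*
 * Example with hypothetical numbers: a gen3 X-tiled object with a
 * 2048-byte stride uses 512-byte-wide tiles, so
 * pitch_val = ffs(2048 / 512) - 1 = ffs(4) - 1 = 2 is what lands in the
 * I830_FENCE_PITCH field.
 */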
3658 
3659 static void i830_write_fence_reg(struct drm_device *dev, int reg,
3660 				struct drm_i915_gem_object *obj)
3661 {
3662 	struct drm_i915_private *dev_priv = dev->dev_private;
3663 	uint32_t val;
3664 
3665 	if (obj) {
3666 		u32 size = i915_gem_obj_ggtt_size(obj);
3667 		uint32_t pitch_val;
3668 
3669 		WARN((i915_gem_obj_ggtt_offset(obj) & ~I830_FENCE_START_MASK) ||
3670 		     (size & -size) != size ||
3671 		     (i915_gem_obj_ggtt_offset(obj) & (size - 1)),
3672 		     "object 0x%08lx not 512K or pot-size 0x%08x aligned\n",
3673 		     i915_gem_obj_ggtt_offset(obj), size);
3674 
3675 		pitch_val = obj->stride / 128;
3676 		pitch_val = ffs(pitch_val) - 1;
3677 
3678 		val = i915_gem_obj_ggtt_offset(obj);
3679 		if (obj->tiling_mode == I915_TILING_Y)
3680 			val |= 1 << I830_FENCE_TILING_Y_SHIFT;
3681 		val |= I830_FENCE_SIZE_BITS(size);
3682 		val |= pitch_val << I830_FENCE_PITCH_SHIFT;
3683 		val |= I830_FENCE_REG_VALID;
3684 	} else
3685 		val = 0;
3686 
3687 	I915_WRITE(FENCE_REG_830_0 + reg * 4, val);
3688 	POSTING_READ(FENCE_REG_830_0 + reg * 4);
3689 }
3690 
3691 static inline bool i915_gem_object_needs_mb(struct drm_i915_gem_object *obj)
3692 {
3693 	return obj && obj->base.read_domains & I915_GEM_DOMAIN_GTT;
3694 }
3695 
3696 static void i915_gem_write_fence(struct drm_device *dev, int reg,
3697 				 struct drm_i915_gem_object *obj)
3698 {
3699 	struct drm_i915_private *dev_priv = dev->dev_private;
3700 
3701 	/* Ensure that all CPU reads are completed before installing a fence
3702 	 * and all writes before removing the fence.
3703 	 */
3704 	if (i915_gem_object_needs_mb(dev_priv->fence_regs[reg].obj))
3705 		mb();
3706 
3707 	WARN(obj && (!obj->stride || !obj->tiling_mode),
3708 	     "bogus fence setup with stride: 0x%x, tiling mode: %i\n",
3709 	     obj->stride, obj->tiling_mode);
3710 
3711 	switch (INTEL_INFO(dev)->gen) {
3712 	case 8:
3713 	case 7:
3714 	case 6:
3715 	case 5:
3716 	case 4: i965_write_fence_reg(dev, reg, obj); break;
3717 	case 3: i915_write_fence_reg(dev, reg, obj); break;
3718 	case 2: i830_write_fence_reg(dev, reg, obj); break;
3719 	default: BUG();
3720 	}
3721 
3722 	/* And similarly be paranoid that no direct access to this region
3723 	 * is reordered to before the fence is installed.
3724 	 */
3725 	if (i915_gem_object_needs_mb(obj))
3726 		mb();
3727 }
3728 
3729 static inline int fence_number(struct drm_i915_private *dev_priv,
3730 			       struct drm_i915_fence_reg *fence)
3731 {
3732 	return fence - dev_priv->fence_regs;
3733 }
3734 
3735 static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
3736 					 struct drm_i915_fence_reg *fence,
3737 					 bool enable)
3738 {
3739 	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
3740 	int reg = fence_number(dev_priv, fence);
3741 
3742 	i915_gem_write_fence(obj->base.dev, reg, enable ? obj : NULL);
3743 
3744 	if (enable) {
3745 		obj->fence_reg = reg;
3746 		fence->obj = obj;
3747 		list_move_tail(&fence->lru_list, &dev_priv->mm.fence_list);
3748 	} else {
3749 		obj->fence_reg = I915_FENCE_REG_NONE;
3750 		fence->obj = NULL;
3751 		list_del_init(&fence->lru_list);
3752 	}
3753 	obj->fence_dirty = false;
3754 }
3755 
3756 static int
3757 i915_gem_object_wait_fence(struct drm_i915_gem_object *obj)
3758 {
3759 	if (obj->last_fenced_seqno) {
3760 		int ret = i915_wait_seqno(obj->ring, obj->last_fenced_seqno);
3761 		if (ret)
3762 			return ret;
3763 
3764 		obj->last_fenced_seqno = 0;
3765 	}
3766 
3767 	obj->fenced_gpu_access = false;
3768 	return 0;
3769 }
3770 
3771 int
3772 i915_gem_object_put_fence(struct drm_i915_gem_object *obj)
3773 {
3774 	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
3775 	struct drm_i915_fence_reg *fence;
3776 	int ret;
3777 
3778 	ret = i915_gem_object_wait_fence(obj);
3779 	if (ret)
3780 		return ret;
3781 
3782 	if (obj->fence_reg == I915_FENCE_REG_NONE)
3783 		return 0;
3784 
3785 	fence = &dev_priv->fence_regs[obj->fence_reg];
3786 
3787 	i915_gem_object_fence_lost(obj);
3788 	i915_gem_object_update_fence(obj, fence, false);
3789 
3790 	return 0;
3791 }
3792 
3793 static struct drm_i915_fence_reg *
3794 i915_find_fence_reg(struct drm_device *dev)
3795 {
3796 	struct drm_i915_private *dev_priv = dev->dev_private;
3797 	struct drm_i915_fence_reg *reg, *avail;
3798 	int i;
3799 
3800 	/* First try to find a free reg */
3801 	avail = NULL;
3802 	for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) {
3803 		reg = &dev_priv->fence_regs[i];
3804 		if (!reg->obj)
3805 			return reg;
3806 
3807 		if (!reg->pin_count)
3808 			avail = reg;
3809 	}
3810 
3811 	if (avail == NULL)
3812 		goto deadlock;
3813 
3814 	/* None available, try to steal one or wait for a user to finish */
3815 	list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) {
3816 		if (reg->pin_count)
3817 			continue;
3818 
3819 		return reg;
3820 	}
3821 
3822 deadlock:
3823 	/* Wait for completion of pending flips which consume fences */
3824 	if (intel_has_pending_fb_unpin(dev))
3825 		return ERR_PTR(-EAGAIN);
3826 
3827 	return ERR_PTR(-EDEADLK);
3828 }
3829 
3830 /**
3831  * i915_gem_object_get_fence - set up fencing for an object
3832  * @obj: object to map through a fence reg
3833  *
3834  * When mapping objects through the GTT, userspace wants to be able to write
3835  * to them without having to worry about swizzling if the object is tiled.
3836  * This function walks the fence regs looking for a free one for @obj,
3837  * stealing one if it can't find any.
3838  *
3839  * It then sets up the reg based on the object's properties: address, pitch
3840  * and tiling format.
3841  *
3842  * For an untiled surface, this removes any existing fence.
3843  */
3844 int
3845 i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
3846 {
3847 	struct drm_device *dev = obj->base.dev;
3848 	struct drm_i915_private *dev_priv = dev->dev_private;
3849 	bool enable = obj->tiling_mode != I915_TILING_NONE;
3850 	struct drm_i915_fence_reg *reg;
3851 	int ret;
3852 
3853 	/* Have we updated the tiling parameters upon the object and so
3854 	 * will need to serialise the write to the associated fence register?
3855 	 */
3856 	if (obj->fence_dirty) {
3857 		ret = i915_gem_object_wait_fence(obj);
3858 		if (ret)
3859 			return ret;
3860 	}
3861 
3862 	/* Just update our place in the LRU if our fence is getting reused. */
3863 	if (obj->fence_reg != I915_FENCE_REG_NONE) {
3864 		reg = &dev_priv->fence_regs[obj->fence_reg];
3865 		if (!obj->fence_dirty) {
3866 			list_move_tail(&reg->lru_list,
3867 				       &dev_priv->mm.fence_list);
3868 			return 0;
3869 		}
3870 	} else if (enable) {
3871 		reg = i915_find_fence_reg(dev);
3872 		if (IS_ERR(reg))
3873 			return PTR_ERR(reg);
3874 
3875 		if (reg->obj) {
3876 			struct drm_i915_gem_object *old = reg->obj;
3877 
3878 			ret = i915_gem_object_wait_fence(old);
3879 			if (ret)
3880 				return ret;
3881 
3882 			i915_gem_object_fence_lost(old);
3883 		}
3884 	} else
3885 		return 0;
3886 
3887 	i915_gem_object_update_fence(obj, reg, enable);
3888 
3889 	return 0;
3890 }
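
/*
 * Sketch of the expected caller pattern (assuming the object is already
 * pinned into the mappable GGTT and struct_mutex is held):
 *
 *	ret = i915_gem_object_get_fence(obj);
 *	if (ret == 0)
 *		... CPU access through the aperture now sees detiled data
 *		    for tiled objects ...
 */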
3891 
3892 static bool i915_gem_valid_gtt_space(struct drm_device *dev,
3893 				     struct drm_mm_node *gtt_space,
3894 				     unsigned long cache_level)
3895 {
3896 	struct drm_mm_node *other;
3897 
3898 	/* On non-LLC machines we have to be careful when putting differing
3899 	 * types of snoopable memory together to avoid the prefetcher
3900 	 * crossing memory domains and dying.
3901 	 */
3902 	if (HAS_LLC(dev))
3903 		return true;
3904 
3905 	if (!drm_mm_node_allocated(gtt_space))
3906 		return true;
3907 
3908 	if (list_empty(&gtt_space->node_list))
3909 		return true;
3910 
3911 	other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list);
3912 	if (other->allocated && !other->hole_follows && other->color != cache_level)
3913 		return false;
3914 
3915 	other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list);
3916 	if (other->allocated && !gtt_space->hole_follows && other->color != cache_level)
3917 		return false;
3918 
3919 	return true;
3920 }
3921 
3922 static void i915_gem_verify_gtt(struct drm_device *dev)
3923 {
3924 #if WATCH_GTT
3925 	struct drm_i915_private *dev_priv = dev->dev_private;
3926 	struct drm_i915_gem_object *obj;
3927 	int err = 0;
3928 
3929 	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
3930 		if (obj->gtt_space == NULL) {
3931 			printk(KERN_ERR "object found on GTT list with no space reserved\n");
3932 			err++;
3933 			continue;
3934 		}
3935 
3936 		if (obj->cache_level != obj->gtt_space->color) {
3937 			printk(KERN_ERR "object reserved space [%08lx, %08lx] with wrong color, cache_level=%x, color=%lx\n",
3938 			       i915_gem_obj_ggtt_offset(obj),
3939 			       i915_gem_obj_ggtt_offset(obj) + i915_gem_obj_ggtt_size(obj),
3940 			       obj->cache_level,
3941 			       obj->gtt_space->color);
3942 			err++;
3943 			continue;
3944 		}
3945 
3946 		if (!i915_gem_valid_gtt_space(dev,
3947 					      obj->gtt_space,
3948 					      obj->cache_level)) {
3949 			printk(KERN_ERR "invalid GTT space found at [%08lx, %08lx] - color=%x\n",
3950 			       i915_gem_obj_ggtt_offset(obj),
3951 			       i915_gem_obj_ggtt_offset(obj) + i915_gem_obj_ggtt_size(obj),
3952 			       obj->cache_level);
3953 			err++;
3954 			continue;
3955 		}
3956 	}
3957 
3958 	WARN_ON(err);
3959 #endif
3960 }
3961 
3962 /**
3963  * Finds free space in the GTT aperture and binds the object there.
3964  */
3965 static struct i915_vma *
3966 i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
3967 			   struct i915_address_space *vm,
3968 			   unsigned alignment,
3969 			   uint64_t flags)
3970 {
3971 	struct drm_device *dev = obj->base.dev;
3972 	struct drm_i915_private *dev_priv = dev->dev_private;
3973 	u32 size, fence_size, fence_alignment, unfenced_alignment;
3974 	unsigned long start =
3975 		flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
3976 	unsigned long end =
3977 		flags & PIN_MAPPABLE ? dev_priv->gtt.mappable_end : vm->total;
3978 	struct i915_vma *vma;
3979 	int ret;
3980 
3981 	fence_size = i915_gem_get_gtt_size(dev,
3982 					   obj->base.size,
3983 					   obj->tiling_mode);
3984 	fence_alignment = i915_gem_get_gtt_alignment(dev,
3985 						     obj->base.size,
3986 						     obj->tiling_mode, true);
3987 	unfenced_alignment =
3988 		i915_gem_get_gtt_alignment(dev,
3989 					   obj->base.size,
3990 					   obj->tiling_mode, false);
3991 
3992 	if (alignment == 0)
3993 		alignment = flags & PIN_MAPPABLE ? fence_alignment :
3994 						unfenced_alignment;
3995 	if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) {
3996 		DRM_DEBUG("Invalid object alignment requested %u\n", alignment);
3997 		return ERR_PTR(-EINVAL);
3998 	}
3999 
4000 	size = flags & PIN_MAPPABLE ? fence_size : obj->base.size;
4001 
4002 	/* If the object is bigger than the entire aperture, reject it early
4003 	 * before evicting everything in a vain attempt to find space.
4004 	 */
4005 	if (obj->base.size > end) {
4006 		DRM_DEBUG("Attempting to bind an object larger than the aperture: object=%zd > %s aperture=%lu\n",
4007 			  obj->base.size,
4008 			  flags & PIN_MAPPABLE ? "mappable" : "total",
4009 			  end);
4010 		return ERR_PTR(-E2BIG);
4011 	}
4012 
4013 	ret = i915_gem_object_get_pages(obj);
4014 	if (ret)
4015 		return ERR_PTR(ret);
4016 
4017 	i915_gem_object_pin_pages(obj);
4018 
4019 	vma = i915_gem_obj_lookup_or_create_vma(obj, vm);
4020 	if (IS_ERR(vma))
4021 		goto err_unpin;
4022 
4023 search_free:
4024 	ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
4025 						  size, alignment,
4026 						  obj->cache_level,
4027 						  start, end,
4028 						  DRM_MM_SEARCH_DEFAULT,
4029 						  DRM_MM_CREATE_DEFAULT);
4030 	if (ret) {
4031 		ret = i915_gem_evict_something(dev, vm, size, alignment,
4032 					       obj->cache_level,
4033 					       start, end,
4034 					       flags);
4035 		if (ret == 0)
4036 			goto search_free;
4037 
4038 		goto err_free_vma;
4039 	}
4040 	if (WARN_ON(!i915_gem_valid_gtt_space(dev, &vma->node,
4041 					      obj->cache_level))) {
4042 		ret = -EINVAL;
4043 		goto err_remove_node;
4044 	}
4045 
4046 	ret = i915_gem_gtt_prepare_object(obj);
4047 	if (ret)
4048 		goto err_remove_node;
4049 
4050 	list_move_tail(&obj->global_list, &dev_priv->mm.bound_list);
4051 	list_add_tail(&vma->mm_list, &vm->inactive_list);
4052 
4053 	if (i915_is_ggtt(vm)) {
4054 		bool mappable, fenceable;
4055 
4056 		fenceable = (vma->node.size == fence_size &&
4057 			     (vma->node.start & (fence_alignment - 1)) == 0);
4058 
4059 		mappable = (vma->node.start + obj->base.size <=
4060 			    dev_priv->gtt.mappable_end);
4061 
4062 		obj->map_and_fenceable = mappable && fenceable;
4063 	}
4064 
4065 	WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable);
4066 
4067 	trace_i915_vma_bind(vma, flags);
4068 	vma->bind_vma(vma, obj->cache_level,
4069 		      flags & (PIN_MAPPABLE | PIN_GLOBAL) ? GLOBAL_BIND : 0);
4070 
4071 	i915_gem_verify_gtt(dev);
4072 	return vma;
4073 
4074 err_remove_node:
4075 	drm_mm_remove_node(&vma->node);
4076 err_free_vma:
4077 	i915_gem_vma_destroy(vma);
4078 	vma = ERR_PTR(ret);
4079 err_unpin:
4080 	i915_gem_object_unpin_pages(obj);
4081 	return vma;
4082 }
4083 
4084 bool
4085 i915_gem_clflush_object(struct drm_i915_gem_object *obj,
4086 			bool force)
4087 {
4088 	/* If we don't have a page list set up, then we're not pinned
4089 	 * to GPU, and we can ignore the cache flush because it'll happen
4090 	 * again at bind time.
4091 	 */
4092 	if (obj->pages == NULL)
4093 		return false;
4094 
4095 	/*
4096 	 * Stolen memory is always coherent with the GPU as it is explicitly
4097 	 * marked as wc by the system, or the system is cache-coherent.
4098 	 */
4099 	if (obj->stolen)
4100 		return false;
4101 
4102 	/* If the GPU is snooping the contents of the CPU cache,
4103 	 * we do not need to manually clear the CPU cache lines.  However,
4104 	 * the caches are only snooped when the render cache is
4105 	 * flushed/invalidated.  As we always have to emit invalidations
4106 	 * and flushes when moving into and out of the RENDER domain, correct
4107 	 * snooping behaviour occurs naturally as the result of our domain
4108 	 * tracking.
4109 	 */
4110 	if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
4111 		return false;
4112 
4113 	trace_i915_gem_object_clflush(obj);
4114 #ifdef __NetBSD__
4115 	drm_clflush_pglist(&obj->igo_pageq);
4116 #else
4117 	drm_clflush_sg(obj->pages);
4118 #endif
4119 
4120 	return true;
4121 }
4122 
4123 /** Flushes the GTT write domain for the object if it's dirty. */
4124 static void
4125 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
4126 {
4127 	uint32_t old_write_domain;
4128 
4129 	if (obj->base.write_domain != I915_GEM_DOMAIN_GTT)
4130 		return;
4131 
4132 	/* No actual flushing is required for the GTT write domain.  Writes
4133 	 * to it immediately go to main memory as far as we know, so there's
4134 	 * no chipset flush.  It also doesn't land in render cache.
4135 	 *
4136 	 * However, we do have to enforce the order so that all writes through
4137 	 * the GTT land before any writes to the device, such as updates to
4138 	 * the GATT itself.
4139 	 */
4140 	wmb();
4141 
4142 	old_write_domain = obj->base.write_domain;
4143 	obj->base.write_domain = 0;
4144 
4145 	trace_i915_gem_object_change_domain(obj,
4146 					    obj->base.read_domains,
4147 					    old_write_domain);
4148 }
4149 
4150 /** Flushes the CPU write domain for the object if it's dirty. */
4151 static void
4152 i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj,
4153 				       bool force)
4154 {
4155 	uint32_t old_write_domain;
4156 
4157 	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
4158 		return;
4159 
4160 	if (i915_gem_clflush_object(obj, force))
4161 		i915_gem_chipset_flush(obj->base.dev);
4162 
4163 	old_write_domain = obj->base.write_domain;
4164 	obj->base.write_domain = 0;
4165 
4166 	trace_i915_gem_object_change_domain(obj,
4167 					    obj->base.read_domains,
4168 					    old_write_domain);
4169 }
4170 
4171 /**
4172  * Moves a single object to the GTT read, and possibly write domain.
4173  *
4174  * This function returns when the move is complete, including waiting on
4175  * flushes to occur.
4176  */
4177 int
4178 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
4179 {
4180 	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
4181 	uint32_t old_write_domain, old_read_domains;
4182 	int ret;
4183 
4184 	/* Not valid to be called on unbound objects. */
4185 	if (!i915_gem_obj_bound_any(obj))
4186 		return -EINVAL;
4187 
4188 	if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
4189 		return 0;
4190 
4191 	ret = i915_gem_object_wait_rendering(obj, !write);
4192 	if (ret)
4193 		return ret;
4194 
4195 	i915_gem_object_flush_cpu_write_domain(obj, false);
4196 
4197 	/* Serialise direct access to this object with the barriers for
4198 	 * coherent writes from the GPU, by effectively invalidating the
4199 	 * GTT domain upon first access.
4200 	 */
4201 	if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
4202 		mb();
4203 
4204 	old_write_domain = obj->base.write_domain;
4205 	old_read_domains = obj->base.read_domains;
4206 
4207 	/* It should now be out of any other write domains, and we can update
4208 	 * the domain values for our changes.
4209 	 */
4210 	BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
4211 	obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
4212 	if (write) {
4213 		obj->base.read_domains = I915_GEM_DOMAIN_GTT;
4214 		obj->base.write_domain = I915_GEM_DOMAIN_GTT;
4215 		obj->dirty = 1;
4216 	}
4217 
4218 	trace_i915_gem_object_change_domain(obj,
4219 					    old_read_domains,
4220 					    old_write_domain);
4221 
4222 	/* And bump the LRU for this access */
4223 	if (i915_gem_object_is_inactive(obj)) {
4224 		struct i915_vma *vma = i915_gem_obj_to_ggtt(obj);
4225 		if (vma)
4226 			list_move_tail(&vma->mm_list,
4227 				       &dev_priv->gtt.base.inactive_list);
4228 
4229 	}
4230 
4231 	return 0;
4232 }
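
/*
 * Sketch of a driver-internal GTT write built on the helper above
 * (hypothetical sequence, error handling trimmed):
 *
 *	ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE);
 *	if (ret == 0) {
 *		ret = i915_gem_object_set_to_gtt_domain(obj, true);
 *		if (ret == 0)
 *			... write through the mappable aperture ...
 *		i915_gem_object_ggtt_unpin(obj);
 *	}
 */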
4233 
4234 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
4235 				    enum i915_cache_level cache_level)
4236 {
4237 	struct drm_device *dev = obj->base.dev;
4238 	struct i915_vma *vma, *next;
4239 	int ret;
4240 
4241 	if (obj->cache_level == cache_level)
4242 		return 0;
4243 
4244 	if (i915_gem_obj_is_pinned(obj)) {
4245 		DRM_DEBUG("can not change the cache level of pinned objects\n");
4246 		return -EBUSY;
4247 	}
4248 
4249 	list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) {
4250 		if (!i915_gem_valid_gtt_space(dev, &vma->node, cache_level)) {
4251 			ret = i915_vma_unbind(vma);
4252 			if (ret)
4253 				return ret;
4254 		}
4255 	}
4256 
4257 	if (i915_gem_obj_bound_any(obj)) {
4258 		ret = i915_gem_object_finish_gpu(obj);
4259 		if (ret)
4260 			return ret;
4261 
4262 		i915_gem_object_finish_gtt(obj);
4263 
4264 		/* Before SandyBridge, you could not use tiling or fence
4265 		 * registers with snooped memory, so relinquish any fences
4266 		 * currently pointing to our region in the aperture.
4267 		 */
4268 		if (INTEL_INFO(dev)->gen < 6) {
4269 			ret = i915_gem_object_put_fence(obj);
4270 			if (ret)
4271 				return ret;
4272 		}
4273 
4274 		list_for_each_entry(vma, &obj->vma_list, vma_link)
4275 			if (drm_mm_node_allocated(&vma->node))
4276 				vma->bind_vma(vma, cache_level,
4277 					      obj->has_global_gtt_mapping ? GLOBAL_BIND : 0);
4278 	}
4279 
4280 	list_for_each_entry(vma, &obj->vma_list, vma_link)
4281 		vma->node.color = cache_level;
4282 	obj->cache_level = cache_level;
4283 
4284 	if (cpu_write_needs_clflush(obj)) {
4285 		u32 old_read_domains, old_write_domain;
4286 
4287 		/* If we're coming from LLC cached, then we haven't
4288 		 * actually been tracking whether the data is in the
4289 		 * CPU cache or not, since we only allow one bit set
4290 		 * in obj->write_domain and have been skipping the clflushes.
4291 		 * Just set it to the CPU cache for now.
4292 		 */
4293 		WARN_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU);
4294 
4295 		old_read_domains = obj->base.read_domains;
4296 		old_write_domain = obj->base.write_domain;
4297 
4298 		obj->base.read_domains = I915_GEM_DOMAIN_CPU;
4299 		obj->base.write_domain = I915_GEM_DOMAIN_CPU;
4300 
4301 		trace_i915_gem_object_change_domain(obj,
4302 						    old_read_domains,
4303 						    old_write_domain);
4304 	}
4305 
4306 	i915_gem_verify_gtt(dev);
4307 	return 0;
4308 }
4309 
4310 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
4311 			       struct drm_file *file)
4312 {
4313 	struct drm_i915_gem_caching *args = data;
4314 	struct drm_gem_object *gobj;
4315 	struct drm_i915_gem_object *obj;
4316 	int ret;
4317 
4318 	ret = i915_mutex_lock_interruptible(dev);
4319 	if (ret)
4320 		return ret;
4321 
4322 	gobj = drm_gem_object_lookup(dev, file, args->handle);
4323 	if (gobj == NULL) {
4324 		ret = -ENOENT;
4325 		goto unlock;
4326 	}
4327 	obj = to_intel_bo(gobj);
4328 
4329 	switch (obj->cache_level) {
4330 	case I915_CACHE_LLC:
4331 	case I915_CACHE_L3_LLC:
4332 		args->caching = I915_CACHING_CACHED;
4333 		break;
4334 
4335 	case I915_CACHE_WT:
4336 		args->caching = I915_CACHING_DISPLAY;
4337 		break;
4338 
4339 	default:
4340 		args->caching = I915_CACHING_NONE;
4341 		break;
4342 	}
4343 
4344 	drm_gem_object_unreference(&obj->base);
4345 unlock:
4346 	mutex_unlock(&dev->struct_mutex);
4347 	return ret;
4348 }
4349 
4350 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
4351 			       struct drm_file *file)
4352 {
4353 	struct drm_i915_gem_caching *args = data;
4354 	struct drm_gem_object *gobj;
4355 	struct drm_i915_gem_object *obj;
4356 	enum i915_cache_level level;
4357 	int ret;
4358 
4359 	switch (args->caching) {
4360 	case I915_CACHING_NONE:
4361 		level = I915_CACHE_NONE;
4362 		break;
4363 	case I915_CACHING_CACHED:
4364 		level = I915_CACHE_LLC;
4365 		break;
4366 	case I915_CACHING_DISPLAY:
4367 		level = HAS_WT(dev) ? I915_CACHE_WT : I915_CACHE_NONE;
4368 		break;
4369 	default:
4370 		return -EINVAL;
4371 	}
4372 
4373 	ret = i915_mutex_lock_interruptible(dev);
4374 	if (ret)
4375 		return ret;
4376 
4377 	gobj = drm_gem_object_lookup(dev, file, args->handle);
4378 	if (gobj == NULL) {
4379 		ret = -ENOENT;
4380 		goto unlock;
4381 	}
4382 	obj = to_intel_bo(gobj);
4383 
4384 	ret = i915_gem_object_set_cache_level(obj, level);
4385 
4386 	drm_gem_object_unreference(&obj->base);
4387 unlock:
4388 	mutex_unlock(&dev->struct_mutex);
4389 	return ret;
4390 }
4391 
4392 static bool is_pin_display(struct drm_i915_gem_object *obj)
4393 {
4394 	struct i915_vma *vma;
4395 
4396 	if (list_empty(&obj->vma_list))
4397 		return false;
4398 
4399 	vma = i915_gem_obj_to_ggtt(obj);
4400 	if (!vma)
4401 		return false;
4402 
4403 	/* There are 3 sources that pin objects:
4404 	 *   1. The display engine (scanouts, sprites, cursors);
4405 	 *   2. Reservations for execbuffer;
4406 	 *   3. The user.
4407 	 *
4408 	 * We can ignore reservations as we hold the struct_mutex and
4409 	 * are only called outside of the reservation path.  The user
4410 	 * can only increment pin_count once, and so if after
4411 	 * subtracting the potential reference by the user, any pin_count
4412 	 * remains, it must be due to another use by the display engine.
4413 	 */
4414 	return vma->pin_count - !!obj->user_pin_count;
4415 }
4416 
4417 /*
4418  * Prepare buffer for display plane (scanout, cursors, etc).
4419  * Can be called from an uninterruptible phase (modesetting) and allows
4420  * any flushes to be pipelined (for pageflips).
4421  */
4422 int
4423 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
4424 				     u32 alignment,
4425 				     struct intel_ring_buffer *pipelined)
4426 {
4427 	u32 old_read_domains, old_write_domain;
4428 	bool was_pin_display;
4429 	int ret;
4430 
4431 	if (pipelined != obj->ring) {
4432 		ret = i915_gem_object_sync(obj, pipelined);
4433 		if (ret)
4434 			return ret;
4435 	}
4436 
4437 	/* Mark the pin_display early so that we account for the
4438 	 * display coherency whilst setting up the cache domains.
4439 	 */
4440 	was_pin_display = obj->pin_display;
4441 	obj->pin_display = true;
4442 
4443 	/* The display engine is not coherent with the LLC cache on gen6.  As
4444 	 * a result, we make sure that the pinning that is about to occur is
4445 	 * done with uncached PTEs. This is lowest common denominator for all
4446 	 * chipsets.
4447 	 *
4448 	 * However for gen6+, we could do better by using the GFDT bit instead
4449 	 * of uncaching, which would allow us to flush all the LLC-cached data
4450 	 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
4451 	 */
4452 	ret = i915_gem_object_set_cache_level(obj,
4453 					      HAS_WT(obj->base.dev) ? I915_CACHE_WT : I915_CACHE_NONE);
4454 	if (ret)
4455 		goto err_unpin_display;
4456 
4457 	/* As the user may map the buffer once pinned in the display plane
4458 	 * (e.g. libkms for the bootup splash), we have to ensure that we
4459 	 * always use map_and_fenceable for all scanout buffers.
4460 	 */
4461 	ret = i915_gem_obj_ggtt_pin(obj, alignment, PIN_MAPPABLE);
4462 	if (ret)
4463 		goto err_unpin_display;
4464 
4465 	i915_gem_object_flush_cpu_write_domain(obj, true);
4466 
4467 	old_write_domain = obj->base.write_domain;
4468 	old_read_domains = obj->base.read_domains;
4469 
4470 	/* It should now be out of any other write domains, and we can update
4471 	 * the domain values for our changes.
4472 	 */
4473 	obj->base.write_domain = 0;
4474 	obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
4475 
4476 	trace_i915_gem_object_change_domain(obj,
4477 					    old_read_domains,
4478 					    old_write_domain);
4479 
4480 	return 0;
4481 
4482 err_unpin_display:
4483 	WARN_ON(was_pin_display != is_pin_display(obj));
4484 	obj->pin_display = was_pin_display;
4485 	return ret;
4486 }
4487 
4488 void
4489 i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj)
4490 {
4491 	i915_gem_object_ggtt_unpin(obj);
4492 	obj->pin_display = is_pin_display(obj);
4493 }
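
/*
 * The two helpers above are expected to be used as a pair by the
 * modesetting code, roughly (hypothetical sketch):
 *
 *	ret = i915_gem_object_pin_to_display_plane(obj, alignment, pipelined);
 *	if (ret == 0) {
 *		... scan out from the object ...
 *		i915_gem_object_unpin_from_display_plane(obj);
 *	}
 */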
4494 
4495 int
4496 i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj)
4497 {
4498 	int ret;
4499 
4500 	if ((obj->base.read_domains & I915_GEM_GPU_DOMAINS) == 0)
4501 		return 0;
4502 
4503 	ret = i915_gem_object_wait_rendering(obj, false);
4504 	if (ret)
4505 		return ret;
4506 
4507 	/* Ensure that we invalidate the GPU's caches and TLBs. */
4508 	obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS;
4509 	return 0;
4510 }
4511 
4512 /**
4513  * Moves a single object to the CPU read, and possibly write domain.
4514  *
4515  * This function returns when the move is complete, including waiting on
4516  * flushes to occur.
4517  */
4518 int
4519 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
4520 {
4521 	uint32_t old_write_domain, old_read_domains;
4522 	int ret;
4523 
4524 	if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
4525 		return 0;
4526 
4527 	ret = i915_gem_object_wait_rendering(obj, !write);
4528 	if (ret)
4529 		return ret;
4530 
4531 	i915_gem_object_flush_gtt_write_domain(obj);
4532 
4533 	old_write_domain = obj->base.write_domain;
4534 	old_read_domains = obj->base.read_domains;
4535 
4536 	/* Flush the CPU cache if it's still invalid. */
4537 	if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
4538 		i915_gem_clflush_object(obj, false);
4539 
4540 		obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
4541 	}
4542 
4543 	/* It should now be out of any other write domains, and we can update
4544 	 * the domain values for our changes.
4545 	 */
4546 	BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
4547 
4548 	/* If we're writing through the CPU, then the GPU read domains will
4549 	 * need to be invalidated at next use.
4550 	 */
4551 	if (write) {
4552 		obj->base.read_domains = I915_GEM_DOMAIN_CPU;
4553 		obj->base.write_domain = I915_GEM_DOMAIN_CPU;
4554 	}
4555 
4556 	trace_i915_gem_object_change_domain(obj,
4557 					    old_read_domains,
4558 					    old_write_domain);
4559 
4560 	return 0;
4561 }
4562 
4563 /* Throttle our rendering by waiting until the ring has completed our requests
4564  * emitted over 20 msec ago.
4565  *
4566  * Note that if we were to use the current jiffies each time around the loop,
4567  * we wouldn't escape the function with any frames outstanding if the time to
4568  * render a frame was over 20ms.
4569  *
4570  * This should get us reasonable parallelism between CPU and GPU but also
4571  * relatively low latency when blocking on a particular request to finish.
4572  */
4573 static int
4574 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
4575 {
4576 	struct drm_i915_private *dev_priv = dev->dev_private;
4577 	struct drm_i915_file_private *file_priv = file->driver_priv;
4578 	unsigned long recent_enough = jiffies - msecs_to_jiffies(20);
4579 	struct drm_i915_gem_request *request;
4580 	struct intel_ring_buffer *ring = NULL;
4581 	unsigned reset_counter;
4582 	u32 seqno = 0;
4583 	int ret;
4584 
4585 	ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
4586 	if (ret)
4587 		return ret;
4588 
4589 	ret = i915_gem_check_wedge(&dev_priv->gpu_error, false);
4590 	if (ret)
4591 		return ret;
4592 
4593 	spin_lock(&file_priv->mm.lock);
4594 	list_for_each_entry(request, &file_priv->mm.request_list, client_list) {
4595 		if (time_after_eq(request->emitted_jiffies, recent_enough))
4596 			break;
4597 
4598 		ring = request->ring;
4599 		seqno = request->seqno;
4600 	}
4601 	reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
4602 	spin_unlock(&file_priv->mm.lock);
4603 
4604 	if (seqno == 0)
4605 		return 0;
4606 
4607 	ret = __wait_seqno(ring, seqno, reset_counter, true, NULL, NULL);
4608 	if (ret == 0)
4609 		queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0);
4610 
4611 	return ret;
4612 }
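
/*
 * Example of the cutoff arithmetic: with HZ=1000, msecs_to_jiffies(20)
 * is 20 ticks, so the loop remembers the newest request emitted more
 * than 20 jiffies before the snapshot of jiffies taken at entry and
 * waits on that one; anything newer terminates the walk via
 * time_after_eq().
 */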
4613 
4614 static bool
4615 i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags)
4616 {
4617 	struct drm_i915_gem_object *obj = vma->obj;
4618 
4619 	if (alignment &&
4620 	    vma->node.start & (alignment - 1))
4621 		return true;
4622 
4623 	if (flags & PIN_MAPPABLE && !obj->map_and_fenceable)
4624 		return true;
4625 
4626 	if (flags & PIN_OFFSET_BIAS &&
4627 	    vma->node.start < (flags & PIN_OFFSET_MASK))
4628 		return true;
4629 
4630 	return false;
4631 }
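
/*
 * Example with hypothetical numbers: a vma at node.start = 0x00010000
 * pinned again with alignment = 0x00020000 is misplaced, because
 * 0x00010000 & (0x00020000 - 1) != 0; i915_gem_object_pin() below then
 * unbinds and rebinds it at a suitable offset.
 */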
4632 
4633 int
4634 i915_gem_object_pin(struct drm_i915_gem_object *obj,
4635 		    struct i915_address_space *vm,
4636 		    uint32_t alignment,
4637 		    uint64_t flags)
4638 {
4639 	struct i915_vma *vma;
4640 	int ret;
4641 
4642 	if (WARN_ON(flags & (PIN_GLOBAL | PIN_MAPPABLE) && !i915_is_ggtt(vm)))
4643 		return -EINVAL;
4644 
4645 	vma = i915_gem_obj_to_vma(obj, vm);
4646 	if (vma) {
4647 		if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
4648 			return -EBUSY;
4649 
4650 		if (i915_vma_misplaced(vma, alignment, flags)) {
4651 			WARN(vma->pin_count,
4652 			     "bo is already pinned with incorrect alignment:"
4653 			     " offset=%lx, req.alignment=%x, req.map_and_fenceable=%d,"
4654 			     " obj->map_and_fenceable=%d\n",
4655 			     i915_gem_obj_offset(obj, vm), alignment,
4656 			     !!(flags & PIN_MAPPABLE),
4657 			     obj->map_and_fenceable);
4658 			ret = i915_vma_unbind(vma);
4659 			if (ret)
4660 				return ret;
4661 
4662 			vma = NULL;
4663 		}
4664 	}
4665 
4666 	if (vma == NULL || !drm_mm_node_allocated(&vma->node)) {
4667 		vma = i915_gem_object_bind_to_vm(obj, vm, alignment, flags);
4668 		if (IS_ERR(vma))
4669 			return PTR_ERR(vma);
4670 	}
4671 
4672 	if (flags & PIN_GLOBAL && !obj->has_global_gtt_mapping)
4673 		vma->bind_vma(vma, obj->cache_level, GLOBAL_BIND);
4674 
4675 	vma->pin_count++;
4676 	if (flags & PIN_MAPPABLE)
4677 		obj->pin_mappable |= true;
4678 
4679 	return 0;
4680 }
4681 
4682 void
4683 i915_gem_object_ggtt_unpin(struct drm_i915_gem_object *obj)
4684 {
4685 	struct i915_vma *vma = i915_gem_obj_to_ggtt(obj);
4686 
4687 	BUG_ON(!vma);
4688 	BUG_ON(vma->pin_count == 0);
4689 	BUG_ON(!i915_gem_obj_ggtt_bound(obj));
4690 
4691 	if (--vma->pin_count == 0)
4692 		obj->pin_mappable = false;
4693 }
4694 
4695 int
4696 i915_gem_pin_ioctl(struct drm_device *dev, void *data,
4697 		   struct drm_file *file)
4698 {
4699 	struct drm_i915_gem_pin *args = data;
4700 	struct drm_gem_object *gobj;
4701 	struct drm_i915_gem_object *obj;
4702 	int ret;
4703 
4704 	if (INTEL_INFO(dev)->gen >= 6)
4705 		return -ENODEV;
4706 
4707 	ret = i915_mutex_lock_interruptible(dev);
4708 	if (ret)
4709 		return ret;
4710 
4711 	gobj = drm_gem_object_lookup(dev, file, args->handle);
4712 	if (gobj == NULL) {
4713 		ret = -ENOENT;
4714 		goto unlock;
4715 	}
4716 	obj = to_intel_bo(gobj);
4717 
4718 	if (obj->madv != I915_MADV_WILLNEED) {
4719 		DRM_DEBUG("Attempting to pin a purgeable buffer\n");
4720 		ret = -EFAULT;
4721 		goto out;
4722 	}
4723 
4724 	if (obj->pin_filp != NULL && obj->pin_filp != file) {
4725 		DRM_DEBUG("Already pinned in i915_gem_pin_ioctl(): %d\n",
4726 			  args->handle);
4727 		ret = -EINVAL;
4728 		goto out;
4729 	}
4730 
4731 	if (obj->user_pin_count == ULONG_MAX) {
4732 		ret = -EBUSY;
4733 		goto out;
4734 	}
4735 
4736 	if (obj->user_pin_count == 0) {
4737 		ret = i915_gem_obj_ggtt_pin(obj, args->alignment, PIN_MAPPABLE);
4738 		if (ret)
4739 			goto out;
4740 	}
4741 
4742 	obj->user_pin_count++;
4743 	obj->pin_filp = file;
4744 
4745 	args->offset = i915_gem_obj_ggtt_offset(obj);
4746 out:
4747 	drm_gem_object_unreference(&obj->base);
4748 unlock:
4749 	mutex_unlock(&dev->struct_mutex);
4750 	return ret;
4751 }
4752 
4753 int
4754 i915_gem_unpin_ioctl(struct drm_device *dev, void *data,
4755 		     struct drm_file *file)
4756 {
4757 	struct drm_i915_gem_pin *args = data;
4758 	struct drm_gem_object *gobj;
4759 	struct drm_i915_gem_object *obj;
4760 	int ret;
4761 
4762 	ret = i915_mutex_lock_interruptible(dev);
4763 	if (ret)
4764 		return ret;
4765 
4766 	gobj = drm_gem_object_lookup(dev, file, args->handle);
4767 	if (gobj == NULL) {
4768 		ret = -ENOENT;
4769 		goto unlock;
4770 	}
4771 	obj = to_intel_bo(gobj);
4772 
4773 	if (obj->pin_filp != file) {
4774 		DRM_DEBUG("Not pinned by caller in i915_gem_pin_ioctl(): %d\n",
4775 			  args->handle);
4776 		ret = -EINVAL;
4777 		goto out;
4778 	}
4779 	obj->user_pin_count--;
4780 	if (obj->user_pin_count == 0) {
4781 		obj->pin_filp = NULL;
4782 		i915_gem_object_ggtt_unpin(obj);
4783 	}
4784 
4785 out:
4786 	drm_gem_object_unreference(&obj->base);
4787 unlock:
4788 	mutex_unlock(&dev->struct_mutex);
4789 	return ret;
4790 }
4791 
4792 int
4793 i915_gem_busy_ioctl(struct drm_device *dev, void *data,
4794 		    struct drm_file *file)
4795 {
4796 	struct drm_i915_gem_busy *args = data;
4797 	struct drm_gem_object *gobj;
4798 	struct drm_i915_gem_object *obj;
4799 	int ret;
4800 
4801 	ret = i915_mutex_lock_interruptible(dev);
4802 	if (ret)
4803 		return ret;
4804 
4805 	gobj = drm_gem_object_lookup(dev, file, args->handle);
4806 	if (gobj == NULL) {
4807 		ret = -ENOENT;
4808 		goto unlock;
4809 	}
4810 	obj = to_intel_bo(gobj);
4811 
4812 	/* Count all active objects as busy, even if they are not currently used
4813 	 * by the GPU. Users of this interface expect objects to eventually
4814 	 * become non-busy without any further action; therefore emit any
4815 	 * necessary flushes here.
4816 	 */
4817 	ret = i915_gem_object_flush_active(obj);
4818 
4819 	args->busy = obj->active;
4820 	if (obj->ring) {
4821 		BUILD_BUG_ON(I915_NUM_RINGS > 16);
4822 		args->busy |= intel_ring_flag(obj->ring) << 16;
4823 	}
4824 
4825 	drm_gem_object_unreference(&obj->base);
4826 unlock:
4827 	mutex_unlock(&dev->struct_mutex);
4828 	return ret;
4829 }
4830 
4831 int
4832 i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
4833 			struct drm_file *file_priv)
4834 {
4835 	return i915_gem_ring_throttle(dev, file_priv);
4836 }
4837 
4838 int
4839 i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
4840 		       struct drm_file *file_priv)
4841 {
4842 	struct drm_i915_gem_madvise *args = data;
4843 	struct drm_gem_object *gobj;
4844 	struct drm_i915_gem_object *obj;
4845 	int ret;
4846 
4847 	switch (args->madv) {
4848 	case I915_MADV_DONTNEED:
4849 	case I915_MADV_WILLNEED:
4850 	    break;
4851 	default:
4852 	    return -EINVAL;
4853 	}
4854 
4855 	ret = i915_mutex_lock_interruptible(dev);
4856 	if (ret)
4857 		return ret;
4858 
4859 	gobj = drm_gem_object_lookup(dev, file_priv, args->handle);
4860 	if (gobj == NULL) {
4861 		ret = -ENOENT;
4862 		goto unlock;
4863 	}
4864 	obj = to_intel_bo(gobj);
4865 
4866 	if (i915_gem_obj_is_pinned(obj)) {
4867 		ret = -EINVAL;
4868 		goto out;
4869 	}
4870 
4871 	if (obj->madv != __I915_MADV_PURGED)
4872 		obj->madv = args->madv;
4873 
4874 	/* if the object is no longer attached, discard its backing storage */
4875 	if (i915_gem_object_is_purgeable(obj) && obj->pages == NULL)
4876 		i915_gem_object_truncate(obj);
4877 
4878 	args->retained = obj->madv != __I915_MADV_PURGED;
4879 
4880 out:
4881 	drm_gem_object_unreference(&obj->base);
4882 unlock:
4883 	mutex_unlock(&dev->struct_mutex);
4884 	return ret;
4885 }
4886 
4887 void i915_gem_object_init(struct drm_i915_gem_object *obj,
4888 			  const struct drm_i915_gem_object_ops *ops)
4889 {
4890 	INIT_LIST_HEAD(&obj->global_list);
4891 	INIT_LIST_HEAD(&obj->ring_list);
4892 	INIT_LIST_HEAD(&obj->obj_exec_link);
4893 	INIT_LIST_HEAD(&obj->vma_list);
4894 
4895 	obj->ops = ops;
4896 
4897 	obj->fence_reg = I915_FENCE_REG_NONE;
4898 	obj->madv = I915_MADV_WILLNEED;
4899 	/* Avoid an unnecessary call to unbind on the first bind. */
4900 	obj->map_and_fenceable = true;
4901 
4902 	i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size);
4903 }
4904 
4905 static const struct drm_i915_gem_object_ops i915_gem_object_ops = {
4906 	.get_pages = i915_gem_object_get_pages_gtt,
4907 	.put_pages = i915_gem_object_put_pages_gtt,
4908 };
4909 
4910 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
4911 						  size_t size)
4912 {
4913 #ifdef __NetBSD__
4914 	struct drm_i915_private *const dev_priv = dev->dev_private;
4915 #endif
4916 	struct drm_i915_gem_object *obj;
4917 #ifndef __NetBSD__
4918 	struct address_space *mapping;
4919 	gfp_t mask;
4920 #endif
4921 
4922 	obj = i915_gem_object_alloc(dev);
4923 	if (obj == NULL)
4924 		return NULL;
4925 
4926 	if (drm_gem_object_init(dev, &obj->base, size) != 0) {
4927 		i915_gem_object_free(obj);
4928 		return NULL;
4929 	}
4930 
4931 #ifdef __NetBSD__
4932 	uao_set_pgfl(obj->base.gemo_shm_uao, dev_priv->gtt.pgfl);
4933 #else
4934 	mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
4935 	if (IS_CRESTLINE(dev) || IS_BROADWATER(dev)) {
4936 		/* 965gm cannot relocate objects above 4GiB. */
4937 		mask &= ~__GFP_HIGHMEM;
4938 		mask |= __GFP_DMA32;
4939 	}
4940 
4941 	mapping = file_inode(obj->base.filp)->i_mapping;
4942 	mapping_set_gfp_mask(mapping, mask);
4943 #endif
4944 
4945 	i915_gem_object_init(obj, &i915_gem_object_ops);
4946 
4947 	obj->base.write_domain = I915_GEM_DOMAIN_CPU;
4948 	obj->base.read_domains = I915_GEM_DOMAIN_CPU;
4949 
4950 	if (HAS_LLC(dev)) {
4951 		/* On some devices, we can have the GPU use the LLC (the CPU
4952 		 * cache) for about a 10% performance improvement
4953 		 * compared to uncached.  Graphics requests other than
4954 		 * display scanout are coherent with the CPU in
4955 		 * accessing this cache.  This means in this mode we
4956 		 * don't need to clflush on the CPU side, and on the
4957 		 * GPU side we only need to flush internal caches to
4958 		 * get data visible to the CPU.
4959 		 *
4960 		 * However, we maintain the display planes as UC, and so
4961 		 * need to rebind when first used as such.
4962 		 */
4963 		obj->cache_level = I915_CACHE_LLC;
4964 	} else
4965 		obj->cache_level = I915_CACHE_NONE;
4966 
4967 	trace_i915_gem_object_create(obj);
4968 
4969 	return obj;
4970 }
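
/*
 * Sketched use, roughly as in the GEM create path (simplified, error
 * handling trimmed):
 *
 *	obj = i915_gem_alloc_object(dev, roundup(size, PAGE_SIZE));
 *	if (obj == NULL)
 *		return -ENOMEM;
 *	ret = drm_gem_handle_create(file, &obj->base, &handle);
 *	drm_gem_object_unreference_unlocked(&obj->base);
 */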
4971 
4972 void i915_gem_free_object(struct drm_gem_object *gem_obj)
4973 {
4974 	struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
4975 	struct drm_device *dev = obj->base.dev;
4976 	struct drm_i915_private *dev_priv = dev->dev_private;
4977 	struct i915_vma *vma, *next;
4978 
4979 	intel_runtime_pm_get(dev_priv);
4980 
4981 	trace_i915_gem_object_destroy(obj);
4982 
4983 	list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) {
4984 		int ret;
4985 
4986 		vma->pin_count = 0;
4987 		ret = i915_vma_unbind(vma);
4988 		if (WARN_ON(ret == -ERESTARTSYS)) {
4989 			bool was_interruptible;
4990 
4991 			was_interruptible = dev_priv->mm.interruptible;
4992 			dev_priv->mm.interruptible = false;
4993 
4994 			WARN_ON(i915_vma_unbind(vma));
4995 
4996 			dev_priv->mm.interruptible = was_interruptible;
4997 		}
4998 	}
4999 
5000 	i915_gem_object_detach_phys(obj);
5001 
5002 	/* Stolen objects don't hold a ref, but do hold pin count. Fix that up
5003 	 * before progressing. */
5004 	if (obj->stolen)
5005 		i915_gem_object_unpin_pages(obj);
5006 
5007 	if (WARN_ON(obj->pages_pin_count))
5008 		obj->pages_pin_count = 0;
5009 	i915_gem_object_put_pages(obj);
5010 	i915_gem_object_free_mmap_offset(obj);
5011 	i915_gem_object_release_stolen(obj);
5012 
5013 	BUG_ON(obj->pages);
5014 
5015 #ifndef __NetBSD__		/* XXX drm prime */
5016 	if (obj->base.import_attach)
5017 		drm_prime_gem_destroy(&obj->base, NULL);
5018 #endif
5019 
5020 	drm_gem_object_release(&obj->base);
5021 	i915_gem_info_remove_obj(dev_priv, obj->base.size);
5022 
5023 	kfree(obj->bit_17);
5024 	i915_gem_object_free(obj);
5025 
5026 	intel_runtime_pm_put(dev_priv);
5027 }
5028 
5029 struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
5030 				     struct i915_address_space *vm)
5031 {
5032 	struct i915_vma *vma;
5033 	list_for_each_entry(vma, &obj->vma_list, vma_link)
5034 		if (vma->vm == vm)
5035 			return vma;
5036 
5037 	return NULL;
5038 }
5039 
5040 void i915_gem_vma_destroy(struct i915_vma *vma)
5041 {
5042 	WARN_ON(vma->node.allocated);
5043 
5044 	/* Keep the vma as a placeholder in the execbuffer reservation lists */
5045 	if (!list_empty(&vma->exec_list))
5046 		return;
5047 
5048 	list_del(&vma->vma_link);
5049 
5050 	kfree(vma);
5051 }
5052 
5053 int
5054 i915_gem_suspend(struct drm_device *dev)
5055 {
5056 	struct drm_i915_private *dev_priv = dev->dev_private;
5057 	int ret = 0;
5058 
5059 	mutex_lock(&dev->struct_mutex);
5060 	if (dev_priv->ums.mm_suspended)
5061 		goto err;
5062 
5063 	ret = i915_gpu_idle(dev);
5064 	if (ret)
5065 		goto err;
5066 
5067 	i915_gem_retire_requests(dev);
5068 
5069 	/* Under UMS, be paranoid and evict. */
5070 	if (!drm_core_check_feature(dev, DRIVER_MODESET))
5071 		i915_gem_evict_everything(dev);
5072 
5073 	i915_kernel_lost_context(dev);
5074 	i915_gem_cleanup_ringbuffer(dev);
5075 
5076 	/* Hack!  Don't let anybody do execbuf while we don't control the chip.
5077 	 * We need to replace this with a semaphore, or something.
5078 	 * And not confound ums.mm_suspended!
5079 	 */
5080 	dev_priv->ums.mm_suspended = !drm_core_check_feature(dev,
5081 							     DRIVER_MODESET);
5082 	mutex_unlock(&dev->struct_mutex);
5083 
5084 	del_timer_sync(&dev_priv->gpu_error.hangcheck_timer);
5085 	cancel_delayed_work_sync(&dev_priv->mm.retire_work);
5086 	cancel_delayed_work_sync(&dev_priv->mm.idle_work);
5087 
5088 	return 0;
5089 
5090 err:
5091 	mutex_unlock(&dev->struct_mutex);
5092 	return ret;
5093 }
5094 
5095 int i915_gem_l3_remap(struct intel_ring_buffer *ring, int slice)
5096 {
5097 	struct drm_device *dev = ring->dev;
5098 	struct drm_i915_private *dev_priv = dev->dev_private;
5099 	u32 reg_base = GEN7_L3LOG_BASE + (slice * 0x200);
5100 	u32 *remap_info = dev_priv->l3_parity.remap_info[slice];
5101 	int i, ret;
5102 
5103 	if (!HAS_L3_DPF(dev) || !remap_info)
5104 		return 0;
5105 
5106 	ret = intel_ring_begin(ring, GEN7_L3LOG_SIZE / 4 * 3);
5107 	if (ret)
5108 		return ret;
5109 
5110 	/*
5111 	 * Note: We do not worry about the concurrent register cacheline hang
5112 	 * here because no other code should access these registers other than
5113 	 * at initialization time.
5114 	 */
5115 	for (i = 0; i < GEN7_L3LOG_SIZE; i += 4) {
5116 		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
5117 		intel_ring_emit(ring, reg_base + i);
5118 		intel_ring_emit(ring, remap_info[i/4]);
5119 	}
5120 
5121 	intel_ring_advance(ring);
5122 
5123 	return ret;
5124 }
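
/*
 * The intel_ring_begin() size in i915_gem_l3_remap() above follows from
 * its loop: every 4 bytes of remap information costs three dwords on the
 * ring (an MI_LOAD_REGISTER_IMM(1) header, the register offset, and the
 * value), hence GEN7_L3LOG_SIZE / 4 * 3.
 */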
5125 
5126 void i915_gem_init_swizzling(struct drm_device *dev)
5127 {
5128 	struct drm_i915_private *dev_priv = dev->dev_private;
5129 
5130 	if (INTEL_INFO(dev)->gen < 5 ||
5131 	    dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
5132 		return;
5133 
5134 	I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
5135 				 DISP_TILE_SURFACE_SWIZZLING);
5136 
5137 	if (IS_GEN5(dev))
5138 		return;
5139 
5140 	I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
5141 	if (IS_GEN6(dev))
5142 		I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB));
5143 	else if (IS_GEN7(dev))
5144 		I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB));
5145 	else if (IS_GEN8(dev))
5146 		I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW));
5147 	else
5148 		BUG();
5149 }
5150 
5151 static bool
5152 intel_enable_blt(struct drm_device *dev)
5153 {
5154 	if (!HAS_BLT(dev))
5155 		return false;
5156 
5157 	/* The blitter was dysfunctional on early prototypes */
5158 	if (IS_GEN6(dev) && dev->pdev->revision < 8) {
5159 		DRM_INFO("BLT not supported on this pre-production hardware;"
5160 			 " graphics performance will be degraded.\n");
5161 		return false;
5162 	}
5163 
5164 	return true;
5165 }
5166 
5167 static int i915_gem_init_rings(struct drm_device *dev)
5168 {
5169 	struct drm_i915_private *dev_priv = dev->dev_private;
5170 	int ret;
5171 
5172 	ret = intel_init_render_ring_buffer(dev);
5173 	if (ret)
5174 		return ret;
5175 
5176 	if (HAS_BSD(dev)) {
5177 		ret = intel_init_bsd_ring_buffer(dev);
5178 		if (ret)
5179 			goto cleanup_render_ring;
5180 	}
5181 
5182 	if (intel_enable_blt(dev)) {
5183 		ret = intel_init_blt_ring_buffer(dev);
5184 		if (ret)
5185 			goto cleanup_bsd_ring;
5186 	}
5187 
5188 	if (HAS_VEBOX(dev)) {
5189 		ret = intel_init_vebox_ring_buffer(dev);
5190 		if (ret)
5191 			goto cleanup_blt_ring;
5192 	}
5193 
5194 
5195 	ret = i915_gem_set_seqno(dev, ((u32)~0 - 0x1000));
5196 	if (ret)
5197 		goto cleanup_vebox_ring;
5198 
5199 	return 0;
5200 
5201 cleanup_vebox_ring:
5202 	intel_cleanup_ring_buffer(&dev_priv->ring[VECS]);
5203 cleanup_blt_ring:
5204 	intel_cleanup_ring_buffer(&dev_priv->ring[BCS]);
5205 cleanup_bsd_ring:
5206 	intel_cleanup_ring_buffer(&dev_priv->ring[VCS]);
5207 cleanup_render_ring:
5208 	intel_cleanup_ring_buffer(&dev_priv->ring[RCS]);
5209 
5210 	return ret;
5211 }
5212 
5213 int
5214 i915_gem_init_hw(struct drm_device *dev)
5215 {
5216 	struct drm_i915_private *dev_priv = dev->dev_private;
5217 	int ret, i;
5218 
5219 	if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt())
5220 		return -EIO;
5221 
5222 	if (dev_priv->ellc_size)
5223 		I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf));
5224 
5225 	if (IS_HASWELL(dev))
5226 		I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev) ?
5227 			   LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED);
5228 
5229 	if (HAS_PCH_NOP(dev)) {
5230 		if (IS_IVYBRIDGE(dev)) {
5231 			u32 temp = I915_READ(GEN7_MSG_CTL);
5232 			temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK);
5233 			I915_WRITE(GEN7_MSG_CTL, temp);
5234 		} else if (INTEL_INFO(dev)->gen >= 7) {
5235 			u32 temp = I915_READ(HSW_NDE_RSTWRN_OPT);
5236 			temp &= ~RESET_PCH_HANDSHAKE_ENABLE;
5237 			I915_WRITE(HSW_NDE_RSTWRN_OPT, temp);
5238 		}
5239 	}
5240 
5241 	i915_gem_init_swizzling(dev);
5242 
5243 	ret = i915_gem_init_rings(dev);
5244 	if (ret)
5245 		return ret;
5246 
5247 	for (i = 0; i < NUM_L3_SLICES(dev); i++)
5248 		i915_gem_l3_remap(&dev_priv->ring[RCS], i);
5249 
5250 	/*
5251 	 * XXX: Contexts should only be initialized once. Doing a switch to the
5252 	 * default context switch however is something we'd like to do after
5253 	 * reset or thaw (the latter may not actually be necessary for HW, but
5254 	 * goes with our code better). Context switching requires rings (for
5255 	 * the do_switch), but before enabling PPGTT. So don't move this.
5256 	 */
5257 	ret = i915_gem_context_enable(dev_priv);
5258 	if (ret) {
5259 		DRM_ERROR("Context enable failed %d\n", ret);
5260 		goto err_out;
5261 	}
5262 
5263 	return 0;
5264 
5265 err_out:
5266 	i915_gem_cleanup_ringbuffer(dev);
5267 	return ret;
5268 }
5269 
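/*
 * One-time GEM initialization: set up the global GTT and contexts,
 * then bring up the hardware.
 */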
int i915_gem_init(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret;

	mutex_lock(&dev->struct_mutex);

	if (IS_VALLEYVIEW(dev)) {
		/* VLVA0 (potential hack), BIOS isn't actually waking us */
		I915_WRITE(VLV_GTLC_WAKE_CTRL, 1);
		if (wait_for((I915_READ(VLV_GTLC_PW_STATUS) & 1) == 1, 10))
			DRM_DEBUG_DRIVER("allow wake ack timed out\n");
	}
	i915_gem_init_global_gtt(dev);

	ret = i915_gem_context_init(dev);
	if (ret) {
		mutex_unlock(&dev->struct_mutex);
		return ret;
	}

	ret = i915_gem_init_hw(dev);
	mutex_unlock(&dev->struct_mutex);
	if (ret) {
		WARN_ON(dev_priv->mm.aliasing_ppgtt);
		i915_gem_context_fini(dev);
		drm_mm_takedown(&dev_priv->gtt.base.mm);
		return ret;
	}

	/* Allow hardware batchbuffers unless told otherwise, but not for KMS. */
	if (!drm_core_check_feature(dev, DRIVER_MODESET))
		dev_priv->dri1.allow_batchbuffer = 1;
	return 0;
}

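/* Tear down every ring that has been initialized. */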
void
i915_gem_cleanup_ringbuffer(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_ring_buffer *ring;
	int i;

	for_each_ring(ring, dev_priv, i)
		intel_cleanup_ring_buffer(ring);
}

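/*
 * UMS (non-KMS) entry point: re-enable the hardware when a client
 * takes over the VT.  A no-op under KMS.
 */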
int
i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
		       struct drm_file *file_priv)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret;

	if (drm_core_check_feature(dev, DRIVER_MODESET))
		return 0;

	if (i915_reset_in_progress(&dev_priv->gpu_error)) {
		DRM_ERROR("Reenabling wedged hardware, good luck\n");
		atomic_set(&dev_priv->gpu_error.reset_counter, 0);
	}

	mutex_lock(&dev->struct_mutex);
	dev_priv->ums.mm_suspended = 0;

	ret = i915_gem_init_hw(dev);
	if (ret != 0) {
		mutex_unlock(&dev->struct_mutex);
		return ret;
	}

	BUG_ON(!list_empty(&dev_priv->gtt.base.active_list));
	mutex_unlock(&dev->struct_mutex);

	ret = drm_irq_install(dev);
	if (ret)
		goto cleanup_ringbuffer;

	return 0;

cleanup_ringbuffer:
	mutex_lock(&dev->struct_mutex);
	i915_gem_cleanup_ringbuffer(dev);
	dev_priv->ums.mm_suspended = 1;
	mutex_unlock(&dev->struct_mutex);

	return ret;
}

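/* UMS counterpart to entervt: quiesce the GPU when the VT is released. */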
int
i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
		       struct drm_file *file_priv)
{
	if (drm_core_check_feature(dev, DRIVER_MODESET))
		return 0;

	drm_irq_uninstall(dev);

	return i915_gem_suspend(dev);
}

void
i915_gem_lastclose(struct drm_device *dev)
{
	int ret;

	if (drm_core_check_feature(dev, DRIVER_MODESET))
		return;

	ret = i915_gem_suspend(dev);
	if (ret)
		DRM_ERROR("failed to idle hardware: %d\n", ret);
}

static void
init_ring_lists(struct intel_ring_buffer *ring)
{
	INIT_LIST_HEAD(&ring->active_list);
	INIT_LIST_HEAD(&ring->request_list);
}

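/*
 * Initialize an address space and link it into the device's VM list;
 * the global GTT's allocator is set up elsewhere, so drm_mm_init() is
 * skipped for it here.
 */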
void i915_init_vm(struct drm_i915_private *dev_priv,
		  struct i915_address_space *vm)
{
	if (!i915_is_ggtt(vm))
		drm_mm_init(&vm->mm, vm->start, vm->total);
	vm->dev = dev_priv->dev;
	INIT_LIST_HEAD(&vm->active_list);
	INIT_LIST_HEAD(&vm->inactive_list);
	INIT_LIST_HEAD(&vm->global_link);
	list_add_tail(&vm->global_link, &dev_priv->vm_list);
}

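/*
 * One-time setup of GEM bookkeeping at driver load: the object slab
 * cache, address-space and LRU lists, retire/idle work handlers,
 * fence-register accounting, swizzle detection and the shrinker.
 */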
void
i915_gem_load(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int i;

	dev_priv->slab =
		kmem_cache_create("i915_gem_object",
				  sizeof(struct drm_i915_gem_object), 0,
				  SLAB_HWCACHE_ALIGN,
				  NULL);

	INIT_LIST_HEAD(&dev_priv->vm_list);
	i915_init_vm(dev_priv, &dev_priv->gtt.base);

	INIT_LIST_HEAD(&dev_priv->context_list);
	INIT_LIST_HEAD(&dev_priv->mm.unbound_list);
	INIT_LIST_HEAD(&dev_priv->mm.bound_list);
	INIT_LIST_HEAD(&dev_priv->mm.fence_list);
	for (i = 0; i < I915_NUM_RINGS; i++)
		init_ring_lists(&dev_priv->ring[i]);
	for (i = 0; i < I915_MAX_NUM_FENCES; i++)
		INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
	INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
			  i915_gem_retire_work_handler);
	INIT_DELAYED_WORK(&dev_priv->mm.idle_work,
			  i915_gem_idle_work_handler);
#ifdef __NetBSD__
	spin_lock_init(&dev_priv->gpu_error.reset_lock);
	DRM_INIT_WAITQUEUE(&dev_priv->gpu_error.reset_queue, "i915errst");
#else
	init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
#endif

	/* On GEN3 we really need to make sure the ARB C3 LP bit is set */
	if (IS_GEN3(dev)) {
		I915_WRITE(MI_ARB_STATE,
			   _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE));
	}

	dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL;

	/* Old X drivers will take 0-2 for front, back, depth buffers */
	if (!drm_core_check_feature(dev, DRIVER_MODESET))
		dev_priv->fence_reg_start = 3;

	if (INTEL_INFO(dev)->gen >= 7 && !IS_VALLEYVIEW(dev))
		dev_priv->num_fence_regs = 32;
	else if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
		dev_priv->num_fence_regs = 16;
	else
		dev_priv->num_fence_regs = 8;

	/* Initialize fence registers to zero */
	INIT_LIST_HEAD(&dev_priv->mm.fence_list);
	i915_gem_restore_fences(dev);

	i915_gem_detect_bit_6_swizzle(dev);
#ifdef __NetBSD__
	DRM_INIT_WAITQUEUE(&dev_priv->pending_flip_queue, "i915flip");
	spin_lock_init(&dev_priv->pending_flip_lock);
#else
	init_waitqueue_head(&dev_priv->pending_flip_queue);
#endif

	dev_priv->mm.interruptible = true;

	dev_priv->mm.inactive_shrinker.scan_objects = i915_gem_inactive_scan;
	dev_priv->mm.inactive_shrinker.count_objects = i915_gem_inactive_count;
	dev_priv->mm.inactive_shrinker.seeks = DEFAULT_SEEKS;
	register_shrinker(&dev_priv->mm.inactive_shrinker);
}

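/*
 * Called when a DRM file handle is closed: cancel its idle work and
 * disassociate any requests still pointing at the file_priv.
 */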
void i915_gem_release(struct drm_device *dev, struct drm_file *file)
{
	struct drm_i915_file_private *file_priv = file->driver_priv;

	cancel_delayed_work_sync(&file_priv->mm.idle_work);

	/* Clean up our request list when the client is going away, so that
	 * later retire_requests won't dereference our soon-to-be-gone
	 * file_priv.
	 */
	spin_lock(&file_priv->mm.lock);
	while (!list_empty(&file_priv->mm.request_list)) {
		struct drm_i915_gem_request *request;

		request = list_first_entry(&file_priv->mm.request_list,
					   struct drm_i915_gem_request,
					   client_list);
		list_del(&request->client_list);
		request->file_priv = NULL;
	}
	spin_unlock(&file_priv->mm.lock);
}

static void
i915_gem_file_idle_work_handler(struct work_struct *work)
{
	struct drm_i915_file_private *file_priv =
		container_of(work, typeof(*file_priv), mm.idle_work.work);

	atomic_set(&file_priv->rps_wait_boost, false);
}

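/*
 * Called when a DRM file handle is opened: allocate the per-file
 * state and set up its context via i915_gem_context_open().
 */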
int i915_gem_open(struct drm_device *dev, struct drm_file *file)
{
	struct drm_i915_file_private *file_priv;
	int ret;

	DRM_DEBUG_DRIVER("\n");

	file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
	if (!file_priv)
		return -ENOMEM;

	file->driver_priv = file_priv;
	file_priv->dev_priv = dev->dev_private;
	file_priv->file = file;

	spin_lock_init(&file_priv->mm.lock);
	INIT_LIST_HEAD(&file_priv->mm.request_list);
	INIT_DELAYED_WORK(&file_priv->mm.idle_work,
			  i915_gem_file_idle_work_handler);

	ret = i915_gem_context_open(dev, file);
	if (ret)
		kfree(file_priv);

	return ret;
}

#ifndef __NetBSD__
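/*
 * Best-effort test of whether struct_mutex is held by the given task;
 * used by the shrinker callbacks below to recurse into a lock we
 * already hold.  Only meaningful when the mutex owner is tracked
 * (SMP or mutex debugging); otherwise it conservatively reports false.
 */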
static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task)
{
	if (!mutex_is_locked(mutex))
		return false;

#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_MUTEXES)
	return mutex->owner == task;
#else
	/* Since UP may be pre-empted, we cannot assume that we own the lock */
	return false;
#endif
}
#endif

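/*
 * Shrinker "count" callback: estimate how many pages could be
 * reclaimed from unpinned, inactive objects.  Stubbed out on NetBSD,
 * which does not use the Linux shrinker interface here.
 */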
static unsigned long
i915_gem_inactive_count(struct shrinker *shrinker, struct shrink_control *sc)
{
#ifdef __NetBSD__		/* XXX shrinkers */
	return 0;
#else
	struct drm_i915_private *dev_priv =
		container_of(shrinker,
			     struct drm_i915_private,
			     mm.inactive_shrinker);
	struct drm_device *dev = dev_priv->dev;
	struct drm_i915_gem_object *obj;
	bool unlock = true;
	unsigned long count;

	if (!mutex_trylock(&dev->struct_mutex)) {
		if (!mutex_is_locked_by(&dev->struct_mutex, current))
			return 0;

		if (dev_priv->mm.shrinker_no_lock_stealing)
			return 0;

		unlock = false;
	}

	count = 0;
	list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list)
		if (obj->pages_pin_count == 0)
			count += obj->base.size >> PAGE_SHIFT;

	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
		if (obj->active)
			continue;

		if (!i915_gem_obj_is_pinned(obj) && obj->pages_pin_count == 0)
			count += obj->base.size >> PAGE_SHIFT;
	}

	if (unlock)
		mutex_unlock(&dev->struct_mutex);

	return count;
#endif
}

/* All the new VM stuff */
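/*
 * Return the offset of the object in the given address space, or -1
 * if it has no VMA there.  Lookups in the aliasing PPGTT are
 * redirected to the global GTT, which it mirrors.
 */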
unsigned long i915_gem_obj_offset(struct drm_i915_gem_object *o,
				  struct i915_address_space *vm)
{
	struct drm_i915_private *dev_priv = o->base.dev->dev_private;
	struct i915_vma *vma;

	if (!dev_priv->mm.aliasing_ppgtt ||
	    vm == &dev_priv->mm.aliasing_ppgtt->base)
		vm = &dev_priv->gtt.base;

	BUG_ON(list_empty(&o->vma_list));
	list_for_each_entry(vma, &o->vma_list, vma_link) {
		if (vma->vm == vm)
			return vma->node.start;
	}
	return -1;
}

bool i915_gem_obj_bound(struct drm_i915_gem_object *o,
			struct i915_address_space *vm)
{
	struct i915_vma *vma;

	list_for_each_entry(vma, &o->vma_list, vma_link)
		if (vma->vm == vm && drm_mm_node_allocated(&vma->node))
			return true;

	return false;
}

bool i915_gem_obj_bound_any(struct drm_i915_gem_object *o)
{
	struct i915_vma *vma;

	list_for_each_entry(vma, &o->vma_list, vma_link)
		if (drm_mm_node_allocated(&vma->node))
			return true;

	return false;
}

unsigned long i915_gem_obj_size(struct drm_i915_gem_object *o,
				struct i915_address_space *vm)
{
	struct drm_i915_private *dev_priv = o->base.dev->dev_private;
	struct i915_vma *vma;

	if (!dev_priv->mm.aliasing_ppgtt ||
	    vm == &dev_priv->mm.aliasing_ppgtt->base)
		vm = &dev_priv->gtt.base;

	BUG_ON(list_empty(&o->vma_list));

	list_for_each_entry(vma, &o->vma_list, vma_link)
		if (vma->vm == vm)
			return vma->node.size;

	return 0;
}

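/*
 * Shrinker "scan" callback: drop purgeable objects first, then unbind
 * and shrink further until the requested number of pages has been
 * released.  Stubbed out on NetBSD.
 */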
static unsigned long
i915_gem_inactive_scan(struct shrinker *shrinker, struct shrink_control *sc)
{
#ifdef __NetBSD__		/* XXX shrinkers */
	return 0;
#else
	struct drm_i915_private *dev_priv =
		container_of(shrinker,
			     struct drm_i915_private,
			     mm.inactive_shrinker);
	struct drm_device *dev = dev_priv->dev;
	unsigned long freed;
	bool unlock = true;

	if (!mutex_trylock(&dev->struct_mutex)) {
		if (!mutex_is_locked_by(&dev->struct_mutex, current))
			return SHRINK_STOP;

		if (dev_priv->mm.shrinker_no_lock_stealing)
			return SHRINK_STOP;

		unlock = false;
	}

	freed = i915_gem_purge(dev_priv, sc->nr_to_scan);
	if (freed < sc->nr_to_scan)
		freed += __i915_gem_shrink(dev_priv,
					   sc->nr_to_scan - freed,
					   false);
	if (freed < sc->nr_to_scan)
		freed += i915_gem_shrink_all(dev_priv);

	if (unlock)
		mutex_unlock(&dev->struct_mutex);

	return freed;
#endif
}

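/*
 * Return the object's GGTT VMA, if any.  Relies on the GGTT VMA being
 * kept first on the object's vma_list.
 */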
struct i915_vma *i915_gem_obj_to_ggtt(struct drm_i915_gem_object *obj)
{
	struct i915_vma *vma;

	/* This WARN has probably outlived its usefulness (callers already
	 * WARN if they don't find the GGTT vma they expect). When removing,
	 * remember to remove the pre-check in is_pin_display() as well */
	if (WARN_ON(list_empty(&obj->vma_list)))
		return NULL;

	vma = list_first_entry(&obj->vma_list, typeof(*vma), vma_link);
	if (vma->vm != obj_to_ggtt(obj))
		return NULL;

	return vma;
}