1 /*
2 * Copyright © 2008-2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Eric Anholt <eric@anholt.net>
25 *
26 */
27
28 #include <drm/drmP.h>
29 #include <drm/drm_vma_manager.h>
30 #include <drm/i915_drm.h>
31 #include "i915_drv.h"
32 #include "i915_gem_clflush.h"
33 #include "i915_vgpu.h"
34 #include "i915_trace.h"
35 #include "intel_drv.h"
36 #include "intel_frontbuffer.h"
37 #include "intel_mocs.h"
38 #include "i915_gemfs.h"
39 #include <linux/dma-fence-array.h>
40 #include <linux/kthread.h>
41 #include <linux/reservation.h>
42 #include <linux/shmem_fs.h>
43 #include <linux/slab.h>
44 #include <linux/stop_machine.h>
45 #include <linux/swap.h>
46 #include <linux/pci.h>
47 #include <linux/dma-buf.h>
48 #include <linux/swiotlb.h>
49
50 #include <sys/mman.h>
51 #include <vm/vm_map.h>
52 #include <vm/vm_param.h>
53
54 #undef USE_INSERT
55
56 static void i915_gem_flush_free_objects(struct drm_i915_private *i915);
57
cpu_write_needs_clflush(struct drm_i915_gem_object * obj)58 static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
59 {
60 if (obj->cache_dirty)
61 return false;
62
63 if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
64 return true;
65
66 return obj->pin_global; /* currently in use by HW, keep flushed */
67 }
68
69 static int
insert_mappable_node(struct i915_ggtt * ggtt,struct drm_mm_node * node,u32 size)70 insert_mappable_node(struct i915_ggtt *ggtt,
71 struct drm_mm_node *node, u32 size)
72 {
73 memset(node, 0, sizeof(*node));
74 return drm_mm_insert_node_in_range(&ggtt->base.mm, node,
75 size, 0, I915_COLOR_UNEVICTABLE,
76 0, ggtt->mappable_end,
77 DRM_MM_INSERT_LOW);
78 }
79
/* Release a node previously reserved with insert_mappable_node(). */
static void
remove_mappable_node(struct drm_mm_node *node)
{
	drm_mm_remove_node(node);
}
85
86 /* some bookkeeping */
/*
 * Account a new GEM object of @size bytes in the per-device stats.
 * object_stat_lock (a DragonFly lockmgr lock) serialises the counters.
 */
static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
				  u64 size)
{
	lockmgr(&dev_priv->mm.object_stat_lock, LK_EXCLUSIVE);
	dev_priv->mm.object_count++;
	dev_priv->mm.object_memory += size;
	lockmgr(&dev_priv->mm.object_stat_lock, LK_RELEASE);
}
95
/*
 * Remove a destroyed GEM object of @size bytes from the per-device
 * stats; mirror image of i915_gem_info_add_obj().
 */
static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
				     u64 size)
{
	lockmgr(&dev_priv->mm.object_stat_lock, LK_EXCLUSIVE);
	dev_priv->mm.object_count--;
	dev_priv->mm.object_memory -= size;
	lockmgr(&dev_priv->mm.object_stat_lock, LK_RELEASE);
}
104
105 static int
i915_gem_wait_for_error(struct i915_gpu_error * error)106 i915_gem_wait_for_error(struct i915_gpu_error *error)
107 {
108 int ret;
109
110 might_sleep();
111
112 /*
113 * Only wait 10 seconds for the gpu reset to complete to avoid hanging
114 * userspace. If it takes that long something really bad is going on and
115 * we should simply try to bail out and fail as gracefully as possible.
116 */
117 ret = wait_event_interruptible_timeout(error->reset_queue,
118 !i915_reset_backoff(error),
119 I915_RESET_TIMEOUT);
120 if (ret == 0) {
121 DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
122 return -EIO;
123 } else if (ret < 0) {
124 return ret;
125 } else {
126 return 0;
127 }
128 }
129
i915_mutex_lock_interruptible(struct drm_device * dev)130 int i915_mutex_lock_interruptible(struct drm_device *dev)
131 {
132 struct drm_i915_private *dev_priv = to_i915(dev);
133 int ret;
134
135 ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
136 if (ret)
137 return ret;
138
139 ret = mutex_lock_interruptible(&dev->struct_mutex);
140 if (ret)
141 return ret;
142
143 return 0;
144 }
145
/*
 * GET_APERTURE ioctl: report the total global GTT size and how much
 * of it is currently unavailable (reserved or pinned).
 */
int
i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
			    struct drm_file *file)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct i915_ggtt *ggtt = &dev_priv->ggtt;
	struct drm_i915_gem_get_aperture *args = data;
	struct i915_vma *vma;
	u64 pinned;

	/* Start from the space permanently reserved out of the GTT. */
	pinned = ggtt->base.reserved;
	mutex_lock(&dev->struct_mutex);
	/* Add every pinned vma on both the active and inactive lists;
	 * struct_mutex guards the vm lists. */
	list_for_each_entry(vma, &ggtt->base.active_list, vm_link)
		if (i915_vma_is_pinned(vma))
			pinned += vma->node.size;
	list_for_each_entry(vma, &ggtt->base.inactive_list, vm_link)
		if (i915_vma_is_pinned(vma))
			pinned += vma->node.size;
	mutex_unlock(&dev->struct_mutex);

	args->aper_size = ggtt->base.total;
	args->aper_available_size = args->aper_size - pinned;

	return 0;
}
171
i915_gem_object_get_pages_phys(struct drm_i915_gem_object * obj)172 static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
173 {
174 #if 0
175 struct address_space *mapping = obj->base.filp->f_mapping;
176 #else
177 vm_object_t vm_obj = obj->base.filp;
178 #endif
179 drm_dma_handle_t *phys;
180 struct sg_table *st;
181 struct scatterlist *sg;
182 char *vaddr;
183 int i;
184 int err;
185
186 if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
187 return -EINVAL;
188
189 /* Always aligning to the object size, allows a single allocation
190 * to handle all possible callers, and given typical object sizes,
191 * the alignment of the buddy allocation will naturally match.
192 */
193 phys = drm_pci_alloc(obj->base.dev,
194 roundup_pow_of_two(obj->base.size),
195 roundup_pow_of_two(obj->base.size));
196 if (!phys)
197 return -ENOMEM;
198
199 vaddr = phys->vaddr;
200 VM_OBJECT_LOCK(vm_obj);
201 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
202 struct page *page;
203 char *src;
204
205 #if 0
206 page = shmem_read_mapping_page(mapping, i);
207 #else
208 page = shmem_read_mapping_page(vm_obj, i);
209 #endif
210 if (IS_ERR(page)) {
211 err = PTR_ERR(page);
212 goto err_phys;
213 }
214
215 src = kmap_atomic(page);
216 memcpy(vaddr, src, PAGE_SIZE);
217 drm_clflush_virt_range(vaddr, PAGE_SIZE);
218 kunmap_atomic(src);
219
220 put_page(page);
221 vaddr += PAGE_SIZE;
222 }
223 VM_OBJECT_UNLOCK(vm_obj);
224
225 i915_gem_chipset_flush(to_i915(obj->base.dev));
226
227 st = kmalloc(sizeof(*st), M_DRM, GFP_KERNEL);
228 if (!st) {
229 err = -ENOMEM;
230 goto err_phys;
231 }
232
233 if (sg_alloc_table(st, 1, GFP_KERNEL)) {
234 kfree(st);
235 err = -ENOMEM;
236 goto err_phys;
237 }
238
239 sg = st->sgl;
240 sg->offset = 0;
241 sg->length = obj->base.size;
242
243 sg_dma_address(sg) = phys->busaddr;
244 sg_dma_len(sg) = obj->base.size;
245
246 obj->phys_handle = phys;
247
248 __i915_gem_object_set_pages(obj, st, sg->length);
249
250 return 0;
251
252 err_phys:
253 drm_pci_free(obj->base.dev, phys);
254
255 return err;
256 }
257
__start_cpu_write(struct drm_i915_gem_object * obj)258 static void __start_cpu_write(struct drm_i915_gem_object *obj)
259 {
260 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
261 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
262 if (cpu_write_needs_clflush(obj))
263 obj->cache_dirty = true;
264 }
265
/*
 * Common teardown before an object's shmem pages are dropped: clear
 * the dirty flag for DONTNEED objects, flush the CPU cache if the
 * caller requires it and the pages are not already read-coherent,
 * then leave the object in the CPU domain.
 */
static void
__i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
				struct sg_table *pages,
				bool needs_clflush)
{
	GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED);

	/* DONTNEED contents may be discarded, never written back. */
	if (obj->mm.madv == I915_MADV_DONTNEED)
		obj->mm.dirty = false;

	if (needs_clflush &&
	    (obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0 &&
	    !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
		drm_clflush_sg(pages);

	__start_cpu_write(obj);
}
283
/*
 * Tear down a phys-object backing: write any dirty contents back to
 * the shmem pages, then free the sg_table and the contiguous DMA
 * buffer. Counterpart of i915_gem_object_get_pages_phys().
 */
static void
i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj,
			       struct sg_table *pages)
{
	__i915_gem_object_release_shmem(obj, pages, false);

	if (obj->mm.dirty) {
#if 0
		struct address_space *mapping = obj->base.filp->f_mapping;
#else
		vm_object_t vm_obj = obj->base.filp;
#endif
		char *vaddr = obj->phys_handle->vaddr;
		int i;

		/* Copy the contiguous buffer back page by page; pages
		 * that cannot be looked up are skipped (best effort). */
		for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
			struct page *page;
			char *dst;

			page = shmem_read_mapping_page(vm_obj, i);
			if (IS_ERR(page))
				continue;

			dst = kmap_atomic(page);
			/* Flush the source range so we copy what the GPU
			 * last wrote, not stale cachelines. */
			drm_clflush_virt_range(vaddr, PAGE_SIZE);
			memcpy(dst, vaddr, PAGE_SIZE);
			kunmap_atomic(dst);

			set_page_dirty(page);
			if (obj->mm.madv == I915_MADV_WILLNEED)
				mark_page_accessed(page);
			put_page(page);
			vaddr += PAGE_SIZE;
		}
		obj->mm.dirty = false;
	}

	sg_free_table(pages);
	kfree(pages);

	drm_pci_free(obj->base.dev, obj->phys_handle);
}
326
/*
 * Release hook for phys objects: drop the pages pin.
 * NOTE(review): presumably balances a pin taken when the object was
 * attached to its phys backing — confirm against the attach path.
 */
static void
i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
{
	i915_gem_object_unpin_pages(obj);
}
332
/*
 * Backing-store ops for objects converted to a contiguous "phys" DMA
 * allocation (see i915_gem_object_get_pages_phys()).
 */
static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
	.get_pages = i915_gem_object_get_pages_phys,
	.put_pages = i915_gem_object_put_pages_phys,
	.release = i915_gem_object_release_phys,
};
338
339 static const struct drm_i915_gem_object_ops i915_gem_object_ops;
340
/*
 * Unbind every vma of @obj from its address space, waiting for
 * outstanding rendering first. Caller holds struct_mutex.
 * Returns 0, or the first error from i915_vma_unbind().
 */
int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
{
	struct i915_vma *vma;
	LINUX_LIST_HEAD(still_in_list);
	int ret;

	lockdep_assert_held(&obj->base.dev->struct_mutex);

	/* Closed vma are removed from the obj->vma_list - but they may
	 * still have an active binding on the object. To remove those we
	 * must wait for all rendering to complete to the object (as unbinding
	 * must anyway), and retire the requests.
	 */
	ret = i915_gem_object_set_to_cpu_domain(obj, false);
	if (ret)
		return ret;

	/* Move each vma onto a private list as we go, guaranteeing
	 * forward progress over obj->vma_list. */
	while ((vma = list_first_entry_or_null(&obj->vma_list,
					       struct i915_vma,
					       obj_link))) {
		list_move_tail(&vma->obj_link, &still_in_list);
		ret = i915_vma_unbind(vma);
		if (ret)
			break;
	}
	list_splice(&still_in_list, &obj->vma_list);

	return ret;
}
370
/*
 * Wait upon a single fence, applying RPS waitboosting to i915
 * requests when @rps_client is supplied. Return semantics follow
 * dma_fence_wait_timeout(): remaining timeout on success, 0 on
 * timeout, or a negative error code.
 */
static long
i915_gem_object_wait_fence(struct dma_fence *fence,
			   unsigned int flags,
			   long timeout,
			   struct intel_rps_client *rps_client)
{
	struct drm_i915_gem_request *rq;

	BUILD_BUG_ON(I915_WAIT_INTERRUPTIBLE != 0x1);

	/* Already-signaled fences need no wait at all. */
	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
		return timeout;

	/* Foreign fences go through the generic wait path. */
	if (!dma_fence_is_i915(fence))
		return dma_fence_wait_timeout(fence,
					      flags & I915_WAIT_INTERRUPTIBLE,
					      timeout);

	rq = to_request(fence);
	if (i915_gem_request_completed(rq))
		goto out;

	/* This client is about to stall waiting for the GPU. In many cases
	 * this is undesirable and limits the throughput of the system, as
	 * many clients cannot continue processing user input/output whilst
	 * blocked. RPS autotuning may take tens of milliseconds to respond
	 * to the GPU load and thus incurs additional latency for the client.
	 * We can circumvent that by promoting the GPU frequency to maximum
	 * before we wait. This makes the GPU throttle up much more quickly
	 * (good for benchmarks and user experience, e.g. window animations),
	 * but at a cost of spending more power processing the workload
	 * (bad for battery). Not all clients even want their results
	 * immediately and for them we should just let the GPU select its own
	 * frequency to maximise efficiency. To prevent a single client from
	 * forcing the clocks too high for the whole system, we only allow
	 * each client to waitboost once in a busy period.
	 */
	if (rps_client) {
		if (INTEL_GEN(rq->i915) >= 6)
			gen6_rps_boost(rq, rps_client);
		else
			rps_client = NULL;
	}

	timeout = i915_wait_request(rq, flags, timeout);

out:
	/* Opportunistically retire completed requests while we hold
	 * struct_mutex anyway. */
	if (flags & I915_WAIT_LOCKED && i915_gem_request_completed(rq))
		i915_gem_request_retire_upto(rq);

	return timeout;
}
423
/*
 * Wait for the fences tracked in a reservation object: all shared
 * fences when I915_WAIT_ALL is set, otherwise just the exclusive
 * fence. The seqcount snapshot lets us prune signaled fences
 * afterwards iff the reservation was not modified while we waited.
 */
static long
i915_gem_object_wait_reservation(struct reservation_object *resv,
				 unsigned int flags,
				 long timeout,
				 struct intel_rps_client *rps_client)
{
	unsigned int seq = __read_seqcount_begin(&resv->seq);
	struct dma_fence *excl;
	bool prune_fences = false;

	if (flags & I915_WAIT_ALL) {
		struct dma_fence **shared;
		unsigned int count, i;
		int ret;

		ret = reservation_object_get_fences_rcu(resv,
							&excl, &count, &shared);
		if (ret)
			return ret;

		for (i = 0; i < count; i++) {
			timeout = i915_gem_object_wait_fence(shared[i],
							     flags, timeout,
							     rps_client);
			if (timeout < 0)
				break;

			dma_fence_put(shared[i]);
		}

		/* Drop the references we did not reach above. */
		for (; i < count; i++)
			dma_fence_put(shared[i]);
		kfree(shared);

		prune_fences = count && timeout >= 0;
	} else {
		excl = reservation_object_get_excl_rcu(resv);
	}

	if (excl && timeout >= 0) {
		timeout = i915_gem_object_wait_fence(excl, flags, timeout,
						     rps_client);
		prune_fences = timeout >= 0;
	}

	dma_fence_put(excl);

	/* Opportunistically prune the fences iff we know they have *all* been
	 * signaled and that the reservation object has not been changed (i.e.
	 * no new fences have been added).
	 */
	if (prune_fences && !__read_seqcount_retry(&resv->seq, seq)) {
		if (reservation_object_trylock(resv)) {
			if (!__read_seqcount_retry(&resv->seq, seq))
				reservation_object_add_excl_fence(resv, NULL);
			reservation_object_unlock(resv);
		}
	}

	return timeout;
}
485
__fence_set_priority(struct dma_fence * fence,int prio)486 static void __fence_set_priority(struct dma_fence *fence, int prio)
487 {
488 struct drm_i915_gem_request *rq;
489 struct intel_engine_cs *engine;
490
491 if (dma_fence_is_signaled(fence) || !dma_fence_is_i915(fence))
492 return;
493
494 rq = to_request(fence);
495 engine = rq->engine;
496 if (!engine->schedule)
497 return;
498
499 engine->schedule(rq, prio);
500 }
501
fence_set_priority(struct dma_fence * fence,int prio)502 static void fence_set_priority(struct dma_fence *fence, int prio)
503 {
504 /* Recurse once into a fence-array */
505 if (dma_fence_is_array(fence)) {
506 struct dma_fence_array *array = to_dma_fence_array(fence);
507 int i;
508
509 for (i = 0; i < array->num_fences; i++)
510 __fence_set_priority(array->fences[i], prio);
511 } else {
512 __fence_set_priority(fence, prio);
513 }
514 }
515
/*
 * Raise the scheduler priority of every request @obj is currently
 * tracking: all shared fences when I915_WAIT_ALL is set, otherwise
 * just the exclusive fence. Returns 0 or a negative error code.
 */
int
i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
			      unsigned int flags,
			      int prio)
{
	struct dma_fence *excl;

	if (flags & I915_WAIT_ALL) {
		struct dma_fence **shared;
		unsigned int count, i;
		int ret;

		ret = reservation_object_get_fences_rcu(obj->resv,
							&excl, &count, &shared);
		if (ret)
			return ret;

		for (i = 0; i < count; i++) {
			fence_set_priority(shared[i], prio);
			dma_fence_put(shared[i]);
		}

		kfree(shared);
	} else {
		excl = reservation_object_get_excl_rcu(obj->resv);
	}

	if (excl) {
		fence_set_priority(excl, prio);
		dma_fence_put(excl);
	}
	return 0;
}
549
/**
 * Waits for rendering to the object to be completed
 * @obj: i915 gem object
 * @flags: how to wait (under a lock, for all rendering or just for writes etc)
 * @timeout: how long to wait
 * @rps_client: client (user process) to charge for any waitboosting
 *
 * Returns 0 on success, or a negative error code if the wait failed
 * or timed out.
 */
int
i915_gem_object_wait(struct drm_i915_gem_object *obj,
		     unsigned int flags,
		     long timeout,
		     struct intel_rps_client *rps_client)
{
	might_sleep();
#if IS_ENABLED(CONFIG_LOCKDEP)
	/* The caller's struct_mutex state must agree with the
	 * I915_WAIT_LOCKED flag. */
	GEM_BUG_ON(debug_locks &&
		   !!lockdep_is_held(&obj->base.dev->struct_mutex) !=
		   !!(flags & I915_WAIT_LOCKED));
#endif
	GEM_BUG_ON(timeout < 0);

	timeout = i915_gem_object_wait_reservation(obj->resv,
						   flags, timeout,
						   rps_client);
	return timeout < 0 ? timeout : 0;
}
576
/* Look up the per-file RPS (waitboost) client state. */
static struct intel_rps_client *to_rps_client(struct drm_file *file)
{
	struct drm_i915_file_private *fpriv = file->driver_priv;

	return &fpriv->rps_client;
}
583
/*
 * pwrite fast path for phys objects: copy user data directly into the
 * contiguous kernel mapping, then clflush and flush the chipset so
 * the GPU observes the new contents. Returns 0 or -EFAULT.
 */
static int
i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pwrite *args,
		     struct drm_file *file)
{
	void *vaddr = obj->phys_handle->vaddr + args->offset;
	char __user *user_data = u64_to_user_ptr(args->data_ptr);

	/* We manually control the domain here and pretend that it
	 * remains coherent i.e. in the GTT domain, like shmem_pwrite.
	 */
	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
	if (copy_from_user(vaddr, user_data, args->size))
		return -EFAULT;

	drm_clflush_virt_range(vaddr, args->size);
	i915_gem_chipset_flush(to_i915(obj->base.dev));

	intel_fb_obj_flush(obj, ORIGIN_CPU);
	return 0;
}
605
/* Allocate a zeroed GEM object from the device's slab cache. */
void *i915_gem_object_alloc(struct drm_i915_private *dev_priv)
{
	return kmem_cache_zalloc(dev_priv->objects, GFP_KERNEL);
}
610
/* Return a GEM object to the slab cache it was allocated from. */
void i915_gem_object_free(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
	kmem_cache_free(dev_priv->objects, obj);
}
616
617 static int
i915_gem_create(struct drm_file * file,struct drm_i915_private * dev_priv,uint64_t size,uint32_t * handle_p)618 i915_gem_create(struct drm_file *file,
619 struct drm_i915_private *dev_priv,
620 uint64_t size,
621 uint32_t *handle_p)
622 {
623 struct drm_i915_gem_object *obj;
624 int ret;
625 u32 handle;
626
627 size = roundup(size, PAGE_SIZE);
628 if (size == 0)
629 return -EINVAL;
630
631 /* Allocate the new object */
632 obj = i915_gem_object_create(dev_priv, size);
633 if (IS_ERR(obj))
634 return PTR_ERR(obj);
635
636 ret = drm_gem_handle_create(file, &obj->base, &handle);
637 /* drop reference from allocate - handle holds it now */
638 i915_gem_object_put(obj);
639 if (ret)
640 return ret;
641
642 *handle_p = handle;
643 return 0;
644 }
645
646 int
i915_gem_dumb_create(struct drm_file * file,struct drm_device * dev,struct drm_mode_create_dumb * args)647 i915_gem_dumb_create(struct drm_file *file,
648 struct drm_device *dev,
649 struct drm_mode_create_dumb *args)
650 {
651 /* have to work out size/pitch and return them */
652 args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64);
653 args->size = args->pitch * args->height;
654 return i915_gem_create(file, to_i915(dev),
655 args->size, &args->handle);
656 }
657
gpu_write_needs_clflush(struct drm_i915_gem_object * obj)658 static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
659 {
660 return !(obj->cache_level == I915_CACHE_NONE ||
661 obj->cache_level == I915_CACHE_WT);
662 }
663
/**
 * Creates a new mm object and returns a handle to it.
 * @dev: drm device pointer
 * @data: ioctl data blob
 * @file: drm file pointer
 */
int
i915_gem_create_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct drm_i915_gem_create *args = data;

	/* Drain objects awaiting free before allocating anew. */
	i915_gem_flush_free_objects(dev_priv);

	return i915_gem_create(file, dev_priv,
			       args->size, &args->handle);
}
682
683 static inline enum fb_op_origin
fb_write_origin(struct drm_i915_gem_object * obj,unsigned int domain)684 fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
685 {
686 return (domain == I915_GEM_DOMAIN_GTT ?
687 obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
688 }
689
/*
 * Flush outstanding writes in @obj's current write domain (restricted
 * to @flush_domains) so another domain can safely read the object,
 * then clear the write domain.
 */
static void
flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
{
	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);

	if (!(obj->base.write_domain & flush_domains))
		return;

	/* No actual flushing is required for the GTT write domain. Writes
	 * to it "immediately" go to main memory as far as we know, so there's
	 * no chipset flush. It also doesn't land in render cache.
	 *
	 * However, we do have to enforce the order so that all writes through
	 * the GTT land before any writes to the device, such as updates to
	 * the GATT itself.
	 *
	 * We also have to wait a bit for the writes to land from the GTT.
	 * An uncached read (i.e. mmio) seems to be ideal for the round-trip
	 * timing. This issue has only been observed when switching quickly
	 * between GTT writes and CPU reads from inside the kernel on recent hw,
	 * and it appears to only affect discrete GTT blocks (i.e. on LLC
	 * system agents we cannot reproduce this behaviour).
	 */
	wmb();

	switch (obj->base.write_domain) {
	case I915_GEM_DOMAIN_GTT:
		if (!HAS_LLC(dev_priv)) {
			/* Uncached mmio read to let the GTT writes land
			 * (see the comment above). */
			intel_runtime_pm_get(dev_priv);
			spin_lock_irq(&dev_priv->uncore.lock);
			POSTING_READ_FW(RING_HEAD(dev_priv->engine[RCS]->mmio_base));
			spin_unlock_irq(&dev_priv->uncore.lock);
			intel_runtime_pm_put(dev_priv);
		}

		intel_fb_obj_flush(obj,
				   fb_write_origin(obj, I915_GEM_DOMAIN_GTT));
		break;

	case I915_GEM_DOMAIN_CPU:
		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
		break;

	case I915_GEM_DOMAIN_RENDER:
		if (gpu_write_needs_clflush(obj))
			obj->cache_dirty = true;
		break;
	}

	obj->base.write_domain = 0;
}
741
/*
 * Copy @length bytes from a bit17-swizzled GPU buffer to userspace,
 * one 64-byte cacheline at a time. XORing the offset with 64 flips
 * bit 6, undoing the channel swizzle within each 128-byte pair.
 * Returns 0 on success, non-zero on fault.
 */
static inline int
__copy_to_user_swizzled(char __user *cpu_vaddr,
			const char *gpu_vaddr, int gpu_offset,
			int length)
{
	int ret, cpu_offset = 0;

	while (length > 0) {
		/* Never cross a cacheline boundary in one copy. */
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
		int this_length = min(cacheline_end - gpu_offset, length);
		int swizzled_gpu_offset = gpu_offset ^ 64;

		ret = __copy_to_user(cpu_vaddr + cpu_offset,
				     gpu_vaddr + swizzled_gpu_offset,
				     this_length);
		if (ret)
			return ret + length;

		cpu_offset += this_length;
		gpu_offset += this_length;
		length -= this_length;
	}

	return 0;
}
767
/*
 * Copy @length bytes from userspace into a bit17-swizzled GPU buffer;
 * mirror image of __copy_to_user_swizzled().
 * Returns 0 on success, non-zero on fault.
 */
static inline int
__copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
			  const char __user *cpu_vaddr,
			  int length)
{
	int ret, cpu_offset = 0;

	while (length > 0) {
		/* Never cross a cacheline boundary in one copy. */
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
		int this_length = min(cacheline_end - gpu_offset, length);
		int swizzled_gpu_offset = gpu_offset ^ 64;

		ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
				       cpu_vaddr + cpu_offset,
				       this_length);
		if (ret)
			return ret + length;

		cpu_offset += this_length;
		gpu_offset += this_length;
		length -= this_length;
	}

	return 0;
}
793
/*
 * Pins the specified object's pages and synchronizes the object with
 * GPU accesses. Sets needs_clflush to non-zero if the caller should
 * flush the object from the CPU cache.
 */
int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
				    unsigned int *needs_clflush)
{
	int ret;

	lockdep_assert_held(&obj->base.dev->struct_mutex);

	*needs_clflush = 0;
	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

	/* Wait for outstanding GPU writes (read-only access suffices). */
	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_LOCKED,
				   MAX_SCHEDULE_TIMEOUT,
				   NULL);
	if (ret)
		return ret;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	/* Read-coherent objects (or CPUs without clflush) can simply
	 * be moved into the CPU domain; no manual flushing needed. */
	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
		ret = i915_gem_object_set_to_cpu_domain(obj, false);
		if (ret)
			goto err_unpin;
		else
			goto out;
	}

	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* If we're not in the cpu read domain, set ourself into the gtt
	 * read domain and manually flush cachelines (if required). This
	 * optimizes for the case when the gpu will dirty the data
	 * anyway again before the next pread happens.
	 */
	if (!obj->cache_dirty &&
	    !(obj->base.read_domains & I915_GEM_DOMAIN_CPU))
		*needs_clflush = CLFLUSH_BEFORE;

out:
	/* return with the pages pinned */
	return 0;

err_unpin:
	i915_gem_object_unpin_pages(obj);
	return ret;
}
850
/*
 * Like i915_gem_obj_prepare_shmem_read(), but for a CPU write: waits
 * for all readers and writers, pins the pages, reports which clflushes
 * (before/after) the caller must perform, and marks the object dirty.
 */
int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
				     unsigned int *needs_clflush)
{
	int ret;

	lockdep_assert_held(&obj->base.dev->struct_mutex);

	*needs_clflush = 0;
	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

	/* Writing requires waiting for readers as well (I915_WAIT_ALL). */
	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_LOCKED |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT,
				   NULL);
	if (ret)
		return ret;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	/* Write-coherent objects (or CPUs without clflush) can simply
	 * be moved into the CPU domain; no manual flushing needed. */
	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
		ret = i915_gem_object_set_to_cpu_domain(obj, true);
		if (ret)
			goto err_unpin;
		else
			goto out;
	}

	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* If we're not in the cpu write domain, set ourself into the
	 * gtt write domain and manually flush cachelines (as required).
	 * This optimizes for the case when the gpu will use the data
	 * right away and we therefore have to clflush anyway.
	 */
	if (!obj->cache_dirty) {
		*needs_clflush |= CLFLUSH_AFTER;

		/*
		 * Same trick applies to invalidate partially written
		 * cachelines read before writing.
		 */
		if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU))
			*needs_clflush |= CLFLUSH_BEFORE;
	}

out:
	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
	obj->mm.dirty = true;
	/* return with the pages pinned */
	return 0;

err_unpin:
	i915_gem_object_unpin_pages(obj);
	return ret;
}
912
/*
 * Clflush a linear range; when bit17 swizzling is in effect the range
 * is widened to whole 128-byte chunks so both swizzle channels of
 * every touched chunk get flushed.
 */
static void
shmem_clflush_swizzled_range(char *addr, unsigned long length,
			     bool swizzled)
{
	if (unlikely(swizzled)) {
		unsigned long start = (unsigned long) addr;
		unsigned long end = (unsigned long) addr + length;

		/* For swizzling simply ensure that we always flush both
		 * channels. Lame, but simple and it works. Swizzled
		 * pwrite/pread is far from a hotpath - current userspace
		 * doesn't use it at all. */
		start = round_down(start, 128);
		end = round_up(end, 128);

		drm_clflush_virt_range((void *)start, end - start);
	} else {
		drm_clflush_virt_range(addr, length);
	}

}
934
/* Only difference to the fast-path function is that this can handle bit17
 * and uses non-atomic copy and kmap functions. */
static int
shmem_pread_slow(struct page *page, int offset, int length,
		 char __user *user_data,
		 bool page_do_bit17_swizzling, bool needs_clflush)
{
	char *vaddr;
	int ret;

	vaddr = kmap(page);
	/* Invalidate stale cachelines before reading, if requested. */
	if (needs_clflush)
		shmem_clflush_swizzled_range(vaddr + offset, length,
					     page_do_bit17_swizzling);

	if (page_do_bit17_swizzling)
		ret = __copy_to_user_swizzled(user_data, vaddr, offset, length);
	else
		ret = __copy_to_user(user_data, vaddr + offset, length);
	kunmap(page);

	return ret ? - EFAULT : 0;
}
958
/*
 * Copy up to one page of the object to userspace: try a fast atomic
 * kmap copy first, falling back to the sleeping slow path on fault or
 * when bit17 swizzling must be handled.
 */
static int
shmem_pread(struct page *page, int offset, int length, char __user *user_data,
	    bool page_do_bit17_swizzling, bool needs_clflush)
{
	int ret;

	/* -ENODEV forces the slow path when swizzling is required. */
	ret = -ENODEV;
	if (!page_do_bit17_swizzling) {
		char *vaddr = kmap_atomic(page);

		if (needs_clflush)
			drm_clflush_virt_range(vaddr + offset, length);
		ret = __copy_to_user_inatomic(user_data, vaddr + offset, length);
		kunmap_atomic(vaddr);
	}
	if (ret == 0)
		return 0;

	return shmem_pread_slow(page, offset, length, user_data,
				page_do_bit17_swizzling, needs_clflush);
}
980
/*
 * pread through the object's shmem backing store: walk the request one
 * page at a time, copying to userspace with per-page swizzle handling.
 */
static int
i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pread *args)
{
	char __user *user_data;
	u64 remain;
	unsigned int obj_do_bit17_swizzling;
	unsigned int needs_clflush;
	unsigned int idx, offset;
	int ret;

	obj_do_bit17_swizzling = 0;
	if (i915_gem_object_needs_bit17_swizzle(obj))
		obj_do_bit17_swizzling = BIT(17);

	ret = mutex_lock_interruptible(&obj->base.dev->struct_mutex);
	if (ret)
		return ret;

	/* Pin the pages and learn whether we must clflush; the mutex is
	 * only needed for the preparation step. */
	ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
	mutex_unlock(&obj->base.dev->struct_mutex);
	if (ret)
		return ret;

	remain = args->size;
	user_data = u64_to_user_ptr(args->data_ptr);
	offset = offset_in_page(args->offset);
	for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
		struct page *page = i915_gem_object_get_page(obj, idx);
		int length;

		/* Clamp the copy to the end of the current page. */
		length = remain;
		if (offset + length > PAGE_SIZE)
			length = PAGE_SIZE - offset;

		/* bit17 swizzling depends on bit 17 of each page's
		 * physical address. */
		ret = shmem_pread(page, offset, length, user_data,
				  page_to_phys(page) & obj_do_bit17_swizzling,
				  needs_clflush);
		if (ret)
			break;

		remain -= length;
		user_data += length;
		offset = 0;
	}

	i915_gem_obj_finish_shmem_access(obj);
	return ret;
}
1030
/*
 * Read @length bytes at (@base + @offset) within the GTT aperture into
 * userspace: try a fast atomic WC mapping first, retry with a sleeping
 * mapping if the atomic copy faults. Returns true (non-zero) if any
 * bytes could not be copied.
 */
static inline bool
gtt_user_read(struct io_mapping *mapping,
	      loff_t base, int offset,
	      char __user *user_data, int length)
{
	void __iomem *vaddr;
	unsigned long unwritten;

	/* We can use the cpu mem copy function because this is X86. */
	vaddr = io_mapping_map_atomic_wc(mapping, base);
	unwritten = __copy_to_user_inatomic(user_data,
					    (void __force *)vaddr + offset,
					    length);
	io_mapping_unmap_atomic(vaddr);
	if (unwritten) {
		vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE);
		unwritten = copy_to_user(user_data,
					 (void __force *)vaddr + offset,
					 length);
		io_mapping_unmap(vaddr);
	}
	return unwritten;
}
1054
/*
 * Fallback pread path that reads through the GGTT (write-combined)
 * aperture instead of the shmem pages, e.g. when the object has no
 * struct pages or the CPU copy faulted.
 *
 * Preferably the whole object is pinned into the mappable aperture;
 * if that is not possible a single scratch page of GGTT address space
 * is allocated and rebound to each object page in turn.
 */
static int
i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
		   const struct drm_i915_gem_pread *args)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_ggtt *ggtt = &i915->ggtt;
	struct drm_mm_node node;
	struct i915_vma *vma;
	void __user *user_data;
	u64 remain, offset;
	int ret;

	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
	if (ret)
		return ret;

	/* Aperture access requires the device to be awake. */
	intel_runtime_pm_get(i915);
	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
				       PIN_MAPPABLE |
				       PIN_NONFAULT |
				       PIN_NONBLOCK);
	if (!IS_ERR(vma)) {
		node.start = i915_ggtt_offset(vma);
		node.allocated = false;	/* reading through the pinned vma */
		ret = i915_vma_put_fence(vma);
		if (ret) {
			i915_vma_unpin(vma);
			vma = ERR_PTR(ret);
		}
	}
	if (IS_ERR(vma)) {
		/* Fallback: one scratch PTE, rebound per page below. */
		ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
		if (ret)
			goto out_unlock;
		GEM_BUG_ON(!node.allocated);
	}

	ret = i915_gem_object_set_to_gtt_domain(obj, false);
	if (ret)
		goto out_unpin;

	/* The copy loop below runs without holding struct_mutex. */
	mutex_unlock(&i915->drm.struct_mutex);

	user_data = u64_to_user_ptr(args->data_ptr);
	remain = args->size;
	offset = args->offset;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		u32 page_base = node.start;
		unsigned page_offset = offset_in_page(offset);
		unsigned page_length = PAGE_SIZE - page_offset;
		page_length = remain < page_length ? remain : page_length;
		if (node.allocated) {
			/* Repoint the scratch PTE at the current page; the
			 * barriers order the copies against the PTE update.
			 */
			wmb();
			ggtt->base.insert_page(&ggtt->base,
					       i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
					       node.start, I915_CACHE_NONE, 0);
			wmb();
		} else {
			page_base += offset & LINUX_PAGE_MASK;
		}

		if (gtt_user_read(&ggtt->mappable, page_base, page_offset,
				  user_data, page_length)) {
			ret = -EFAULT;
			break;
		}

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

	mutex_lock(&i915->drm.struct_mutex);
out_unpin:
	if (node.allocated) {
		/* Tear down the scratch PTE before releasing the node. */
		wmb();
		ggtt->base.clear_range(&ggtt->base,
				       node.start, node.size);
		remove_mappable_node(&node);
	} else {
		i915_vma_unpin(vma);
	}
out_unlock:
	intel_runtime_pm_put(i915);
	mutex_unlock(&i915->drm.struct_mutex);

	return ret;
}
1150
1151 /**
1152 * Reads data from the object referenced by handle.
1153 * @dev: drm device pointer
1154 * @data: ioctl data blob
1155 * @file: drm file pointer
1156 *
1157 * On error, the contents of *data are undefined.
1158 */
1159 int
i915_gem_pread_ioctl(struct drm_device * dev,void * data,struct drm_file * file)1160 i915_gem_pread_ioctl(struct drm_device *dev, void *data,
1161 struct drm_file *file)
1162 {
1163 struct drm_i915_gem_pread *args = data;
1164 struct drm_i915_gem_object *obj;
1165 int ret;
1166
1167 if (args->size == 0)
1168 return 0;
1169
1170 #if 0
1171 if (!access_ok(VERIFY_WRITE,
1172 u64_to_user_ptr(args->data_ptr),
1173 args->size))
1174 return -EFAULT;
1175 #endif
1176
1177 obj = i915_gem_object_lookup(file, args->handle);
1178 if (!obj)
1179 return -ENOENT;
1180
1181 /* Bounds check source. */
1182 if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) {
1183 ret = -EINVAL;
1184 goto out;
1185 }
1186
1187 trace_i915_gem_object_pread(obj, args->offset, args->size);
1188
1189 ret = i915_gem_object_wait(obj,
1190 I915_WAIT_INTERRUPTIBLE,
1191 MAX_SCHEDULE_TIMEOUT,
1192 to_rps_client(file));
1193 if (ret)
1194 goto out;
1195
1196 ret = i915_gem_object_pin_pages(obj);
1197 if (ret)
1198 goto out;
1199
1200 ret = i915_gem_shmem_pread(obj, args);
1201 if (ret == -EFAULT || ret == -ENODEV)
1202 ret = i915_gem_gtt_pread(obj, args);
1203
1204 i915_gem_object_unpin_pages(obj);
1205 out:
1206 i915_gem_object_put(obj);
1207 return ret;
1208 }
1209
1210 /* This is the fast write path which cannot handle
1211 * page faults in the source data
1212 */
1213
1214 static inline bool
ggtt_write(struct io_mapping * mapping,loff_t base,int offset,char __user * user_data,int length)1215 ggtt_write(struct io_mapping *mapping,
1216 loff_t base, int offset,
1217 char __user *user_data, int length)
1218 {
1219 void __iomem *vaddr;
1220 unsigned long unwritten;
1221
1222 /* We can use the cpu mem copy function because this is X86. */
1223 vaddr = io_mapping_map_atomic_wc(mapping, base);
1224 unwritten = __copy_from_user_inatomic_nocache((void __force *)vaddr + offset,
1225 user_data, length);
1226 io_mapping_unmap_atomic(vaddr);
1227 if (unwritten) {
1228 vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE);
1229 unwritten = copy_from_user((void __force *)vaddr + offset,
1230 user_data, length);
1231 io_mapping_unmap(vaddr);
1232 }
1233
1234 return unwritten;
1235 }
1236
1237 /**
1238 * This is the fast pwrite path, where we copy the data directly from the
1239 * user into the GTT, uncached.
1240 * @obj: i915 GEM object
1241 * @args: pwrite arguments structure
1242 */
static int
i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
			 const struct drm_i915_gem_pwrite *args)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_ggtt *ggtt = &i915->ggtt;
	struct drm_mm_node node;
	struct i915_vma *vma;
	u64 remain, offset;
	void __user *user_data;
	int ret;

	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
	if (ret)
		return ret;

	if (i915_gem_object_has_struct_page(obj)) {
		/*
		 * Avoid waking the device up if we can fallback, as
		 * waking/resuming is very slow (worst-case 10-100 ms
		 * depending on PCI sleeps and our own resume time).
		 * This easily dwarfs any performance advantage from
		 * using the cache bypass of indirect GGTT access.
		 */
		if (!intel_runtime_pm_get_if_in_use(i915)) {
			/* -EFAULT tells the caller to use the shmem path. */
			ret = -EFAULT;
			goto out_unlock;
		}
	} else {
		/* No backing pages, no fallback, we must force GGTT access */
		intel_runtime_pm_get(i915);
	}

	/* Preferably pin the whole object into the mappable aperture. */
	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
				       PIN_MAPPABLE |
				       PIN_NONFAULT |
				       PIN_NONBLOCK);
	if (!IS_ERR(vma)) {
		node.start = i915_ggtt_offset(vma);
		node.allocated = false;	/* writing through the pinned vma */
		ret = i915_vma_put_fence(vma);
		if (ret) {
			i915_vma_unpin(vma);
			vma = ERR_PTR(ret);
		}
	}
	if (IS_ERR(vma)) {
		/* Fallback: one scratch PTE, rebound per page below. */
		ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
		if (ret)
			goto out_rpm;
		GEM_BUG_ON(!node.allocated);
	}

	ret = i915_gem_object_set_to_gtt_domain(obj, true);
	if (ret)
		goto out_unpin;

	/* The copy loop below runs without holding struct_mutex. */
	mutex_unlock(&i915->drm.struct_mutex);

	intel_fb_obj_invalidate(obj, ORIGIN_CPU);

	user_data = u64_to_user_ptr(args->data_ptr);
	offset = args->offset;
	remain = args->size;
	while (remain) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		u32 page_base = node.start;
		unsigned int page_offset = offset_in_page(offset);
		unsigned int page_length = PAGE_SIZE - page_offset;
		page_length = remain < page_length ? remain : page_length;
		if (node.allocated) {
			wmb(); /* flush the write before we modify the GGTT */
			ggtt->base.insert_page(&ggtt->base,
					       i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
					       node.start, I915_CACHE_NONE, 0);
			wmb(); /* flush modifications to the GGTT (insert_page) */
		} else {
			page_base += offset & LINUX_PAGE_MASK;
		}
		/* If we get a fault while copying data, then (presumably) our
		 * source page isn't available. Return the error and we'll
		 * retry in the slow path.
		 * If the object is non-shmem backed, we retry again with the
		 * path that handles page fault.
		 */
		if (ggtt_write(&ggtt->mappable, page_base, page_offset,
			       user_data, page_length)) {
			ret = -EFAULT;
			break;
		}

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}
	intel_fb_obj_flush(obj, ORIGIN_CPU);

	mutex_lock(&i915->drm.struct_mutex);
out_unpin:
	if (node.allocated) {
		/* Tear down the scratch PTE before releasing the node. */
		wmb();
		ggtt->base.clear_range(&ggtt->base,
				       node.start, node.size);
		remove_mappable_node(&node);
	} else {
		i915_vma_unpin(vma);
	}
out_rpm:
	intel_runtime_pm_put(i915);
out_unlock:
	mutex_unlock(&i915->drm.struct_mutex);
	return ret;
}
1361
1362 static int
shmem_pwrite_slow(struct page * page,int offset,int length,char __user * user_data,bool page_do_bit17_swizzling,bool needs_clflush_before,bool needs_clflush_after)1363 shmem_pwrite_slow(struct page *page, int offset, int length,
1364 char __user *user_data,
1365 bool page_do_bit17_swizzling,
1366 bool needs_clflush_before,
1367 bool needs_clflush_after)
1368 {
1369 char *vaddr;
1370 int ret;
1371
1372 vaddr = kmap(page);
1373 if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
1374 shmem_clflush_swizzled_range(vaddr + offset, length,
1375 page_do_bit17_swizzling);
1376 if (page_do_bit17_swizzling)
1377 ret = __copy_from_user_swizzled(vaddr, offset, user_data,
1378 length);
1379 else
1380 ret = __copy_from_user(vaddr + offset, user_data, length);
1381 if (needs_clflush_after)
1382 shmem_clflush_swizzled_range(vaddr + offset, length,
1383 page_do_bit17_swizzling);
1384 kunmap(page);
1385
1386 return ret ? -EFAULT : 0;
1387 }
1388
1389 /* Per-page copy function for the shmem pwrite fastpath.
1390 * Flushes invalid cachelines before writing to the target if
1391 * needs_clflush_before is set and flushes out any written cachelines after
1392 * writing if needs_clflush is set.
1393 */
1394 static int
shmem_pwrite(struct page * page,int offset,int len,char __user * user_data,bool page_do_bit17_swizzling,bool needs_clflush_before,bool needs_clflush_after)1395 shmem_pwrite(struct page *page, int offset, int len, char __user *user_data,
1396 bool page_do_bit17_swizzling,
1397 bool needs_clflush_before,
1398 bool needs_clflush_after)
1399 {
1400 int ret;
1401
1402 ret = -ENODEV;
1403 if (!page_do_bit17_swizzling) {
1404 char *vaddr = kmap_atomic(page);
1405
1406 if (needs_clflush_before)
1407 drm_clflush_virt_range(vaddr + offset, len);
1408 ret = __copy_from_user_inatomic(vaddr + offset, user_data, len);
1409 if (needs_clflush_after)
1410 drm_clflush_virt_range(vaddr + offset, len);
1411
1412 kunmap_atomic(vaddr);
1413 }
1414 if (ret == 0)
1415 return ret;
1416
1417 return shmem_pwrite_slow(page, offset, len, user_data,
1418 page_do_bit17_swizzling,
1419 needs_clflush_before,
1420 needs_clflush_after);
1421 }
1422
/*
 * Write user data into the object through its shmem backing pages,
 * one page at a time.  struct_mutex is only held while preparing the
 * object for CPU writes; the copy loop itself runs unlocked.
 */
static int
i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
		      const struct drm_i915_gem_pwrite *args)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	void __user *user_data;
	u64 remain;
	unsigned int obj_do_bit17_swizzling;
	unsigned int partial_cacheline_write;
	unsigned int needs_clflush;
	unsigned int offset, idx;
	int ret;
#ifdef __DragonFly__
	vm_object_t vm_obj;
#endif

	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
	if (ret)
		return ret;

	ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush);
	mutex_unlock(&i915->drm.struct_mutex);
	if (ret)
		return ret;

	obj_do_bit17_swizzling = 0;
	if (i915_gem_object_needs_bit17_swizzle(obj))
		obj_do_bit17_swizzling = BIT(17);

	/* If we don't overwrite a cacheline completely we need to be
	 * careful to have up-to-date data by first clflushing. Don't
	 * overcomplicate things and flush before/after the entire write
	 * whenever its edges are not cacheline aligned.
	 */
	partial_cacheline_write = 0;
	if (needs_clflush & CLFLUSH_BEFORE)
		/* mask of the low bits that make an offset/length partial */
		partial_cacheline_write = boot_cpu_data.x86_clflush_size - 1;

	user_data = u64_to_user_ptr(args->data_ptr);
	remain = args->size;
	offset = offset_in_page(args->offset);
#ifdef __DragonFly__
	/* Hold the backing VM object and raise its paging-in-progress
	 * count so it cannot be torn down while we copy.
	 */
	vm_obj = obj->base.filp;
	VM_OBJECT_LOCK(vm_obj);
	vm_object_pip_add(vm_obj, 1);
#endif
	for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
		struct page *page = i915_gem_object_get_page(obj, idx);
		int length;

		/* Clamp the copy to the remainder of the current page. */
		length = remain;
		if (offset + length > PAGE_SIZE)
			length = PAGE_SIZE - offset;

		ret = shmem_pwrite(page, offset, length, user_data,
				   page_to_phys(page) & obj_do_bit17_swizzling,
				   (offset | length) & partial_cacheline_write,
				   needs_clflush & CLFLUSH_AFTER);
		if (ret)
			break;

		remain -= length;
		user_data += length;
		offset = 0;
	}
#ifdef __DragonFly__
	/* Sanity check: the backing object must not change under us. */
	if (vm_obj != obj->base.filp) {
		kprintf("i915_gem_shmem_pwrite: VM_OBJECT CHANGED! %p %p\n",
			vm_obj, obj->base.filp);
	}
	vm_object_pip_wakeup(vm_obj);
	VM_OBJECT_UNLOCK(vm_obj);
#endif

	intel_fb_obj_flush(obj, ORIGIN_CPU);
	i915_gem_obj_finish_shmem_access(obj);
	return ret;
}
1500
1501 /**
1502 * Writes data to the object referenced by handle.
1503 * @dev: drm device
1504 * @data: ioctl data blob
1505 * @file: drm file
1506 *
1507 * On error, the contents of the buffer that were to be modified are undefined.
1508 */
int
i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_gem_pwrite *args = data;
	struct drm_i915_gem_object *obj;
	int ret;

	/* A zero-length write is trivially complete. */
	if (args->size == 0)
		return 0;

#if 0
	if (!access_ok(VERIFY_READ,
		       u64_to_user_ptr(args->data_ptr),
		       args->size))
		return -EFAULT;
#endif

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/* Bounds check destination. */
	if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) {
		ret = -EINVAL;
		goto err;
	}

	trace_i915_gem_object_pwrite(obj, args->offset, args->size);

	/* Give the backend a chance to service the write itself;
	 * -ENODEV is the sentinel for "not handled, use the generic
	 * paths below".
	 */
	ret = -ENODEV;
	if (obj->ops->pwrite)
		ret = obj->ops->pwrite(obj, args);
	if (ret != -ENODEV)
		goto err;

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT,
				   to_rps_client(file));
	if (ret)
		goto err;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		goto err;

	ret = -EFAULT;
	/* We can only do the GTT pwrite on untiled buffers, as otherwise
	 * it would end up going through the fenced access, and we'll get
	 * different detiling behavior between reading and writing.
	 * pread/pwrite currently are reading and writing from the CPU
	 * perspective, requiring manual detiling by the client.
	 */
	if (!i915_gem_object_has_struct_page(obj) ||
	    cpu_write_needs_clflush(obj))
		/* Note that the gtt paths might fail with non-page-backed user
		 * pointers (e.g. gtt mappings when moving data between
		 * textures). Fallback to the shmem path in that case.
		 */
		ret = i915_gem_gtt_pwrite_fast(obj, args);

	/* Fast path declined or faulted: use phys or shmem copies. */
	if (ret == -EFAULT || ret == -ENOSPC) {
		if (obj->phys_handle)
			ret = i915_gem_phys_pwrite(obj, args, file);
		else
			ret = i915_gem_shmem_pwrite(obj, args);
	}

	i915_gem_object_unpin_pages(obj);
err:
	i915_gem_object_put(obj);
	return ret;
}
1584
i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object * obj)1585 static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
1586 {
1587 struct drm_i915_private *i915;
1588 struct list_head *list;
1589 struct i915_vma *vma;
1590
1591 GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
1592
1593 list_for_each_entry(vma, &obj->vma_list, obj_link) {
1594 if (!i915_vma_is_ggtt(vma))
1595 break;
1596
1597 if (i915_vma_is_active(vma))
1598 continue;
1599
1600 if (!drm_mm_node_allocated(&vma->node))
1601 continue;
1602
1603 list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
1604 }
1605
1606 i915 = to_i915(obj->base.dev);
1607 lockmgr(&i915->mm.obj_lock, LK_EXCLUSIVE);
1608 list = obj->bind_count ? &i915->mm.bound_list : &i915->mm.unbound_list;
1609 list_move_tail(&obj->mm.link, list);
1610 lockmgr(&i915->mm.obj_lock, LK_RELEASE);
1611 }
1612
1613 /**
1614 * Called when user space prepares to use an object with the CPU, either
1615 * through the mmap ioctl's mapping or a GTT mapping.
1616 * @dev: drm device
1617 * @data: ioctl data blob
1618 * @file: drm file
1619 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file)
{
	struct drm_i915_gem_set_domain *args = data;
	struct drm_i915_gem_object *obj;
	uint32_t read_domains = args->read_domains;
	uint32_t write_domain = args->write_domain;
	int err;

	/* Only handle setting domains to types used by the CPU. */
	if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	/* Having something in the write domain implies it's in the read
	 * domain, and only that read domain. Enforce that in the request.
	 */
	if (write_domain != 0 && read_domains != write_domain)
		return -EINVAL;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/* Try to flush the object off the GPU without holding the lock.
	 * We will repeat the flush holding the lock in the normal manner
	 * to catch cases where we are gazumped.
	 */
	err = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write_domain ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT,
				   to_rps_client(file));
	if (err)
		goto out;

	/* Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	err = i915_gem_object_pin_pages(obj);
	if (err)
		goto out;

	err = i915_mutex_lock_interruptible(dev);
	if (err)
		goto out_unpin;

	/* Dispatch on the requested read domain: WC first, then GTT,
	 * otherwise fall back to the CPU domain.
	 */
	if (read_domains & I915_GEM_DOMAIN_WC)
		err = i915_gem_object_set_to_wc_domain(obj, write_domain);
	else if (read_domains & I915_GEM_DOMAIN_GTT)
		err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
	else
		err = i915_gem_object_set_to_cpu_domain(obj, write_domain);

	/* And bump the LRU for this access */
	i915_gem_object_bump_inactive_ggtt(obj);

	mutex_unlock(&dev->struct_mutex);

	/* Notify frontbuffer tracking that a CPU write is imminent. */
	if (write_domain != 0)
		intel_fb_obj_invalidate(obj,
					fb_write_origin(obj, write_domain));

out_unpin:
	i915_gem_object_unpin_pages(obj);
out:
	i915_gem_object_put(obj);
	return err;
}
1694
1695 /**
1696 * Called when user space has done writes to this buffer
1697 * @dev: drm device
1698 * @data: ioctl data blob
1699 * @file: drm file
1700 */
1701 int
i915_gem_sw_finish_ioctl(struct drm_device * dev,void * data,struct drm_file * file)1702 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
1703 struct drm_file *file)
1704 {
1705 struct drm_i915_gem_sw_finish *args = data;
1706 struct drm_i915_gem_object *obj;
1707
1708 obj = i915_gem_object_lookup(file, args->handle);
1709 if (!obj)
1710 return -ENOENT;
1711
1712 /* Pinned buffers may be scanout, so flush the cache */
1713 i915_gem_object_flush_if_display(obj);
1714 i915_gem_object_put(obj);
1715
1716 return 0;
1717 }
1718
1719 static int
vm_object_map_wc_callback(vm_page_t p,void * data)1720 vm_object_map_wc_callback(vm_page_t p, void *data)
1721 {
1722 pmap_page_set_memattr(p, VM_MEMATTR_WRITE_COMBINING);
1723
1724 return 0;
1725 }
1726
1727 /**
1728 * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address
1729 * it is mapped to.
1730 * @dev: drm device
1731 * @data: ioctl data blob
1732 * @file: drm file
1733 *
1734 * While the mapping holds a reference on the contents of the object, it doesn't
1735 * imply a ref on the object itself.
1736 *
1737 * IMPORTANT:
1738 *
1739 * DRM driver writers who look a this function as an example for how to do GEM
1740 * mmap support, please don't implement mmap support like here. The modern way
1741 * to implement DRM mmap support is with an mmap offset ioctl (like
1742 * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly.
1743 * That way debug tooling like valgrind will understand what's going on, hiding
1744 * the mmap call in a driver private ioctl will break that. The i915 driver only
1745 * does cpu mmaps this way because we didn't know better.
1746 */
1747 int
i915_gem_mmap_ioctl(struct drm_device * dev,void * data,struct drm_file * file)1748 i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
1749 struct drm_file *file)
1750 {
1751 struct drm_i915_gem_mmap *args = data;
1752 struct drm_i915_gem_object *obj;
1753 unsigned long addr;
1754 #ifdef __DragonFly__
1755 struct proc *p = curproc;
1756 vm_map_t map = &p->p_vmspace->vm_map;
1757 vm_size_t size;
1758 int error = 0, rv;
1759 struct vm_object *vm_obj;
1760 struct rb_vm_page_scan_info info;
1761 #endif
1762
1763 if (args->flags & ~(I915_MMAP_WC))
1764 return -EINVAL;
1765
1766 if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT))
1767 return -ENODEV;
1768
1769 obj = i915_gem_object_lookup(file, args->handle);
1770 if (!obj)
1771 return -ENOENT;
1772
1773 /* prime objects have no backing filp to GEM mmap
1774 * pages from.
1775 */
1776 if (!obj->base.filp) {
1777 i915_gem_object_put(obj);
1778 return -EINVAL;
1779 }
1780
1781 #ifdef __DragonFly__
1782 if (args->size == 0)
1783 goto out;
1784
1785 size = round_page(args->size);
1786 if (map->size + size > p->p_rlimit[RLIMIT_VMEM].rlim_cur) {
1787 error = -ENOMEM;
1788 goto out;
1789 }
1790
1791 /*
1792 * Call hint to ensure that NULL is not returned as a valid address
1793 * and to reduce vm_map traversals. XXX causes instability, use a
1794 * fixed low address as the start point instead to avoid the NULL
1795 * return issue.
1796 */
1797 addr = PAGE_SIZE;
1798
1799 /*
1800 * Use 256KB alignment. It is unclear why this matters for a
1801 * virtual address but it appears to fix a number of application/X
1802 * crashes and kms console switching is much faster.
1803 */
1804 vm_object_hold(obj->base.filp);
1805 vm_object_reference_locked(obj->base.filp);
1806 vm_object_drop(obj->base.filp);
1807
1808 /* Something gets wrong here: fails to mmap 4096 */
1809 rv = vm_map_find(map, obj->base.filp, NULL,
1810 args->offset, &addr, args->size,
1811 256 * 1024, /* align */
1812 TRUE, /* fitit */
1813 VM_MAPTYPE_NORMAL, VM_SUBSYS_DRM_GEM,
1814 VM_PROT_READ | VM_PROT_WRITE, /* prot */
1815 VM_PROT_READ | VM_PROT_WRITE, /* max */
1816 MAP_SHARED /* cow */);
1817 if (rv != KERN_SUCCESS) {
1818 vm_object_deallocate(obj->base.filp);
1819 error = -vm_mmap_to_errno(rv);
1820 } else {
1821 args->addr_ptr = (uint64_t)addr;
1822 }
1823 #else
1824 addr = vm_mmap(obj->base.filp, 0, args->size,
1825 PROT_READ | PROT_WRITE, MAP_SHARED,
1826 args->offset);
1827 #endif /* __DragonFly__ */
1828 if (args->flags & I915_MMAP_WC) { /* I915_PARAM_MMAP_VERSION */
1829 struct mm_struct *mm = current->mm;
1830 #if 0
1831 struct vm_area_struct *vma;
1832 #endif
1833
1834 if (down_write_killable(&mm->mmap_sem)) {
1835 i915_gem_object_put(obj);
1836 return -EINTR;
1837 }
1838 #ifdef __DragonFly__
1839 vm_obj = obj->base.filp;
1840 vm_object_hold(vm_obj);
1841 vm_obj->memattr = pgprot_writecombine(vm_obj->memattr);
1842 /* Change attributes of all pages in the mapping here */
1843 info.error = 0;
1844 info.count = 0;
1845 vm_page_rb_tree_RB_SCAN(
1846 &vm_obj->rb_memq, /* *head */
1847 NULL, /* *scancmp */
1848 vm_object_map_wc_callback, /* *callback */
1849 &info /* *data */
1850 );
1851 vm_object_drop(vm_obj);
1852 #else
1853 vma = find_vma(mm, addr);
1854 if (vma)
1855 vma->vm_page_prot =
1856 pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
1857 else
1858 addr = -ENOMEM;
1859 #endif /* __DragonFly__ */
1860 up_write(&mm->mmap_sem);
1861
1862 /* This may race, but that's ok, it only gets set */
1863 WRITE_ONCE(obj->frontbuffer_ggtt_origin, ORIGIN_CPU);
1864 }
1865
1866 out:
1867 i915_gem_object_put(obj);
1868 if (IS_ERR((void *)addr))
1869 return addr;
1870
1871 args->addr_ptr = (uint64_t) addr;
1872
1873 return 0;
1874 }
1875
tile_row_pages(struct drm_i915_gem_object * obj)1876 static unsigned int tile_row_pages(struct drm_i915_gem_object *obj)
1877 {
1878 return i915_gem_object_get_tile_row_size(obj) >> PAGE_SHIFT;
1879 }
1880
1881 /**
1882 * i915_gem_mmap_gtt_version - report the current feature set for GTT mmaps
1883 *
1884 * A history of the GTT mmap interface:
1885 *
 * 0 - Everything had to fit into the GTT. Both parties of a memcpy had to
 *     be aligned and suitable for fencing, and still fit into the available
1888 * mappable space left by the pinned display objects. A classic problem
1889 * we called the page-fault-of-doom where we would ping-pong between
1890 * two objects that could not fit inside the GTT and so the memcpy
1891 * would page one object in at the expense of the other between every
1892 * single byte.
1893 *
1894 * 1 - Objects can be any size, and have any compatible fencing (X Y, or none
1895 * as set via i915_gem_set_tiling() [DRM_I915_GEM_SET_TILING]). If the
1896 * object is too large for the available space (or simply too large
1897 * for the mappable aperture!), a view is created instead and faulted
1898 * into userspace. (This view is aligned and sized appropriately for
1899 * fenced access.)
1900 *
1901 * 2 - Recognise WC as a separate cache domain so that we can flush the
1902 * delayed writes via GTT before performing direct access via WC.
1903 *
1904 * Restrictions:
1905 *
1906 * * snoopable objects cannot be accessed via the GTT. It can cause machine
1907 * hangs on some architectures, corruption on others. An attempt to service
1908 * a GTT page fault from a snoopable object will generate a SIGBUS.
1909 *
 * * the object must be able to fit into RAM (physical memory, though not
 *   limited to the mappable aperture).
1912 *
1913 *
1914 * Caveats:
1915 *
1916 * * a new GTT page fault will synchronize rendering from the GPU and flush
1917 * all data to system memory. Subsequent access will not be synchronized.
1918 *
1919 * * all mappings are revoked on runtime device suspend.
1920 *
1921 * * there are only 8, 16 or 32 fence registers to share between all users
1922 * (older machines require fence register for display and blitter access
1923 * as well). Contention of the fence registers will cause the previous users
1924 * to be unmapped and any new access will generate new page faults.
1925 *
1926 * * running out of memory while servicing a fault may generate a SIGBUS,
1927 * rather than the expected SIGSEGV.
1928 */
i915_gem_mmap_gtt_version(void)1929 int i915_gem_mmap_gtt_version(void)
1930 {
1931 return 2;
1932 }
1933
1934 static inline struct i915_ggtt_view
compute_partial_view(struct drm_i915_gem_object * obj,pgoff_t page_offset,unsigned int chunk)1935 compute_partial_view(struct drm_i915_gem_object *obj,
1936 pgoff_t page_offset,
1937 unsigned int chunk)
1938 {
1939 struct i915_ggtt_view view;
1940
1941 if (i915_gem_object_is_tiled(obj))
1942 chunk = roundup(chunk, tile_row_pages(obj));
1943
1944 view.type = I915_GGTT_VIEW_PARTIAL;
1945 view.partial.offset = rounddown(page_offset, chunk);
1946 view.partial.size =
1947 min_t(unsigned int, chunk,
1948 (obj->base.size >> PAGE_SHIFT) - view.partial.offset);
1949
1950 /* If the partial covers the entire object, just create a normal VMA. */
1951 if (chunk >= obj->base.size >> PAGE_SHIFT)
1952 view.type = I915_GGTT_VIEW_NORMAL;
1953
1954 return view;
1955 }
1956
1957 #ifdef __DragonFly__
drm_vma_node_unmap(struct drm_vma_offset_node * node,struct address_space * file_mapping)1958 static inline void drm_vma_node_unmap(struct drm_vma_offset_node *node,
1959 struct address_space *file_mapping)
1960 {
1961 struct drm_i915_gem_object *obj = container_of(
1962 node,struct drm_i915_gem_object, base.vma_node);
1963 vm_object_t devobj;
1964
1965 devobj = cdev_pager_lookup(obj);
1966 if (devobj != NULL) {
1967 VM_OBJECT_LOCK(devobj);
1968 vm_object_page_remove(devobj, 0, 0, false);
1969 VM_OBJECT_UNLOCK(devobj);
1970 vm_object_deallocate(devobj);
1971 }
1972 }
1973 #endif
1974
__i915_gem_object_release_mmap(struct drm_i915_gem_object * obj)1975 static void __i915_gem_object_release_mmap(struct drm_i915_gem_object *obj)
1976 {
1977 struct i915_vma *vma;
1978
1979 GEM_BUG_ON(!obj->userfault_count);
1980
1981 obj->userfault_count = 0;
1982 list_del(&obj->userfault_link);
1983 #ifdef __DragonFly__
1984 drm_vma_node_unmap(&obj->base.vma_node, NULL);
1985 #else
1986 drm_vma_node_unmap(&obj->base.vma_node,
1987 obj->base.dev->anon_inode->i_mapping);
1988 #endif
1989
1990 list_for_each_entry(vma, &obj->vma_list, obj_link) {
1991 if (!i915_vma_is_ggtt(vma))
1992 break;
1993
1994 i915_vma_unset_userfault(vma);
1995 }
1996 }
1997
1998 /**
1999 * i915_gem_fault - fault a page into the GTT
2000 *
2001 * vm_obj is locked on entry and expected to be locked on return.
2002 *
2003 * This is a OBJT_MGTDEVICE object, *mres will be NULL and should be set
2004 * to the desired vm_page. The page is not indexed into the vm_obj.
2005 *
 * XXX Most GEM calls appear to be interruptible, but we can't hard loop
 * in that case. Release all resources and wait 1 tick before retrying.
 * This is a huge problem which needs to be fixed by getting rid of most
 * of the interruptibility. The linux code does not retry but does appear
 * to have some sort of mechanism (VM_FAULT_NOPAGE ?) for the higher level
 * to be able to retry.
2012 *
2013 * --
2014 * @vma: VMA in question
2015 * @vmf: fault info
2016 *
2017 * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped
2018 * from userspace. The fault handler takes care of binding the object to
2019 * the GTT (if needed), allocating and programming a fence register (again,
2020 * only if needed based on whether the old reg is still valid or the object
2021 * is tiled) and inserting a new PTE into the faulting process.
2022 *
2023 * Note that the faulting process may involve evicting existing objects
2024 * from the GTT and/or fence registers to make room. So performance may
2025 * suffer if the GTT working set is large or there are few fence registers
2026 * left.
2027 *
2028 * The current feature set supported by i915_gem_fault() and thus GTT mmaps
2029 * is exposed via I915_PARAM_MMAP_GTT_VERSION (see i915_gem_mmap_gtt_version).
2030 * vm_obj is locked on entry and expected to be locked on return. The VM
2031 * pager has placed an anonymous memory page at (obj,offset) which we have
2032 * to replace.
2033 */
// int i915_gem_fault(struct vm_fault *vmf)
int i915_gem_fault(vm_object_t vm_obj, vm_ooffset_t offset, int prot, vm_page_t *mres)
{
#define MIN_CHUNK_PAGES ((1 << 20) >> PAGE_SHIFT) /* 1 MiB */
	struct vm_area_struct *area;
	struct drm_i915_gem_object *obj = to_intel_bo(vm_obj->handle);
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct i915_ggtt *ggtt = &dev_priv->ggtt;
	bool write = !!(prot & VM_PROT_WRITE);
	struct i915_vma *vma;
	pgoff_t page_offset;
	vm_page_t m;
	unsigned int flags;
	int ret;
#ifdef __DragonFly__
	/* Non-zero once we hold a pip reference on vm_obj (see below). */
	int didref = 0;
	struct vm_area_struct vmas;

	/* Fill-in a stack-local vm_area_struct so the Linux-derived code
	 * below sees the mapping bounds it expects. */
	area = &vmas;
	area->vm_private_data = vm_obj->handle;
	area->vm_start = 0;
	area->vm_end = obj->base.size;
	area->vm_mm = current->mm;
	// XXX: in Linux, mmap_sem is held on entry of this function
	// XXX: should that be an exclusive lock ?
	down_read(&area->vm_mm->mmap_sem);
#endif

	/* We don't use vmf->pgoff since that has the fake offset */
	page_offset = (unsigned long)offset >> PAGE_SHIFT;

	/*
	 * vm_fault() has supplied us with a busied page placeholding
	 * the operation. This presents a lock order reversal issue
	 * against i915_gem_release_mmap() for our device mutex.
	 *
	 * Deal with the problem by getting rid of the placeholder now,
	 * and then dealing with the potential for a new placeholder when
	 * we try to insert later.
	 */
	KKASSERT(*mres == NULL);
	m = NULL;

retry:
	trace_i915_gem_object_fault(obj, page_offset, true, write);

	/* Try to flush the object off the GPU first without holding the lock.
	 * Upon acquiring the lock, we will perform our sanity checks and then
	 * repeat the flush holding the lock in the normal manner to catch cases
	 * where we are gazumped.
	 */
	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE,
				   MAX_SCHEDULE_TIMEOUT,
				   NULL);
	if (ret)
		goto err;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		goto err;

	intel_runtime_pm_get(dev_priv);

	ret = i915_mutex_lock_interruptible(dev);
	if (ret) {
		if (ret != -EINTR)
			kprintf("i915: caught bug(%d) (mutex_lock_inter)\n", ret);
		goto err_rpm;
	}

	/* Access to snoopable pages through the GTT is incoherent. */
	if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev_priv)) {
		kprintf("i915: caught bug() (cache_level %d %d)\n",
			(obj->cache_level), !HAS_LLC(dev_priv));
		ret = -EFAULT;
		goto err_unlock;
	}

	/* If the object is smaller than a couple of partial vma, it is
	 * not worth only creating a single partial vma - we may as well
	 * clear enough space for the full object.
	 */
	flags = PIN_MAPPABLE;
	if (obj->base.size > 2 * MIN_CHUNK_PAGES << PAGE_SHIFT)
		flags |= PIN_NONBLOCK | PIN_NONFAULT;

	/* Now pin it into the GTT as needed */
	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, flags);
	if (IS_ERR(vma)) {
		/* Use a partial view if it is bigger than available space */
		struct i915_ggtt_view view =
			compute_partial_view(obj, page_offset, MIN_CHUNK_PAGES);

		kprintf("i915_gem_fault: CHUNKING PASS\n");

		/* Userspace is now writing through an untracked VMA, abandon
		 * all hope that the hardware is able to track future writes.
		 */
		obj->frontbuffer_ggtt_origin = ORIGIN_CPU;

		vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE);
	}
	if (IS_ERR(vma)) {
		kprintf("i915: caught bug() (VMA error %ld objsize %ld)\n",
			PTR_ERR(vma), obj->base.size);
		ret = PTR_ERR(vma);
		goto err_unlock;
	}

	ret = i915_gem_object_set_to_gtt_domain(obj, write);
	if (ret) {
		kprintf("i915: caught bug(%d) (set_to_gtt_dom)\n", ret);
		goto err_unpin;
	}

	ret = i915_vma_pin_fence(vma);
	if (ret) {
		kprintf("i915: caught bug(%d) (vma_pin_fence)\n", ret);
		goto err_unpin;
	}

#ifdef __DragonFly__
	/*
	 * Add a pip count to avoid destruction and certain other
	 * complex operations (such as collapses?) while unlocked.
	 */
	vm_object_pip_add(vm_obj, 1);
	didref = 1;

	ret = 0;

#if 0
	/* NO LONGER USED */
	{
		vm_page_t m;

		m = vm_page_lookup(vm_obj, OFF_TO_IDX(offset));
		if (m) {
			if (vm_page_busy_try(m, false)) {
				kprintf("i915_gem_fault: BUSY\n");
				ret = -EINTR;
				goto err_unpin;
			}
		}
		goto have_page;
	}
#endif

	/* Finally, remap it using the new GTT offset.
	 * The aperture backing is fictitious memory, so translate the
	 * physical address inside the mappable aperture to its vm_page. */
	m = vm_phys_fictitious_to_vm_page(ggtt->mappable_base +
					  vma->node.start + offset);

	if (m == NULL) {
		kprintf("i915: caught bug() (phys_fict_to_vm)\n");
		ret = -EFAULT;
		goto err_fence;
	}
	KASSERT((m->flags & PG_FICTITIOUS) != 0, ("not fictitious %p", m));
	KASSERT(m->wire_count == 1, ("wire_count not 1 %p", m));

	/*
	 * Try to busy the page. Fails on non-zero return.
	 */
	if (vm_page_busy_try(m, false)) {
		kprintf("i915_gem_fault: BUSY(2)\n");
		ret = -EINTR;
		goto err_fence;
	}
	m->valid = VM_PAGE_BITS_ALL;

#ifdef USE_INSERT
	/* NO LONGER USED */
	/*
	 * This should always work since we already checked via a lookup
	 * above.
	 */
	if (vm_page_insert(m, vm_obj, OFF_TO_IDX(offset)) == FALSE) {
		kprintf("i915:gem_fault: page %p,%jd already in object\n",
			vm_obj,
			OFF_TO_IDX(offset));
		vm_page_wakeup(m);
		ret = -EINTR;
		goto err_unpin;
	}
have_page:
#endif
	/* Hand the busied page back to the VM fault code. */
	*mres = m;
	ret = VM_PAGER_OK;

	/*
	 * ALTERNATIVE ERROR RETURN.
	 *
	 * OBJECT EXPECTED TO BE LOCKED.
	 */
#endif

	/* Mark as being mmapped into userspace for later revocation */
	assert_rpm_wakelock_held(dev_priv);
	if (!i915_vma_set_userfault(vma) && !obj->userfault_count++)
		list_add(&obj->userfault_link, &dev_priv->mm.userfault_list);
	GEM_BUG_ON(!obj->userfault_count);

	/* NOTE: the labels below double as the normal unwind path; the
	 * success case (ret == VM_PAGER_OK) falls through them to drop
	 * the fence/vma pins, the struct mutex and the RPM wakeref. */
err_fence:
	i915_vma_unpin_fence(vma);
err_unpin:
	__i915_vma_unpin(vma);
err_unlock:
	mutex_unlock(&dev->struct_mutex);
err_rpm:
	intel_runtime_pm_put(dev_priv);
	i915_gem_object_unpin_pages(obj);
err:
	switch (ret) {
	case -EIO:
		/*
		 * We eat errors when the gpu is terminally wedged to avoid
		 * userspace unduly crashing (gl has no provisions for mmaps to
		 * fail). But any other -EIO isn't ours (e.g. swap in failure)
		 * and so needs to be reported.
		 */
		if (!i915_terminally_wedged(&dev_priv->gpu_error)) {
			// ret = VM_FAULT_SIGBUS;
			break;
		}
		/* terminally wedged: fall through and retry */
	case -EAGAIN:
		/*
		 * EAGAIN means the gpu is hung and we'll wait for the error
		 * handler to reset everything when re-faulting in
		 * i915_mutex_lock_interruptible.
		 */
		/* fall through */
	case -ERESTARTSYS:
	case -EINTR:
#ifdef __DragonFly__
		/* Drop everything, sleep a tick and retry the fault. */
		if (didref) {
			kprintf("i915: caught bug(%d) (retry)\n", ret);
			vm_object_pip_wakeup(vm_obj);
			didref = 0;
		}
		VM_OBJECT_UNLOCK(vm_obj);
		int dummy;
		tsleep(&dummy, 0, "delay", 1); /* XXX */
		VM_OBJECT_LOCK(vm_obj);
		goto retry;
	case VM_PAGER_OK:
		break;
#endif
	default:
		WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret);
		ret = VM_PAGER_ERROR;
		break;
	}

#ifdef __DragonFly__
	if (didref)
		vm_object_pip_wakeup(vm_obj);
	else
		kprintf("i915: caught bug(%d)\n", ret);

	up_read(&area->vm_mm->mmap_sem);
#endif

	return ret;
}
2300
2301 /**
2302 * i915_gem_release_mmap - remove physical page mappings
2303 * @obj: obj in question
2304 *
2305 * Preserve the reservation of the mmapping with the DRM core code, but
2306 * relinquish ownership of the pages back to the system.
2307 *
2308 * It is vital that we remove the page mapping if we have mapped a tiled
2309 * object through the GTT and then lose the fence register due to
2310 * resource pressure. Similarly if the object has been moved out of the
 * aperture, then pages mapped into userspace must be revoked. Removing the
2312 * mapping will then trigger a page fault on the next user access, allowing
2313 * fixup by i915_gem_fault().
2314 */
2315 void
i915_gem_release_mmap(struct drm_i915_gem_object * obj)2316 i915_gem_release_mmap(struct drm_i915_gem_object *obj)
2317 {
2318 struct drm_i915_private *i915 = to_i915(obj->base.dev);
2319
2320 /* Serialisation between user GTT access and our code depends upon
2321 * revoking the CPU's PTE whilst the mutex is held. The next user
2322 * pagefault then has to wait until we release the mutex.
2323 *
2324 * Note that RPM complicates somewhat by adding an additional
2325 * requirement that operations to the GGTT be made holding the RPM
2326 * wakeref.
2327 */
2328 lockdep_assert_held(&i915->drm.struct_mutex);
2329 intel_runtime_pm_get(i915);
2330
2331 if (!obj->userfault_count)
2332 goto out;
2333
2334 __i915_gem_object_release_mmap(obj);
2335
2336 /* Ensure that the CPU's PTE are revoked and there are not outstanding
2337 * memory transactions from userspace before we return. The TLB
2338 * flushing implied above by changing the PTE above *should* be
2339 * sufficient, an extra barrier here just provides us with a bit
2340 * of paranoid documentation about our requirement to serialise
2341 * memory writes before touching registers / GSM.
2342 */
2343 wmb();
2344
2345 out:
2346 intel_runtime_pm_put(i915);
2347 }
2348
i915_gem_runtime_suspend(struct drm_i915_private * dev_priv)2349 void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv)
2350 {
2351 struct drm_i915_gem_object *obj, *on;
2352 int i;
2353
2354 /*
2355 * Only called during RPM suspend. All users of the userfault_list
2356 * must be holding an RPM wakeref to ensure that this can not
2357 * run concurrently with themselves (and use the struct_mutex for
2358 * protection between themselves).
2359 */
2360
2361 list_for_each_entry_safe(obj, on,
2362 &dev_priv->mm.userfault_list, userfault_link)
2363 __i915_gem_object_release_mmap(obj);
2364
2365 /* The fence will be lost when the device powers down. If any were
2366 * in use by hardware (i.e. they are pinned), we should not be powering
2367 * down! All other fences will be reacquired by the user upon waking.
2368 */
2369 for (i = 0; i < dev_priv->num_fence_regs; i++) {
2370 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
2371
2372 /* Ideally we want to assert that the fence register is not
2373 * live at this point (i.e. that no piece of code will be
2374 * trying to write through fence + GTT, as that both violates
2375 * our tracking of activity and associated locking/barriers,
2376 * but also is illegal given that the hw is powered down).
2377 *
2378 * Previously we used reg->pin_count as a "liveness" indicator.
2379 * That is not sufficient, and we need a more fine-grained
2380 * tool if we want to have a sanity check here.
2381 */
2382
2383 if (!reg->vma)
2384 continue;
2385
2386 GEM_BUG_ON(i915_vma_has_userfault(reg->vma));
2387 reg->dirty = true;
2388 }
2389 }
2390
i915_gem_object_create_mmap_offset(struct drm_i915_gem_object * obj)2391 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
2392 {
2393 struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
2394 int err;
2395
2396 err = drm_gem_create_mmap_offset(&obj->base);
2397 if (likely(!err))
2398 return 0;
2399
2400 /* Attempt to reap some mmap space from dead objects */
2401 do {
2402 err = i915_gem_wait_for_idle(dev_priv, I915_WAIT_INTERRUPTIBLE);
2403 if (err)
2404 break;
2405
2406 i915_gem_drain_freed_objects(dev_priv);
2407 err = drm_gem_create_mmap_offset(&obj->base);
2408 if (!err)
2409 break;
2410
2411 } while (flush_delayed_work(&dev_priv->gt.retire_work));
2412
2413 return err;
2414 }
2415
2416 #if 0
2417 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
2418 {
2419 drm_gem_free_mmap_offset(&obj->base);
2420 }
2421 #endif
2422
2423 int
i915_gem_mmap_gtt(struct drm_file * file,struct drm_device * dev,uint32_t handle,uint64_t * offset)2424 i915_gem_mmap_gtt(struct drm_file *file,
2425 struct drm_device *dev,
2426 uint32_t handle,
2427 uint64_t *offset)
2428 {
2429 struct drm_i915_gem_object *obj;
2430 int ret;
2431
2432 obj = i915_gem_object_lookup(file, handle);
2433 if (!obj)
2434 return -ENOENT;
2435
2436 ret = i915_gem_object_create_mmap_offset(obj);
2437 if (ret == 0)
2438 *offset = DRM_GEM_MAPPING_OFF(obj->base.map_list.key) |
2439 DRM_GEM_MAPPING_KEY;
2440
2441 i915_gem_object_put(obj);
2442 return ret;
2443 }
2444
2445 /**
2446 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
2447 * @dev: DRM device
2448 * @data: GTT mapping ioctl data
2449 * @file: GEM object info
2450 *
2451 * Simply returns the fake offset to userspace so it can mmap it.
2452 * The mmap call will end up in drm_gem_mmap(), which will set things
2453 * up so we can get faults in the handler above.
2454 *
2455 * The fault handler will take care of binding the object into the GTT
2456 * (since it may have been evicted to make room for something), allocating
2457 * a fence register, and mapping the appropriate aperture address into
2458 * userspace.
2459 */
2460 int
i915_gem_mmap_gtt_ioctl(struct drm_device * dev,void * data,struct drm_file * file)2461 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
2462 struct drm_file *file)
2463 {
2464 struct drm_i915_gem_mmap_gtt *args = data;
2465
2466 return i915_gem_mmap_gtt(file, dev, args->handle, (uint64_t *)&args->offset);
2467 }
2468
2469 /* Immediately discard the backing storage */
2470 static void
i915_gem_object_truncate(struct drm_i915_gem_object * obj)2471 i915_gem_object_truncate(struct drm_i915_gem_object *obj)
2472 {
2473 vm_object_t vm_obj = obj->base.filp;
2474
2475 if (obj->base.filp == NULL)
2476 return;
2477
2478 VM_OBJECT_LOCK(vm_obj);
2479 vm_object_page_remove(vm_obj, 0, 0, false);
2480 VM_OBJECT_UNLOCK(vm_obj);
2481
2482 /* Our goal here is to return as much of the memory as
2483 * is possible back to the system as we are called from OOM.
2484 * To do this we must instruct the shmfs to drop all of its
2485 * backing pages, *now*.
2486 */
2487 #if 0
2488 shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1);
2489 #endif
2490 obj->mm.madv = __I915_MADV_PURGED;
2491 obj->mm.pages = ERR_PTR(-EFAULT);
2492 }
2493
2494 /* Try to discard unwanted pages */
void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj)
{
#if 0
	struct address_space *mapping;
#endif

	lockdep_assert_held(&obj->mm.lock);
	GEM_BUG_ON(i915_gem_object_has_pages(obj));

	switch (obj->mm.madv) {
	case I915_MADV_DONTNEED:
		i915_gem_object_truncate(obj);
		/* fall through - truncation marks the object purged */
	case __I915_MADV_PURGED:
		return;
	}

	/* No shmfs backing store, nothing to invalidate. */
	if (obj->base.filp == NULL)
		return;

#if 0
	mapping = obj->base.filp->f_mapping,
	invalidate_mapping_pages(mapping, 0, (loff_t)-1);
#endif
	/* Drop clean pagecache pages so the system can reclaim them. */
	invalidate_mapping_pages(obj->base.filp, 0, (loff_t)-1);
}
2520
2521 static void
i915_gem_object_put_pages_gtt(struct drm_i915_gem_object * obj,struct sg_table * pages)2522 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj,
2523 struct sg_table *pages)
2524 {
2525 struct sgt_iter sgt_iter;
2526 struct page *page;
2527
2528 __i915_gem_object_release_shmem(obj, pages, true);
2529
2530 i915_gem_gtt_finish_pages(obj, pages);
2531
2532 if (i915_gem_object_needs_bit17_swizzle(obj))
2533 i915_gem_object_save_bit_17_swizzle(obj, pages);
2534
2535 for_each_sgt_page(page, sgt_iter, pages) {
2536 if (obj->mm.dirty)
2537 set_page_dirty(page);
2538
2539 if (obj->mm.madv == I915_MADV_WILLNEED)
2540 mark_page_accessed(page);
2541
2542 put_page(page);
2543 }
2544 obj->mm.dirty = false;
2545
2546 sg_free_table(pages);
2547 kfree(pages);
2548 }
2549
/* Empty the radix tree that caches the sg lookup results used by
 * i915_gem_object_get_page(); called when the backing pages go away.
 * NOTE(review): deletion under rcu_read_lock() alone presumes the caller
 * already excludes concurrent inserts (obj->mm.lock) - confirm. */
static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj)
{
	struct radix_tree_iter iter;
	void __rcu **slot;

	rcu_read_lock();
	radix_tree_for_each_slot(slot, &obj->mm.get_page.radix, &iter, 0)
		radix_tree_delete(&obj->mm.get_page.radix, iter.index);
	rcu_read_unlock();
}
2560
void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
				 enum i915_mm_subclass subclass)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct sg_table *pages;

	/* Cheap unlocked pre-checks; rechecked under the lock below. */
	if (i915_gem_object_has_pinned_pages(obj))
		return;

	GEM_BUG_ON(obj->bind_count);
	if (!i915_gem_object_has_pages(obj))
		return;

	/* May be called by shrinker from within get_pages() (on another bo) */
	mutex_lock_nested(&obj->mm.lock, subclass);
	if (unlikely(atomic_read(&obj->mm.pages_pin_count)))
		goto unlock;

	/* ->put_pages might need to allocate memory for the bit17 swizzle
	 * array, hence protect them from being reaped by removing them from gtt
	 * lists early. */
	pages = fetch_and_zero(&obj->mm.pages);
	GEM_BUG_ON(!pages);

	lockmgr(&i915->mm.obj_lock, LK_EXCLUSIVE);
	list_del(&obj->mm.link);
	lockmgr(&i915->mm.obj_lock, LK_RELEASE);

	/* Tear down any kernel mapping (see i915_gem_object_pin_map). */
	if (obj->mm.mapping) {
		void *ptr;

		ptr = page_mask_bits(obj->mm.mapping);
		if (is_vmalloc_addr(ptr))
			vunmap(ptr);
		else
			kunmap(kmap_to_page(ptr));

		obj->mm.mapping = NULL;
	}

	__i915_gem_object_reset_page_iter(obj);

	/* obj->mm.pages may hold ERR_PTR after a purge; only hand real
	 * tables back to the backend. */
	if (!IS_ERR(pages))
		obj->ops->put_pages(obj, pages);

	obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0;

unlock:
	mutex_unlock(&obj->mm.lock);
}
2611
i915_sg_trim(struct sg_table * orig_st)2612 static bool i915_sg_trim(struct sg_table *orig_st)
2613 {
2614 struct sg_table new_st;
2615 struct scatterlist *sg, *new_sg;
2616 unsigned int i;
2617
2618 if (orig_st->nents == orig_st->orig_nents)
2619 return false;
2620
2621 if (sg_alloc_table(&new_st, orig_st->nents, GFP_KERNEL | __GFP_NOWARN))
2622 return false;
2623
2624 new_sg = new_st.sgl;
2625 for_each_sg(orig_st->sgl, sg, orig_st->nents, i) {
2626 sg_set_page(new_sg, sg_page(sg), sg->length, 0);
2627 /* called before being DMA mapped, no need to copy sg->dma_* */
2628 new_sg = sg_next(new_sg);
2629 }
2630 GEM_BUG_ON(new_sg); /* Should walk exactly nents and hit the end */
2631
2632 sg_free_table(orig_st);
2633
2634 *orig_st = new_st;
2635 return true;
2636 }
2637
static int i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
	const unsigned long page_count = obj->base.size / PAGE_SIZE;
	unsigned long i;
	struct vm_object *mapping;
	struct sg_table *st;
	struct scatterlist *sg;
	struct sgt_iter sgt_iter;
	struct page *page;
	unsigned long last_pfn = 0;	/* suppress gcc warning */
	unsigned int max_segment = i915_sg_segment_size();
	unsigned int sg_page_sizes;
	gfp_t noreclaim;
	int ret;

	/* Assert that the object is not currently in any GPU domain. As it
	 * wasn't in the GTT, there shouldn't be any way it could have been in
	 * a GPU cache
	 */
	GEM_BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
	GEM_BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);

	st = kmalloc(sizeof(*st), M_DRM, GFP_KERNEL);
	if (st == NULL)
		return -ENOMEM;

	/* Restart point if DMA remapping later forces PAGE_SIZE segments. */
rebuild_st:
	if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
		kfree(st);
		return -ENOMEM;
	}

	/* Get the list of pages out of our struct file. They'll be pinned
	 * at this point until we release them.
	 *
	 * Fail silently without starting the shrinker
	 */
#ifdef __DragonFly__
	/* On DragonFly the backing store is a VM object, held locked for
	 * the whole population loop (unlocked again below / on error). */
	mapping = obj->base.filp;
	VM_OBJECT_LOCK(mapping);
#endif
	noreclaim = mapping_gfp_constraint(mapping,
					   ~(__GFP_IO | __GFP_RECLAIM));
	noreclaim |= __GFP_NORETRY | __GFP_NOWARN;

	sg = st->sgl;
	st->nents = 0;
	sg_page_sizes = 0;
	for (i = 0; i < page_count; i++) {
		/* Escalating shrink passes to try before giving up. */
		const unsigned int shrink[] = {
			I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_PURGEABLE,
			0,
		}, *s = shrink;
		gfp_t gfp = noreclaim;

		do {
			page = shmem_read_mapping_page_gfp(mapping, i, gfp);
			if (likely(!IS_ERR(page)))
				break;

			/* All shrink passes exhausted: report the failure. */
			if (!*s) {
				ret = PTR_ERR(page);
				goto err_sg;
			}

			i915_gem_shrink(dev_priv, 2 * page_count, NULL, *s++);
			cond_resched();

			/* We've tried hard to allocate the memory by reaping
			 * our own buffer, now let the real VM do its job and
			 * go down in flames if truly OOM.
			 *
			 * However, since graphics tend to be disposable,
			 * defer the oom here by reporting the ENOMEM back
			 * to userspace.
			 */
			if (!*s) {
				/* reclaim and warn, but no oom */
				gfp = mapping_gfp_mask(mapping);

				/* Our bo are always dirty and so we require
				 * kswapd to reclaim our pages (direct reclaim
				 * does not effectively begin pageout of our
				 * buffers on its own). However, direct reclaim
				 * only waits for kswapd when under allocation
				 * congestion. So as a result __GFP_RECLAIM is
				 * unreliable and fails to actually reclaim our
				 * dirty pages -- unless you try over and over
				 * again with !__GFP_NORETRY. However, we still
				 * want to fail this allocation rather than
				 * trigger the out-of-memory killer and for
				 * this we want __GFP_RETRY_MAYFAIL.
				 */
				gfp |= __GFP_RETRY_MAYFAIL;
			}
		} while (1);

		/* Start a new sg entry on the first page, when the segment
		 * limit is reached, or when the pages are not contiguous;
		 * otherwise coalesce into the current entry. */
		if (!i ||
		    sg->length >= max_segment ||
		    page_to_pfn(page) != last_pfn + 1) {
			if (i) {
				sg_page_sizes |= sg->length;
				sg = sg_next(sg);
			}
			st->nents++;
			sg_set_page(sg, page, PAGE_SIZE, 0);
		} else {
			sg->length += PAGE_SIZE;
		}
		last_pfn = page_to_pfn(page);

		/* Check that the i965g/gm workaround works. */
		WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL));
	}
	if (sg) { /* loop terminated early; short sg table */
		sg_page_sizes |= sg->length;
		sg_mark_end(sg);
	}
#ifdef __DragonFly__
	VM_OBJECT_UNLOCK(mapping);
#endif

	/* Trim unused sg entries to avoid wasting memory. */
	i915_sg_trim(st);

	ret = i915_gem_gtt_prepare_pages(obj, st);
	if (ret) {
		/* DMA remapping failed? One possible cause is that
		 * it could not reserve enough large entries, asking
		 * for PAGE_SIZE chunks instead may be helpful.
		 */
		if (max_segment > PAGE_SIZE) {
			for_each_sgt_page(page, sgt_iter, st)
				put_page(page);
			sg_free_table(st);

			max_segment = PAGE_SIZE;
			goto rebuild_st;
		} else {
			dev_warn(&dev_priv->drm.pdev->dev,
				 "Failed to DMA remap %lu pages\n",
				 page_count);
			goto err_pages;
		}
	}

	if (i915_gem_object_needs_bit17_swizzle(obj))
		i915_gem_object_do_bit_17_swizzle(obj, st);

	__i915_gem_object_set_pages(obj, st, sg_page_sizes);

	return 0;

err_sg:
	sg_mark_end(sg);
err_pages:
	for_each_sgt_page(page, sgt_iter, st)
		put_page(page);
#ifdef __DragonFly__
	VM_OBJECT_UNLOCK(mapping);
#endif
	sg_free_table(st);
	kfree(st);

	/* shmemfs first checks if there is enough memory to allocate the page
	 * and reports ENOSPC should there be insufficient, along with the usual
	 * ENOMEM for a genuine allocation failure.
	 *
	 * We use ENOSPC in our driver to mean that we have run out of aperture
	 * space and so want to translate the error from shmemfs back to our
	 * usual understanding of ENOMEM.
	 */
	if (ret == -ENOSPC)
		ret = -ENOMEM;

	return ret;
}
2816
/* Install a freshly populated sg_table as the object's backing pages and
 * derive the GTT page sizes it can be mapped with. Caller holds obj->mm.lock. */
void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
				 struct sg_table *pages,
				 unsigned int sg_page_sizes)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	unsigned long supported = INTEL_INFO(i915)->page_sizes;
	int i;

	lockdep_assert_held(&obj->mm.lock);

	/* Reset the cached sg-walk position to the start of the new table. */
	obj->mm.get_page.sg_pos = pages->sgl;
	obj->mm.get_page.sg_idx = 0;

	obj->mm.pages = pages;

	/* Keep swizzled pages pinned while the quirk is active. */
	if (i915_gem_object_is_tiled(obj) &&
	    i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
		GEM_BUG_ON(obj->mm.quirked);
		__i915_gem_object_pin_pages(obj);
		obj->mm.quirked = true;
	}

	GEM_BUG_ON(!sg_page_sizes);
	obj->mm.page_sizes.phys = sg_page_sizes;

	/*
	 * Calculate the supported page-sizes which fit into the given
	 * sg_page_sizes. This will give us the page-sizes which we may be able
	 * to use opportunistically when later inserting into the GTT. For
	 * example if phys=2G, then in theory we should be able to use 1G, 2M,
	 * 64K or 4K pages, although in practice this will depend on a number of
	 * other factors.
	 */
	obj->mm.page_sizes.sg = 0;
	for_each_set_bit(i, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) {
		/* Size 1<<i usable iff some segment is >= that size. */
		if (obj->mm.page_sizes.phys & ~0u << i)
			obj->mm.page_sizes.sg |= BIT(i);
	}
	GEM_BUG_ON(!HAS_PAGE_SIZES(i915, obj->mm.page_sizes.sg));

	lockmgr(&i915->mm.obj_lock, LK_EXCLUSIVE);
	list_add(&obj->mm.link, &i915->mm.unbound_list);
	lockmgr(&i915->mm.obj_lock, LK_RELEASE);
}
2861
____i915_gem_object_get_pages(struct drm_i915_gem_object * obj)2862 static int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
2863 {
2864 int err;
2865
2866 if (unlikely(obj->mm.madv != I915_MADV_WILLNEED)) {
2867 DRM_DEBUG("Attempting to obtain a purgeable object\n");
2868 return -EFAULT;
2869 }
2870
2871 err = obj->ops->get_pages(obj);
2872 GEM_BUG_ON(!err && IS_ERR_OR_NULL(obj->mm.pages));
2873
2874 return err;
2875 }
2876
2877 /* Ensure that the associated pages are gathered from the backing storage
2878 * and pinned into our object. i915_gem_object_pin_pages() may be called
2879 * multiple times before they are released by a single call to
2880 * i915_gem_object_unpin_pages() - once the pages are no longer referenced
2881 * either as a result of memory pressure (reaping pages under the shrinker)
2882 * or as the object is itself released.
2883 */
__i915_gem_object_get_pages(struct drm_i915_gem_object * obj)2884 int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
2885 {
2886 int err;
2887
2888 err = mutex_lock_interruptible(&obj->mm.lock);
2889 if (err)
2890 return err;
2891
2892 if (unlikely(!i915_gem_object_has_pages(obj))) {
2893 GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj));
2894
2895 err = ____i915_gem_object_get_pages(obj);
2896 if (err)
2897 goto unlock;
2898
2899 smp_mb__before_atomic();
2900 }
2901 atomic_inc(&obj->mm.pages_pin_count);
2902
2903 unlock:
2904 mutex_unlock(&obj->mm.lock);
2905 return err;
2906 }
2907
2908 /* The 'mapping' part of i915_gem_object_pin_map() below */
static void *i915_gem_object_map(const struct drm_i915_gem_object *obj,
				 enum i915_map_type type)
{
	unsigned long n_pages = obj->base.size >> PAGE_SHIFT;
	struct sg_table *sgt = obj->mm.pages;
	struct sgt_iter sgt_iter;
	struct page *page;
	struct page *stack_pages[32];
	struct page **pages = stack_pages;
	unsigned long i = 0;
	pgprot_t pgprot;
	void *addr;

	/* A single page can always be kmapped */
	if (n_pages == 1 && type == I915_MAP_WB)
		return kmap(sg_page(sgt->sgl));

	if (n_pages > ARRAY_SIZE(stack_pages)) {
		/* Too big for stack -- allocate temporary array instead */
		pages = kvmalloc_array(n_pages, sizeof(*pages), GFP_KERNEL);
		if (!pages)
			return NULL;
	}

	/* Flatten the sg table into a plain page array for vmap(). */
	for_each_sgt_page(page, sgt_iter, sgt)
		pages[i++] = page;

	/* Check that we have the expected number of pages */
	GEM_BUG_ON(i != n_pages);

	switch (type) {
	default:
		MISSING_CASE(type);
		/* fallthrough to use PAGE_KERNEL anyway */
	case I915_MAP_WB:
		pgprot = PAGE_KERNEL;
		break;
	case I915_MAP_WC:
		pgprot = pgprot_writecombine(PAGE_KERNEL_IO);
		break;
	}
	addr = vmap(pages, n_pages, 0, pgprot);

	/* The temporary array is only needed to build the mapping. */
	if (pages != stack_pages)
		kvfree(pages);

	return addr;
}
2957
2958 /* get, pin, and map the pages of the object into kernel space */
void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
			      enum i915_map_type type)
{
	enum i915_map_type has_type;
	bool pinned;
	void *ptr;
	int ret;

	GEM_BUG_ON(!i915_gem_object_has_struct_page(obj));

	ret = mutex_lock_interruptible(&obj->mm.lock);
	if (ret)
		return ERR_PTR(ret);

	/* I915_MAP_OVERRIDE permits replacing an existing mapping of a
	 * different type even while the pages are pinned elsewhere. */
	pinned = !(type & I915_MAP_OVERRIDE);
	type &= ~I915_MAP_OVERRIDE;

	if (!atomic_inc_not_zero(&obj->mm.pages_pin_count)) {
		/* No pins outstanding: populate the pages ourselves. */
		if (unlikely(!i915_gem_object_has_pages(obj))) {
			GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj));

			ret = ____i915_gem_object_get_pages(obj);
			if (ret)
				goto err_unlock;

			smp_mb__before_atomic();
		}
		atomic_inc(&obj->mm.pages_pin_count);
		pinned = false;
	}
	GEM_BUG_ON(!i915_gem_object_has_pages(obj));

	/* obj->mm.mapping packs the map type into the low pointer bits. */
	ptr = page_unpack_bits(obj->mm.mapping, &has_type);
	if (ptr && has_type != type) {
		/* Cannot swap the mapping type while others hold pins. */
		if (pinned) {
			ret = -EBUSY;
			goto err_unpin;
		}

		if (is_vmalloc_addr(ptr))
			vunmap(ptr);
		else
			kunmap(kmap_to_page(ptr));

		ptr = obj->mm.mapping = NULL;
	}

	if (!ptr) {
		ptr = i915_gem_object_map(obj, type);
		if (!ptr) {
			ret = -ENOMEM;
			goto err_unpin;
		}

		obj->mm.mapping = page_pack_bits(ptr, type);
	}

out_unlock:
	mutex_unlock(&obj->mm.lock);
	return ptr;

	/* Error paths rejoin out_unlock with ptr carrying the errno. */
err_unpin:
	atomic_dec(&obj->mm.pages_pin_count);
err_unlock:
	ptr = ERR_PTR(ret);
	goto out_unlock;
}
3026
static int
i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj,
			   const struct drm_i915_gem_pwrite *arg)
{
#ifndef __DragonFly__
	struct address_space *mapping = obj->base.filp->f_mapping;
#endif
	char __user *user_data = u64_to_user_ptr(arg->data_ptr);
	u64 remain, offset;
	unsigned int pg;

	/* Before we instantiate/pin the backing store for our use, we
	 * can prepopulate the shmemfs filp efficiently using a write into
	 * the pagecache. We avoid the penalty of instantiating all the
	 * pages, important if the user is just writing to a few and never
	 * uses the object on the GPU, and using a direct write into shmemfs
	 * allows it to avoid the cost of retrieving a page (either swapin
	 * or clearing-before-use) before it is overwritten.
	 */
	if (i915_gem_object_has_pages(obj))
		return -ENODEV;

	if (obj->mm.madv != I915_MADV_WILLNEED)
		return -EFAULT;

	/* Before the pages are instantiated the object is treated as being
	 * in the CPU domain. The pages will be clflushed as required before
	 * use, and we can freely write into the pages directly. If userspace
	 * races pwrite with any other operation; corruption will ensue -
	 * that is userspace's prerogative!
	 */

	remain = arg->size;
	offset = arg->offset;
	pg = offset_in_page(offset);

	/* Copy page-by-page through the pagecache write helpers. */
	do {
		unsigned int len, unwritten;
		struct page *page;
		void *data, *vaddr;
		int err;

		/* Clamp to the end of the current page, then to remain. */
		len = PAGE_SIZE - pg;
		if (len > remain)
			len = remain;

#ifndef __DragonFly__
		err = pagecache_write_begin(obj->base.filp, mapping,
#else
		err = pagecache_write_begin(obj->base.filp, NULL,
#endif
					    offset, len, 0,
					    &page, &data);
		if (err < 0)
			return err;

		vaddr = kmap(page);
		unwritten = copy_from_user(vaddr + pg, user_data, len);
		kunmap(page);

#ifndef __DragonFly__
		err = pagecache_write_end(obj->base.filp, mapping,
#else
		err = pagecache_write_end(obj->base.filp, NULL,
#endif
					  offset, len, len - unwritten,
					  page, data);
		if (err < 0)
			return err;

		/* A partial copy_from_user means a bad user pointer. */
		if (unwritten)
			return -EFAULT;

		remain -= len;
		user_data += len;
		offset += len;
		pg = 0;	/* subsequent pages start at a page boundary */
	} while (remain);

	return 0;
}
3108
ban_context(const struct i915_gem_context * ctx,unsigned int score)3109 static bool ban_context(const struct i915_gem_context *ctx,
3110 unsigned int score)
3111 {
3112 return (i915_gem_context_is_bannable(ctx) &&
3113 score >= CONTEXT_SCORE_BAN_THRESHOLD);
3114 }
3115
i915_gem_context_mark_guilty(struct i915_gem_context * ctx)3116 static void i915_gem_context_mark_guilty(struct i915_gem_context *ctx)
3117 {
3118 unsigned int score;
3119 bool banned;
3120
3121 atomic_inc(&ctx->guilty_count);
3122
3123 score = atomic_add_return(CONTEXT_SCORE_GUILTY, &ctx->ban_score);
3124 banned = ban_context(ctx, score);
3125 DRM_DEBUG_DRIVER("context %s marked guilty (score %d) banned? %s\n",
3126 ctx->name, score, yesno(banned));
3127 if (!banned)
3128 return;
3129
3130 i915_gem_context_set_banned(ctx);
3131 if (!IS_ERR_OR_NULL(ctx->file_priv)) {
3132 atomic_inc(&ctx->file_priv->context_bans);
3133 DRM_DEBUG_DRIVER("client %s has had %d context banned\n",
3134 ctx->name, atomic_read(&ctx->file_priv->context_bans));
3135 }
3136 }
3137
/* Record that @ctx was active during a hang but not to blame for it. */
static void i915_gem_context_mark_innocent(struct i915_gem_context *ctx)
{
	atomic_inc(&ctx->active_count);
}
3142
/*
 * Find the request currently being executed by @engine: the oldest request
 * on the engine's timeline that has not yet completed. Returns NULL if the
 * engine is idle. Used by error capture and reset.
 */
struct drm_i915_gem_request *
i915_gem_find_active_request(struct intel_engine_cs *engine)
{
	struct drm_i915_gem_request *request, *active = NULL;
	unsigned long flags;

	/* We are called by the error capture and reset at a random
	 * point in time. In particular, note that neither is crucially
	 * ordered with an interrupt. After a hang, the GPU is dead and we
	 * assume that no more writes can happen (we waited long enough for
	 * all writes that were in transaction to be flushed) - adding an
	 * extra delay for a recent interrupt is pointless. Hence, we do
	 * not need an engine->irq_seqno_barrier() before the seqno reads.
	 */
	spin_lock_irqsave(&engine->timeline->lock, flags);
	list_for_each_entry(request, &engine->timeline->requests, link) {
		/* Skip requests the hardware has already finished. */
		if (__i915_gem_request_completed(request,
						 request->global_seqno))
			continue;

		GEM_BUG_ON(request->engine != engine);
		GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
				    &request->fence.flags));

		/* The list is in submission order, so the first incomplete
		 * request is the one the engine is (or was) executing.
		 */
		active = request;
		break;
	}
	spin_unlock_irqrestore(&engine->timeline->lock, flags);

	return active;
}
3174
engine_stalled(struct intel_engine_cs * engine)3175 static bool engine_stalled(struct intel_engine_cs *engine)
3176 {
3177 if (!engine->hangcheck.stalled)
3178 return false;
3179
3180 /* Check for possible seqno movement after hang declaration */
3181 if (engine->hangcheck.seqno != intel_engine_get_seqno(engine)) {
3182 DRM_DEBUG_DRIVER("%s pardoned\n", engine->name);
3183 return false;
3184 }
3185
3186 return true;
3187 }
3188
/*
 * Ensure irq handler finishes, and not run again.
 * Also return the active request so that we only search for it once.
 *
 * Returns the active request (to be processed by the reset proper), NULL if
 * the engine is idle, or ERR_PTR(-EIO) if a previous reset already failed
 * on this request. Must be paired with i915_gem_reset_finish_engine().
 */
struct drm_i915_gem_request *
i915_gem_reset_prepare_engine(struct intel_engine_cs *engine)
{
	struct drm_i915_gem_request *request = NULL;

	/*
	 * During the reset sequence, we must prevent the engine from
	 * entering RC6. As the context state is undefined until we restart
	 * the engine, if it does enter RC6 during the reset, the state
	 * written to the powercontext is undefined and so we may lose
	 * GPU state upon resume, i.e. fail to restart after a reset.
	 */
	intel_uncore_forcewake_get(engine->i915, FORCEWAKE_ALL);

	/*
	 * Prevent the signaler thread from updating the request
	 * state (by calling dma_fence_signal) as we are processing
	 * the reset. The write from the GPU of the seqno is
	 * asynchronous and the signaler thread may see a different
	 * value to us and declare the request complete, even though
	 * the reset routine have picked that request as the active
	 * (incomplete) request. This conflict is not handled
	 * gracefully!
	 */
	kthread_park(engine->breadcrumbs.signaler);

	/*
	 * Prevent request submission to the hardware until we have
	 * completed the reset in i915_gem_reset_finish(). If a request
	 * is completed by one engine, it may then queue a request
	 * to a second via its engine->irq_tasklet *just* as we are
	 * calling engine->init_hw() and also writing the ELSP.
	 * Turning off the engine->irq_tasklet until the reset is over
	 * prevents the race.
	 */
	tasklet_kill(&engine->execlists.irq_tasklet);
	tasklet_disable(&engine->execlists.irq_tasklet);

	/* Flush any residual seqno writes before inspecting the requests. */
	if (engine->irq_seqno_barrier)
		engine->irq_seqno_barrier(engine);

	request = i915_gem_find_active_request(engine);
	if (request && request->fence.error == -EIO)
		request = ERR_PTR(-EIO); /* Previous reset failed! */

	return request;
}
3240
i915_gem_reset_prepare(struct drm_i915_private * dev_priv)3241 int i915_gem_reset_prepare(struct drm_i915_private *dev_priv)
3242 {
3243 struct intel_engine_cs *engine;
3244 struct drm_i915_gem_request *request;
3245 enum intel_engine_id id;
3246 int err = 0;
3247
3248 for_each_engine(engine, dev_priv, id) {
3249 request = i915_gem_reset_prepare_engine(engine);
3250 if (IS_ERR(request)) {
3251 err = PTR_ERR(request);
3252 continue;
3253 }
3254
3255 engine->hangcheck.active_request = request;
3256 }
3257
3258 i915_gem_revoke_fences(dev_priv);
3259
3260 return err;
3261 }
3262
/*
 * Cancel the payload of @request by zeroing its commands in the ringbuffer,
 * leaving only the final breadcrumb intact so fence signalling still works,
 * and mark its fence with -EIO.
 */
static void skip_request(struct drm_i915_gem_request *request)
{
	void *vaddr = request->ring->vaddr;
	u32 head;

	/* As this request likely depends on state from the lost
	 * context, clear out all the user operations leaving the
	 * breadcrumb at the end (so we get the fence notifications).
	 */
	head = request->head;
	if (request->postfix < head) {
		/* The payload wraps around the end of the ring: clear the
		 * tail segment first, then restart from the beginning.
		 */
		memset(vaddr + head, 0, request->ring->size - head);
		head = 0;
	}
	memset(vaddr + head, 0, request->postfix - head);

	dma_fence_set_error(&request->fence, -EIO);
}
3281
/*
 * Skip (cancel) all requests belonging to the hung context: the remainder
 * of the already-submitted requests on the engine's timeline, plus every
 * queued request still sitting on the context's own timeline.
 */
static void engine_skip_context(struct drm_i915_gem_request *request)
{
	struct intel_engine_cs *engine = request->engine;
	struct i915_gem_context *hung_ctx = request->ctx;
	struct intel_timeline *timeline;
	unsigned long flags;

	timeline = i915_gem_context_lookup_timeline(hung_ctx, engine);

	/* Nested locking: engine timeline first, then the context timeline.
	 * NOTE(review): lockmgr() is the DragonFly stand-in for the nested
	 * spin_lock() used upstream - confirm it honours the irqsave state
	 * held by the outer lock.
	 */
	spin_lock_irqsave(&engine->timeline->lock, flags);
	lockmgr(&timeline->lock, LK_EXCLUSIVE);

	/* Continue from @request: cancel the rest of the hung context's
	 * requests already on the engine timeline.
	 */
	list_for_each_entry_continue(request, &engine->timeline->requests, link)
		if (request->ctx == hung_ctx)
			skip_request(request);

	/* And everything still queued on the context's own timeline. */
	list_for_each_entry(request, &timeline->requests, link)
		skip_request(request);

	lockmgr(&timeline->lock, LK_RELEASE);
	spin_unlock_irqrestore(&engine->timeline->lock, flags);
}
3304
/* Returns the request if it was guilty of the hang */
static struct drm_i915_gem_request *
i915_gem_reset_request(struct intel_engine_cs *engine,
		       struct drm_i915_gem_request *request)
{
	/* The guilty request will get skipped on a hung engine.
	 *
	 * Users of client default contexts do not rely on logical
	 * state preserved between batches so it is safe to execute
	 * queued requests following the hang. Non default contexts
	 * rely on preserved state, so skipping a batch loses the
	 * evolution of the state and it needs to be considered corrupted.
	 * Executing more queued batches on top of corrupted state is
	 * risky. But we take the risk by trying to advance through
	 * the queued requests in order to make the client behaviour
	 * more predictable around resets, by not throwing away random
	 * amount of batches it has prepared for execution. Sophisticated
	 * clients can use gem_reset_stats_ioctl and dma fence status
	 * (exported via sync_file info ioctl on explicit fences) to observe
	 * when it loses the context state and should rebuild accordingly.
	 *
	 * The context ban, and ultimately the client ban, mechanism are safety
	 * valves if client submission ends up resulting in nothing more than
	 * subsequent hangs.
	 */

	if (engine_stalled(engine)) {
		/* This engine caused the hang: the request is guilty. */
		i915_gem_context_mark_guilty(request->ctx);
		skip_request(request);

		/* If this context is now banned, skip all pending requests. */
		if (i915_gem_context_is_banned(request->ctx))
			engine_skip_context(request);
	} else {
		/*
		 * Since this is not the hung engine, it may have advanced
		 * since the hang declaration. Double check by refinding
		 * the active request at the time of the reset.
		 */
		request = i915_gem_find_active_request(engine);
		if (request) {
			i915_gem_context_mark_innocent(request->ctx);
			dma_fence_set_error(&request->fence, -EAGAIN);

			/* Rewind the engine to replay the incomplete rq */
			spin_lock_irq(&engine->timeline->lock);
			request = list_prev_entry(request, link);
			if (&request->link == &engine->timeline->requests)
				request = NULL; /* rq was first: restart from idle */
			spin_unlock_irq(&engine->timeline->lock);
		}
	}

	return request;
}
3360
i915_gem_reset_engine(struct intel_engine_cs * engine,struct drm_i915_gem_request * request)3361 void i915_gem_reset_engine(struct intel_engine_cs *engine,
3362 struct drm_i915_gem_request *request)
3363 {
3364 engine->irq_posted = 0;
3365
3366 if (request)
3367 request = i915_gem_reset_request(engine, request);
3368
3369 if (request) {
3370 DRM_DEBUG_DRIVER("resetting %s to restart from tail of request 0x%x\n",
3371 engine->name, request->global_seqno);
3372 }
3373
3374 /* Setup the CS to resume from the breadcrumb of the hung request */
3375 engine->reset_hw(engine, request);
3376 }
3377
/*
 * Perform the post-reset fixup for the whole GPU: reset each engine using
 * the active request captured in i915_gem_reset_prepare(), restore the
 * fence registers revoked there, and re-enable powersave if the GT was
 * awake. Caller holds struct_mutex.
 */
void i915_gem_reset(struct drm_i915_private *dev_priv)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	lockdep_assert_held(&dev_priv->drm.struct_mutex);

	/* Retire completed requests first so the per-engine state is clean. */
	i915_gem_retire_requests(dev_priv);

	for_each_engine(engine, dev_priv, id) {
		struct i915_gem_context *ctx;

		i915_gem_reset_engine(engine, engine->hangcheck.active_request);

		/* Drop the engine's reference to its last context; the
		 * hardware context state is invalid after the reset.
		 */
		ctx = fetch_and_zero(&engine->last_retired_context);
		if (ctx)
			engine->context_unpin(engine, ctx);
	}

	i915_gem_restore_fences(dev_priv);

	if (dev_priv->gt.awake) {
		intel_sanitize_gt_powersave(dev_priv);
		intel_enable_gt_powersave(dev_priv);
		if (INTEL_GEN(dev_priv) >= 6)
			gen6_rps_busy(dev_priv);
	}
}
3405
/*
 * Undo i915_gem_reset_prepare_engine(): re-enable the submission tasklet,
 * let the signaler thread run again and drop the forcewake reference held
 * across the reset.
 */
void i915_gem_reset_finish_engine(struct intel_engine_cs *engine)
{
	tasklet_enable(&engine->execlists.irq_tasklet);
	kthread_unpark(engine->breadcrumbs.signaler);

	intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL);
}
3413
/*
 * Undo i915_gem_reset_prepare() on every engine, clearing the recorded
 * active request now that the reset has been processed. Caller holds
 * struct_mutex.
 */
void i915_gem_reset_finish(struct drm_i915_private *dev_priv)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	lockdep_assert_held(&dev_priv->drm.struct_mutex);

	for_each_engine(engine, dev_priv, id) {
		engine->hangcheck.active_request = NULL;
		i915_gem_reset_finish_engine(engine);
	}
}
3426
/*
 * submit_request replacement installed while the device is wedged: mark the
 * fence with -EIO (before submission, so waiters observe the error) and
 * record the request as submitted without touching the hardware.
 */
static void nop_submit_request(struct drm_i915_gem_request *request)
{
	dma_fence_set_error(&request->fence, -EIO);

	i915_gem_request_submit(request);
}
3433
/*
 * Second-stage wedged submit_request: as well as failing the fence and
 * submitting, advance the engine's global seqno past this request so it is
 * immediately treated as complete. The timeline lock covers both the
 * submission and the seqno update.
 */
static void nop_complete_submit_request(struct drm_i915_gem_request *request)
{
	unsigned long flags;

	dma_fence_set_error(&request->fence, -EIO);

	spin_lock_irqsave(&request->engine->timeline->lock, flags);
	__i915_gem_request_submit(request);
	intel_engine_init_global_seqno(request->engine, request->global_seqno);
	spin_unlock_irqrestore(&request->engine->timeline->lock, flags);
}
3445
/*
 * Declare the GPU terminally wedged: stop all hardware submission, fail
 * every in-flight and future request with -EIO, and wake anyone waiting on
 * the reset. The multi-phase handover (with synchronize_rcu() between
 * phases) is deliberate - see the comments on each step.
 */
void i915_gem_set_wedged(struct drm_i915_private *i915)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	/*
	 * First, stop submission to hw, but do not yet complete requests by
	 * rolling the global seqno forward (since this would complete requests
	 * for which we haven't set the fence error to EIO yet).
	 */
	for_each_engine(engine, i915, id)
		engine->submit_request = nop_submit_request;

	/*
	 * Make sure no one is running the old callback before we proceed with
	 * cancelling requests and resetting the completion tracking. Otherwise
	 * we might submit a request to the hardware which never completes.
	 */
	synchronize_rcu();

	for_each_engine(engine, i915, id) {
		/* Mark all executing requests as skipped */
		engine->cancel_requests(engine);

		/*
		 * Only once we've force-cancelled all in-flight requests can we
		 * start to complete all requests.
		 */
		engine->submit_request = nop_complete_submit_request;
	}

	/*
	 * Make sure no request can slip through without getting completed by
	 * either this call here to intel_engine_init_global_seqno, or the one
	 * in nop_complete_submit_request.
	 */
	synchronize_rcu();

	for_each_engine(engine, i915, id) {
		unsigned long flags;

		/* Mark all pending requests as complete so that any concurrent
		 * (lockless) lookup doesn't try and wait upon the request as we
		 * reset it.
		 */
		spin_lock_irqsave(&engine->timeline->lock, flags);
		intel_engine_init_global_seqno(engine,
					       intel_engine_last_submit(engine));
		spin_unlock_irqrestore(&engine->timeline->lock, flags);
	}

	/* Publish the wedged state and release waiters blocked on the reset. */
	set_bit(I915_WEDGED, &i915->gpu_error.flags);
	wake_up_all(&i915->gpu_error.reset_queue);
}
3500
/*
 * Attempt to recover from a wedged state: wait for every outstanding
 * request (including those blocked on third-party fences) to be completed
 * by the nop handlers, then reinstate the real submission backends and
 * clear the wedged flag. Returns true on success, false if a wait failed
 * (the device stays wedged). Caller holds struct_mutex.
 */
bool i915_gem_unset_wedged(struct drm_i915_private *i915)
{
	struct i915_gem_timeline *tl;
	int i;

	lockdep_assert_held(&i915->drm.struct_mutex);
	if (!test_bit(I915_WEDGED, &i915->gpu_error.flags))
		return true;	/* not wedged; nothing to undo */

	/* Before unwedging, make sure that all pending operations
	 * are flushed and errored out - we may have requests waiting upon
	 * third party fences. We marked all inflight requests as EIO, and
	 * every execbuf since returned EIO, for consistency we want all
	 * the currently pending requests to also be marked as EIO, which
	 * is done inside our nop_submit_request - and so we must wait.
	 *
	 * No more can be submitted until we reset the wedged bit.
	 */
	list_for_each_entry(tl, &i915->gt.timelines, link) {
		for (i = 0; i < ARRAY_SIZE(tl->engine); i++) {
			struct drm_i915_gem_request *rq;

			rq = i915_gem_active_peek(&tl->engine[i].last_request,
						  &i915->drm.struct_mutex);
			if (!rq)
				continue;

			/* We can't use our normal waiter as we want to
			 * avoid recursively trying to handle the current
			 * reset. The basic dma_fence_default_wait() installs
			 * a callback for dma_fence_signal(), which is
			 * triggered by our nop handler (indirectly, the
			 * callback enables the signaler thread which is
			 * woken by the nop_submit_request() advancing the seqno
			 * and when the seqno passes the fence, the signaler
			 * then signals the fence waking us up).
			 */
			if (dma_fence_default_wait(&rq->fence, true,
						   MAX_SCHEDULE_TIMEOUT) < 0)
				return false;
		}
	}

	/* Undo nop_submit_request. We prevent all new i915 requests from
	 * being queued (by disallowing execbuf whilst wedged) so having
	 * waited for all active requests above, we know the system is idle
	 * and do not have to worry about a thread being inside
	 * engine->submit_request() as we swap over. So unlike installing
	 * the nop_submit_request on reset, we can do this from normal
	 * context and do not require stop_machine().
	 */
	intel_engines_reset_default_submission(i915);
	i915_gem_contexts_lost(i915);

	smp_mb__before_atomic(); /* complete takeover before enabling execbuf */
	clear_bit(I915_WEDGED, &i915->gpu_error.flags);

	return true;
}
3560
3561 static void
i915_gem_retire_work_handler(struct work_struct * work)3562 i915_gem_retire_work_handler(struct work_struct *work)
3563 {
3564 struct drm_i915_private *dev_priv =
3565 container_of(work, typeof(*dev_priv), gt.retire_work.work);
3566 struct drm_device *dev = &dev_priv->drm;
3567
3568 /* Come back later if the device is busy... */
3569 if (mutex_trylock(&dev->struct_mutex)) {
3570 i915_gem_retire_requests(dev_priv);
3571 mutex_unlock(&dev->struct_mutex);
3572 }
3573
3574 /* Keep the retire handler running until we are finally idle.
3575 * We do not need to do this test under locking as in the worst-case
3576 * we queue the retire worker once too often.
3577 */
3578 if (READ_ONCE(dev_priv->gt.awake)) {
3579 i915_queue_hangcheck(dev_priv);
3580 queue_delayed_work(dev_priv->wq,
3581 &dev_priv->gt.retire_work,
3582 round_jiffies_up_relative(HZ));
3583 }
3584 }
3585
/*
 * Deferred worker that transitions the GT to sleep once it has been idle
 * for a while: marks engines and timelines idle, drops the runtime-pm
 * reference and parks RPS. Bails out (and reschedules or re-arms hangcheck
 * as appropriate) whenever new activity races in.
 */
static void
i915_gem_idle_work_handler(struct work_struct *work)
{
	struct drm_i915_private *dev_priv =
		container_of(work, typeof(*dev_priv), gt.idle_work.work);
	struct drm_device *dev = &dev_priv->drm;
	bool rearm_hangcheck;

	if (!READ_ONCE(dev_priv->gt.awake))
		return;

	/*
	 * Wait for last execlists context complete, but bail out in case a
	 * new request is submitted.
	 */
	wait_for(intel_engines_are_idle(dev_priv), 10);
	if (READ_ONCE(dev_priv->gt.active_requests))
		return;

	/* Hangcheck is pointless while idle; remember whether we cancelled a
	 * pending run so we can restore it if we abort the parking below.
	 */
	rearm_hangcheck =
		cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);

	if (!mutex_trylock(&dev->struct_mutex)) {
		/* Currently busy, come back later */
		mod_delayed_work(dev_priv->wq,
				 &dev_priv->gt.idle_work,
				 msecs_to_jiffies(50));
		goto out_rearm;
	}

	/*
	 * New request retired after this work handler started, extend active
	 * period until next instance of the work.
	 */
	if (work_pending(work))
		goto out_unlock;

	if (dev_priv->gt.active_requests)
		goto out_unlock;

	if (wait_for(intel_engines_are_idle(dev_priv), 10))
		DRM_ERROR("Timeout waiting for engines to idle\n");

	intel_engines_mark_idle(dev_priv);
	i915_gem_timelines_mark_idle(dev_priv);

	/* Park: drop the awake flag and the wakeref taken when waking. */
	GEM_BUG_ON(!dev_priv->gt.awake);
	dev_priv->gt.awake = false;
	rearm_hangcheck = false;

	if (INTEL_GEN(dev_priv) >= 6)
		gen6_rps_idle(dev_priv);
	intel_runtime_pm_put(dev_priv);
out_unlock:
	mutex_unlock(&dev->struct_mutex);

out_rearm:
	if (rearm_hangcheck) {
		GEM_BUG_ON(!dev_priv->gt.awake);
		i915_queue_hangcheck(dev_priv);
	}
}
3648
/*
 * GEM close callback: the handle to @gem is being dropped by @file. Tear
 * down every handle-lookup entry (lut) this file created for the object,
 * closing the associated per-context vma when its last open handle goes.
 */
void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
{
	struct drm_i915_private *i915 = to_i915(gem->dev);
	struct drm_i915_gem_object *obj = to_intel_bo(gem);
	struct drm_i915_file_private *fpriv = file->driver_priv;
	struct i915_lut_handle *lut, *ln;

	mutex_lock(&i915->drm.struct_mutex);

	list_for_each_entry_safe(lut, ln, &obj->lut_list, obj_link) {
		struct i915_gem_context *ctx = lut->ctx;
		struct i915_vma *vma;

		GEM_BUG_ON(ctx->file_priv == ERR_PTR(-EBADF));
		/* Only entries belonging to the closing file are affected. */
		if (ctx->file_priv != fpriv)
			continue;

		vma = radix_tree_delete(&ctx->handles_vma, lut->handle);
		GEM_BUG_ON(vma->obj != obj);

		/* We allow the process to have multiple handles to the same
		 * vma, in the same fd namespace, by virtue of flink/open.
		 */
		GEM_BUG_ON(!vma->open_count);
		if (!--vma->open_count && !i915_vma_is_ggtt(vma))
			i915_vma_close(vma);

		list_del(&lut->obj_link);
		list_del(&lut->ctx_link);

		/* Each lut held a reference on the object; release it. */
		kmem_cache_free(i915->luts, lut);
		__i915_gem_object_release_unless_active(obj);
	}

	mutex_unlock(&i915->drm.struct_mutex);
}
3685
/*
 * Convert a userspace timeout in nanoseconds to jiffies:
 * negative means wait forever, zero means poll once.
 */
static unsigned long to_wait_timeout(s64 timeout_ns)
{
	if (timeout_ns > 0)
		return nsecs_to_jiffies_timeout(timeout_ns);

	return timeout_ns < 0 ? MAX_SCHEDULE_TIMEOUT : 0;
}
3696
3697 /**
3698 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
3699 * @dev: drm device pointer
3700 * @data: ioctl data blob
3701 * @file: drm file pointer
3702 *
3703 * Returns 0 if successful, else an error is returned with the remaining time in
3704 * the timeout parameter.
3705 * -ETIME: object is still busy after timeout
3706 * -ERESTARTSYS: signal interrupted the wait
3707 * -ENONENT: object doesn't exist
3708 * Also possible, but rare:
3709 * -EAGAIN: incomplete, restart syscall
3710 * -ENOMEM: damn
3711 * -ENODEV: Internal IRQ fail
3712 * -E?: The add request failed
3713 *
3714 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
3715 * non-zero timeout parameter the wait ioctl will wait for the given number of
3716 * nanoseconds on an object becoming unbusy. Since the wait itself does so
3717 * without holding struct_mutex the object may become re-busied before this
3718 * function completes. A similar but shorter * race condition exists in the busy
3719 * ioctl
3720 */
3721 int
i915_gem_wait_ioctl(struct drm_device * dev,void * data,struct drm_file * file)3722 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
3723 {
3724 struct drm_i915_gem_wait *args = data;
3725 struct drm_i915_gem_object *obj;
3726 ktime_t start;
3727 long ret;
3728
3729 if (args->flags != 0)
3730 return -EINVAL;
3731
3732 obj = i915_gem_object_lookup(file, args->bo_handle);
3733 if (!obj)
3734 return -ENOENT;
3735
3736 start = ktime_get();
3737
3738 ret = i915_gem_object_wait(obj,
3739 I915_WAIT_INTERRUPTIBLE | I915_WAIT_ALL,
3740 to_wait_timeout(args->timeout_ns),
3741 to_rps_client(file));
3742
3743 if (args->timeout_ns > 0) {
3744 args->timeout_ns -= ktime_to_ns(ktime_sub(ktime_get(), start));
3745 if (args->timeout_ns < 0)
3746 args->timeout_ns = 0;
3747
3748 /*
3749 * Apparently ktime isn't accurate enough and occasionally has a
3750 * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
3751 * things up to make the test happy. We allow up to 1 jiffy.
3752 *
3753 * This is a regression from the timespec->ktime conversion.
3754 */
3755 if (ret == -ETIME && !nsecs_to_jiffies(args->timeout_ns))
3756 args->timeout_ns = 0;
3757
3758 /*
3759 * Apparently ktime isn't accurate enough and occasionally has a
3760 * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
3761 * things up to make the test happy. We allow up to 1 jiffy.
3762 *
3763 * This is a regression from the timespec->ktime conversion.
3764 */
3765 if (ret == -ETIME && !nsecs_to_jiffies(args->timeout_ns))
3766 args->timeout_ns = 0;
3767
3768 /* Asked to wait beyond the jiffie/scheduler precision? */
3769 if (ret == -ETIME && args->timeout_ns)
3770 ret = -EAGAIN;
3771 }
3772
3773 i915_gem_object_put(obj);
3774 return ret;
3775 }
3776
wait_for_timeline(struct i915_gem_timeline * tl,unsigned int flags)3777 static int wait_for_timeline(struct i915_gem_timeline *tl, unsigned int flags)
3778 {
3779 int ret, i;
3780
3781 for (i = 0; i < ARRAY_SIZE(tl->engine); i++) {
3782 ret = i915_gem_active_wait(&tl->engine[i].last_request, flags);
3783 if (ret)
3784 return ret;
3785 }
3786
3787 return 0;
3788 }
3789
wait_for_engines(struct drm_i915_private * i915)3790 static int wait_for_engines(struct drm_i915_private *i915)
3791 {
3792 if (wait_for(intel_engines_are_idle(i915), 50)) {
3793 DRM_ERROR("Failed to idle engines, declaring wedged!\n");
3794 i915_gem_set_wedged(i915);
3795 return -EIO;
3796 }
3797
3798 return 0;
3799 }
3800
i915_gem_wait_for_idle(struct drm_i915_private * i915,unsigned int flags)3801 int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags)
3802 {
3803 int ret;
3804
3805 /* If the device is asleep, we have no requests outstanding */
3806 if (!READ_ONCE(i915->gt.awake))
3807 return 0;
3808
3809 if (flags & I915_WAIT_LOCKED) {
3810 struct i915_gem_timeline *tl;
3811
3812 lockdep_assert_held(&i915->drm.struct_mutex);
3813
3814 list_for_each_entry(tl, &i915->gt.timelines, link) {
3815 ret = wait_for_timeline(tl, flags);
3816 if (ret)
3817 return ret;
3818 }
3819
3820 i915_gem_retire_requests(i915);
3821 GEM_BUG_ON(i915->gt.active_requests);
3822
3823 ret = wait_for_engines(i915);
3824 } else {
3825 ret = wait_for_timeline(&i915->gt.global_timeline, flags);
3826 }
3827
3828 return ret;
3829 }
3830
/*
 * Flush all CPU-side writes on @obj so the display engine observes coherent
 * data, clearing the write domain afterwards. Caller holds struct_mutex.
 */
static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
{
	/*
	 * We manually flush the CPU domain so that we can override and
	 * force the flush for the display, and perform it asyncrhonously.
	 */
	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
	if (obj->cache_dirty)
		i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
	obj->base.write_domain = 0;
}
3842
/*
 * Flush @obj for scanout coherency, but only if it is currently pinned for
 * display (pin_global); takes struct_mutex around the flush.
 */
void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
{
	/* Unlocked peek: objects not on the display need no flush. */
	if (!READ_ONCE(obj->pin_global))
		return;

	mutex_lock(&obj->base.dev->struct_mutex);
	__i915_gem_object_flush_for_display(obj);
	mutex_unlock(&obj->base.dev->struct_mutex);
}
3852
/**
 * Moves a single object to the WC read, and possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur. Caller holds struct_mutex. Returns 0 on success or a
 * negative error code from the wait or page pinning.
 */
int
i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	lockdep_assert_held(&obj->base.dev->struct_mutex);

	/* Wait for outstanding GPU access (all access if we want to write). */
	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_LOCKED |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT,
				   NULL);
	if (ret)
		return ret;

	/* Already being written through WC; nothing to do. */
	if (obj->base.write_domain == I915_GEM_DOMAIN_WC)
		return 0;

	/* Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);

	/* Serialise direct access to this object with the barriers for
	 * coherent writes from the GPU, by effectively invalidating the
	 * WC domain upon first access.
	 */
	if ((obj->base.read_domains & I915_GEM_DOMAIN_WC) == 0)
		mb();

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_WC) != 0);
	obj->base.read_domains |= I915_GEM_DOMAIN_WC;
	if (write) {
		/* Writing makes WC the sole domain and dirties the pages. */
		obj->base.read_domains = I915_GEM_DOMAIN_WC;
		obj->base.write_domain = I915_GEM_DOMAIN_WC;
		obj->mm.dirty = true;
	}

	i915_gem_object_unpin_pages(obj);
	return 0;
}
3915
/**
 * Moves a single object to the GTT read, and possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur. Caller holds struct_mutex. Returns 0 on success or a
 * negative error code from the wait or page pinning.
 */
int
i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	lockdep_assert_held(&obj->base.dev->struct_mutex);

	/* Wait for outstanding GPU access (all access if we want to write). */
	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_LOCKED |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT,
				   NULL);
	if (ret)
		return ret;

	/* Already being written through the GTT; nothing to do. */
	if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
		return 0;

	/* Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);

	/* Serialise direct access to this object with the barriers for
	 * coherent writes from the GPU, by effectively invalidating the
	 * GTT domain upon first access.
	 */
	if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
		mb();

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
	obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
	if (write) {
		/* Writing makes GTT the sole domain and dirties the pages. */
		obj->base.read_domains = I915_GEM_DOMAIN_GTT;
		obj->base.write_domain = I915_GEM_DOMAIN_GTT;
		obj->mm.dirty = true;
	}

	i915_gem_object_unpin_pages(obj);
	return 0;
}
3978
/**
 * Changes the cache-level of an object across all VMA.
 * @obj: object to act on
 * @cache_level: new cache level to set for the object
 *
 * After this function returns, the object will be in the new cache-level
 * across all GTT and the contents of the backing storage will be coherent,
 * with respect to the new cache-level. In order to keep the backing storage
 * coherent for all users, we only allow a single cache level to be set
 * globally on the object and prevent it from being changed whilst the
 * hardware is reading from the object. That is if the object is currently
 * on the scanout it will be set to uncached (or equivalent display
 * cache coherency) and all non-MOCS GPU access will also be uncached so
 * that all direct access to the scanout remains coherent.
 *
 * Returns 0 on success, -EBUSY if any bound VMA is pinned, or a negative
 * error code propagated from waiting, unbinding or rebinding a VMA.
 * Must be called with struct_mutex held.
 */
int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
				    enum i915_cache_level cache_level)
{
	struct i915_vma *vma;
	int ret;

	lockdep_assert_held(&obj->base.dev->struct_mutex);

	/* Already at the requested level: nothing to do. */
	if (obj->cache_level == cache_level)
		return 0;

	/* Inspect the list of currently bound VMA and unbind any that would
	 * be invalid given the new cache-level. This is principally to
	 * catch the issue of the CS prefetch crossing page boundaries and
	 * reading an invalid PTE on older architectures.
	 */
restart:
	list_for_each_entry(vma, &obj->vma_list, obj_link) {
		if (!drm_mm_node_allocated(&vma->node))
			continue;

		if (i915_vma_is_pinned(vma)) {
			DRM_DEBUG("can not change the cache level of pinned objects\n");
			return -EBUSY;
		}

		if (i915_gem_valid_gtt_space(vma, cache_level))
			continue;

		ret = i915_vma_unbind(vma);
		if (ret)
			return ret;

		/* As unbinding may affect other elements in the
		 * obj->vma_list (due to side-effects from retiring
		 * an active vma), play safe and restart the iterator.
		 */
		goto restart;
	}

	/* We can reuse the existing drm_mm nodes but need to change the
	 * cache-level on the PTE. We could simply unbind them all and
	 * rebind with the correct cache-level on next use. However since
	 * we already have a valid slot, dma mapping, pages etc, we may as
	 * rewrite the PTE in the belief that doing so tramples upon less
	 * state and so involves less work.
	 */
	if (obj->bind_count) {
		/* Before we change the PTE, the GPU must not be accessing it.
		 * If we wait upon the object, we know that all the bound
		 * VMA are no longer active.
		 */
		ret = i915_gem_object_wait(obj,
					   I915_WAIT_INTERRUPTIBLE |
					   I915_WAIT_LOCKED |
					   I915_WAIT_ALL,
					   MAX_SCHEDULE_TIMEOUT,
					   NULL);
		if (ret)
			return ret;

		if (!HAS_LLC(to_i915(obj->base.dev)) &&
		    cache_level != I915_CACHE_NONE) {
			/* Access to snoopable pages through the GTT is
			 * incoherent and on some machines causes a hard
			 * lockup. Relinquish the CPU mmaping to force
			 * userspace to refault in the pages and we can
			 * then double check if the GTT mapping is still
			 * valid for that pointer access.
			 */
			i915_gem_release_mmap(obj);

			/* As we no longer need a fence for GTT access,
			 * we can relinquish it now (and so prevent having
			 * to steal a fence from someone else on the next
			 * fence request). Note GPU activity would have
			 * dropped the fence as all snoopable access is
			 * supposed to be linear.
			 */
			list_for_each_entry(vma, &obj->vma_list, obj_link) {
				ret = i915_vma_put_fence(vma);
				if (ret)
					return ret;
			}
		} else {
			/* We either have incoherent backing store and
			 * so no GTT access or the architecture is fully
			 * coherent. In such cases, existing GTT mmaps
			 * ignore the cache bit in the PTE and we can
			 * rewrite it without confusing the GPU or having
			 * to force userspace to fault back in its mmaps.
			 */
		}

		/* Rewrite the PTEs of every still-bound VMA in place with
		 * the new cache bits.
		 */
		list_for_each_entry(vma, &obj->vma_list, obj_link) {
			if (!drm_mm_node_allocated(&vma->node))
				continue;

			ret = i915_vma_bind(vma, cache_level, PIN_UPDATE);
			if (ret)
				return ret;
		}
	}

	/* Update drm_mm colouring and our own coherency tracking to match. */
	list_for_each_entry(vma, &obj->vma_list, obj_link)
		vma->node.color = cache_level;
	i915_gem_object_set_cache_coherency(obj, cache_level);
	obj->cache_dirty = true; /* Always invalidate stale cachelines */

	return 0;
}
4105
i915_gem_get_caching_ioctl(struct drm_device * dev,void * data,struct drm_file * file)4106 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
4107 struct drm_file *file)
4108 {
4109 struct drm_i915_gem_caching *args = data;
4110 struct drm_i915_gem_object *obj;
4111 int err = 0;
4112
4113 rcu_read_lock();
4114 obj = i915_gem_object_lookup_rcu(file, args->handle);
4115 if (!obj) {
4116 err = -ENOENT;
4117 goto out;
4118 }
4119
4120 switch (obj->cache_level) {
4121 case I915_CACHE_LLC:
4122 case I915_CACHE_L3_LLC:
4123 args->caching = I915_CACHING_CACHED;
4124 break;
4125
4126 case I915_CACHE_WT:
4127 args->caching = I915_CACHING_DISPLAY;
4128 break;
4129
4130 default:
4131 args->caching = I915_CACHING_NONE;
4132 break;
4133 }
4134 out:
4135 rcu_read_unlock();
4136 return err;
4137 }
4138
/**
 * i915_gem_set_caching_ioctl - change the caching mode of a GEM object
 * @dev: drm device
 * @data: ioctl blob, a struct drm_i915_gem_caching
 * @file: drm file that owns the object handle
 *
 * Maps the uABI I915_CACHING_* request onto an i915_cache_level and applies
 * it with i915_gem_object_set_cache_level(). The object wait is performed
 * before taking struct_mutex so we do not hold the lock while idling.
 *
 * Returns 0 on success or a negative error code (-EINVAL for an unknown
 * caching mode, -ENODEV when snooping is unsupported, -ENOENT for a bad
 * handle, or an error from the wait/lock/set-cache-level steps).
 */
int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
{
	struct drm_i915_private *i915 = to_i915(dev);
	struct drm_i915_gem_caching *args = data;
	struct drm_i915_gem_object *obj;
	enum i915_cache_level level;
	int ret = 0;

	switch (args->caching) {
	case I915_CACHING_NONE:
		level = I915_CACHE_NONE;
		break;
	case I915_CACHING_CACHED:
		/*
		 * Due to a HW issue on BXT A stepping, GPU stores via a
		 * snooped mapping may leave stale data in a corresponding CPU
		 * cacheline, whereas normally such cachelines would get
		 * invalidated.
		 */
		if (!HAS_LLC(i915) && !HAS_SNOOP(i915))
			return -ENODEV;

		level = I915_CACHE_LLC;
		break;
	case I915_CACHING_DISPLAY:
		level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE;
		break;
	default:
		return -EINVAL;
	}

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/* Quick unlocked peek: skip the wait/lock dance if already set. */
	if (obj->cache_level == level)
		goto out;

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE,
				   MAX_SCHEDULE_TIMEOUT,
				   to_rps_client(file));
	if (ret)
		goto out;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		goto out;

	ret = i915_gem_object_set_cache_level(obj, level);
	mutex_unlock(&dev->struct_mutex);

out:
	i915_gem_object_put(obj);
	return ret;
}
4196
/*
 * Prepare buffer for display plane (scanout, cursors, etc).
 * Can be called from an uninterruptible phase (modesetting) and allows
 * any flushes to be pipelined (for pageflips).
 *
 * @obj: object to pin for scanout
 * @alignment: required GGTT alignment for the display engine
 * @view: optional GGTT view (e.g. rotated); NULL means the normal view
 *
 * Returns the pinned vma on success, or an ERR_PTR() on failure. Must be
 * called with struct_mutex held and balanced by
 * i915_gem_object_unpin_from_display_plane().
 */
struct i915_vma *
i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
				     u32 alignment,
				     const struct i915_ggtt_view *view)
{
	struct i915_vma *vma;
	int ret;

	lockdep_assert_held(&obj->base.dev->struct_mutex);

	/* Mark the global pin early so that we account for the
	 * display coherency whilst setting up the cache domains.
	 */
	obj->pin_global++;

	/* The display engine is not coherent with the LLC cache on gen6. As
	 * a result, we make sure that the pinning that is about to occur is
	 * done with uncached PTEs. This is lowest common denominator for all
	 * chipsets.
	 *
	 * However for gen6+, we could do better by using the GFDT bit instead
	 * of uncaching, which would allow us to flush all the LLC-cached data
	 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
	 */
	ret = i915_gem_object_set_cache_level(obj,
					      HAS_WT(to_i915(obj->base.dev)) ?
					      I915_CACHE_WT : I915_CACHE_NONE);
	if (ret) {
		vma = ERR_PTR(ret);
		goto err_unpin_global;
	}

	/* As the user may map the buffer once pinned in the display plane
	 * (e.g. libkms for the bootup splash), we have to ensure that we
	 * always use map_and_fenceable for all scanout buffers. However,
	 * it may simply be too big to fit into mappable, in which case
	 * put it anyway and hope that userspace can cope (but always first
	 * try to preserve the existing ABI).
	 */
	vma = ERR_PTR(-ENOSPC);
	if (!view || view->type == I915_GGTT_VIEW_NORMAL)
		vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
					       PIN_MAPPABLE | PIN_NONBLOCK);
	if (IS_ERR(vma)) {
		/* Second attempt: relax the mappable requirement where the
		 * hardware allows it.
		 */
		struct drm_i915_private *i915 = to_i915(obj->base.dev);
		unsigned int flags;

		/* Valleyview is definitely limited to scanning out the first
		 * 512MiB. Lets presume this behaviour was inherited from the
		 * g4x display engine and that all earlier gen are similarly
		 * limited. Testing suggests that it is a little more
		 * complicated than this. For example, Cherryview appears quite
		 * happy to scanout from anywhere within its global aperture.
		 */
		flags = 0;
		if (HAS_GMCH_DISPLAY(i915))
			flags = PIN_MAPPABLE;
		vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags);
	}
	if (IS_ERR(vma))
		goto err_unpin_global;

	vma->display_alignment = max_t(u64, vma->display_alignment, alignment);

	/* Treat this as an end-of-frame, like intel_user_framebuffer_dirty() */
	__i915_gem_object_flush_for_display(obj);
	intel_fb_obj_flush(obj, ORIGIN_DIRTYFB);

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	obj->base.read_domains |= I915_GEM_DOMAIN_GTT;

	return vma;

err_unpin_global:
	obj->pin_global--;
	return vma;
}
4281
4282 void
i915_gem_object_unpin_from_display_plane(struct i915_vma * vma)4283 i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
4284 {
4285 lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
4286
4287 if (WARN_ON(vma->obj->pin_global == 0))
4288 return;
4289
4290 if (--vma->obj->pin_global == 0)
4291 vma->display_alignment = I915_GTT_MIN_ALIGNMENT;
4292
4293 /* Bump the LRU to try and avoid premature eviction whilst flipping */
4294 i915_gem_object_bump_inactive_ggtt(vma->obj);
4295
4296 i915_vma_unpin(vma);
4297 }
4298
/**
 * Moves a single object to the CPU read, and possibly write domain.
 * @obj: object to act on
 * @write: requesting write or read-only access
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 *
 * Returns 0 on success or a negative error code from the wait. Must be
 * called with struct_mutex held.
 */
int
i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	lockdep_assert_held(&obj->base.dev->struct_mutex);

	/* Idle the object first; for write access wait for all readers too. */
	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_LOCKED |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT,
				   NULL);
	if (ret)
		return ret;

	/* Flush any pending write from every domain other than the CPU. */
	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* Flush the CPU cache if it's still invalid. */
	if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
		obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
	}

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU);

	/* If we're writing through the CPU, then the GPU read domains will
	 * need to be invalidated at next use.
	 */
	if (write)
		__start_cpu_write(obj);

	return 0;
}
4344
/* Throttle our rendering by waiting until the ring has completed our requests
 * emitted over 20 msec ago.
 *
 * Note that if we were to use the current jiffies each time around the loop,
 * we wouldn't escape the function with any frames outstanding if the time to
 * render a frame was over 20ms.
 *
 * This should get us reasonable parallelism between CPU and GPU but also
 * relatively low latency when blocking on a particular request to finish.
 *
 * Returns 0 when there is nothing to wait for, -EIO if the GPU is wedged,
 * or a negative error code propagated from the interruptible wait.
 */
static int
i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct drm_i915_file_private *file_priv = file->driver_priv;
	unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES;
	struct drm_i915_gem_request *request, *target = NULL;
	long ret;

	/* ABI: return -EIO if already wedged */
	if (i915_terminally_wedged(&dev_priv->gpu_error))
		return -EIO;

	/* lockmgr is the DragonFly equivalent of locking mm.lock here. */
	lockmgr(&file_priv->mm.lock, LK_EXCLUSIVE);
	list_for_each_entry(request, &file_priv->mm.request_list, client_link) {
		if (time_after_eq(request->emitted_jiffies, recent_enough))
			break;

		/* Any request older than the one we will wait upon can be
		 * retired from this client's list as it must already be
		 * complete once the target is.
		 */
		if (target) {
			list_del(&target->client_link);
			target->file_priv = NULL;
		}

		target = request;
	}
	if (target)
		i915_gem_request_get(target);
	lockmgr(&file_priv->mm.lock, LK_RELEASE);

	if (target == NULL)
		return 0;

	ret = i915_wait_request(target,
				I915_WAIT_INTERRUPTIBLE,
				MAX_SCHEDULE_TIMEOUT);
	i915_gem_request_put(target);

	return ret < 0 ? ret : 0;
}
4394
/**
 * i915_gem_object_ggtt_pin - find and pin a vma for the object in the GGTT
 * @obj: object to pin
 * @view: optional GGTT view; NULL for the normal (linear) view
 * @size: minimum size of the binding (0 means the object size)
 * @alignment: required alignment of the binding (0 for default)
 * @flags: PIN_* flags (PIN_MAPPABLE, PIN_NONBLOCK, ...); PIN_GLOBAL is
 *	always added
 *
 * Looks up (or creates) the vma for @obj in the global GTT, unbinding a
 * misplaced vma first if necessary, and pins it.
 *
 * Returns the pinned vma on success or an ERR_PTR() on failure. Must be
 * called with struct_mutex held.
 */
struct i915_vma *
i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
			 const struct i915_ggtt_view *view,
			 u64 size,
			 u64 alignment,
			 u64 flags)
{
	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
	struct i915_address_space *vm = &dev_priv->ggtt.base;
	struct i915_vma *vma;
	int ret;

	lockdep_assert_held(&obj->base.dev->struct_mutex);

	if (!view && flags & PIN_MAPPABLE) {
		/* If the required space is larger than the available
		 * aperture, we will not able to find a slot for the
		 * object and unbinding the object now will be in
		 * vain. Worse, doing so may cause us to ping-pong
		 * the object in and out of the Global GTT and
		 * waste a lot of cycles under the mutex.
		 */
		if (obj->base.size > dev_priv->ggtt.mappable_end)
			return ERR_PTR(-E2BIG);

		/* If NONBLOCK is set the caller is optimistically
		 * trying to cache the full object within the mappable
		 * aperture, and *must* have a fallback in place for
		 * situations where we cannot bind the object. We
		 * can be a little more lax here and use the fallback
		 * more often to avoid costly migrations of ourselves
		 * and other objects within the aperture.
		 *
		 * Half-the-aperture is used as a simple heuristic.
		 * More interesting would to do search for a free
		 * block prior to making the commitment to unbind.
		 * That caters for the self-harm case, and with a
		 * little more heuristics (e.g. NOFAULT, NOEVICT)
		 * we could try to minimise harm to others.
		 */
		if (flags & PIN_NONBLOCK &&
		    obj->base.size > dev_priv->ggtt.mappable_end / 2)
			return ERR_PTR(-ENOSPC);
	}

	vma = i915_vma_instance(obj, vm, view);
	if (unlikely(IS_ERR(vma)))
		return vma;

	if (i915_vma_misplaced(vma, size, alignment, flags)) {
		if (flags & PIN_NONBLOCK) {
			/* NONBLOCK forbids the eviction/unbind below. */
			if (i915_vma_is_pinned(vma) || i915_vma_is_active(vma))
				return ERR_PTR(-ENOSPC);

			if (flags & PIN_MAPPABLE &&
			    vma->fence_size > dev_priv->ggtt.mappable_end / 2)
				return ERR_PTR(-ENOSPC);
		}

		WARN(i915_vma_is_pinned(vma),
		     "bo is already pinned in ggtt with incorrect alignment:"
		     " offset=%08x, req.alignment=%llx,"
		     " req.map_and_fenceable=%d, vma->map_and_fenceable=%d\n",
		     i915_ggtt_offset(vma), alignment,
		     !!(flags & PIN_MAPPABLE),
		     i915_vma_is_map_and_fenceable(vma));
		ret = i915_vma_unbind(vma);
		if (ret)
			return ERR_PTR(ret);
	}

	ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL);
	if (ret)
		return ERR_PTR(ret);

	return vma;
}
4472
__busy_read_flag(unsigned int id)4473 static __always_inline unsigned int __busy_read_flag(unsigned int id)
4474 {
4475 /* Note that we could alias engines in the execbuf API, but
4476 * that would be very unwise as it prevents userspace from
4477 * fine control over engine selection. Ahem.
4478 *
4479 * This should be something like EXEC_MAX_ENGINE instead of
4480 * I915_NUM_ENGINES.
4481 */
4482 BUILD_BUG_ON(I915_NUM_ENGINES > 16);
4483 return 0x10000 << id;
4484 }
4485
__busy_write_id(unsigned int id)4486 static __always_inline unsigned int __busy_write_id(unsigned int id)
4487 {
4488 /* The uABI guarantees an active writer is also amongst the read
4489 * engines. This would be true if we accessed the activity tracking
4490 * under the lock, but as we perform the lookup of the object and
4491 * its activity locklessly we can not guarantee that the last_write
4492 * being active implies that we have set the same engine flag from
4493 * last_read - hence we always set both read and write busy for
4494 * last_write.
4495 */
4496 return id | __busy_read_flag(id);
4497 }
4498
4499 #pragma GCC diagnostic push
4500 #pragma GCC diagnostic ignored "-Wdiscarded-qualifiers"
4501
/*
 * __busy_set_if_active - report busy flags for a single dma_fence
 * @fence: the fence to inspect (may belong to a foreign driver)
 * @flag: encoder for the engine id, either __busy_read_flag or
 *	__busy_write_id
 *
 * Returns the encoded busy flags for the fence's engine, or 0 if the
 * fence is foreign or already signaled.
 */
static __always_inline unsigned int
__busy_set_if_active(const struct dma_fence *fence,
		     unsigned int (*flag)(unsigned int id))
{
	struct drm_i915_gem_request *rq;

	/* We have to check the current hw status of the fence as the uABI
	 * guarantees forward progress. We could rely on the idle worker
	 * to eventually flush us, but to minimise latency just ask the
	 * hardware.
	 *
	 * Note we only report on the status of native fences.
	 */
	if (!dma_fence_is_i915(fence))
		return 0;

	/* opencode to_request() in order to avoid const warnings */
	rq = container_of(fence, struct drm_i915_gem_request, fence);
	if (i915_gem_request_completed(rq))
		return 0;

	return flag(rq->engine->uabi_id);
}
4525 #pragma GCC diagnostic pop
4526
/* Report the busy flags for a shared (read) fence of the object. */
static __always_inline unsigned int
busy_check_reader(const struct dma_fence *fence)
{
	return __busy_set_if_active(fence, __busy_read_flag);
}
4532
4533 static __always_inline unsigned int
busy_check_writer(const struct dma_fence * fence)4534 busy_check_writer(const struct dma_fence *fence)
4535 {
4536 if (!fence)
4537 return 0;
4538
4539 return __busy_set_if_active(fence, __busy_write_id);
4540 }
4541
/**
 * i915_gem_busy_ioctl - report which engines are using an object
 * @dev: drm device
 * @data: ioctl blob, a struct drm_i915_gem_busy
 * @file: drm file that owns the object handle
 *
 * Fills args->busy with the write engine (low bits) and the set of read
 * engines (high bits) currently using the object. The whole lookup is
 * lockless: it runs under RCU and uses the reservation object's seqcount
 * to detect and retry concurrent fence updates.
 *
 * Returns 0 on success, -ENOENT for an unknown handle.
 */
int
i915_gem_busy_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_gem_busy *args = data;
	struct drm_i915_gem_object *obj;
	struct reservation_object_list *list;
	unsigned int seq;
	int err;

	err = -ENOENT;
	rcu_read_lock();
	obj = i915_gem_object_lookup_rcu(file, args->handle);
	if (!obj)
		goto out;

	/* A discrepancy here is that we do not report the status of
	 * non-i915 fences, i.e. even though we may report the object as idle,
	 * a call to set-domain may still stall waiting for foreign rendering.
	 * This also means that wait-ioctl may report an object as busy,
	 * where busy-ioctl considers it idle.
	 *
	 * We trade the ability to warn of foreign fences to report on which
	 * i915 engines are active for the object.
	 *
	 * Alternatively, we can trade that extra information on read/write
	 * activity with
	 *	args->busy =
	 *		!reservation_object_test_signaled_rcu(obj->resv, true);
	 * to report the overall busyness. This is what the wait-ioctl does.
	 *
	 */
retry:
	/* Sample the seqcount so we can detect a concurrent fence update. */
	seq = raw_read_seqcount(&obj->resv->seq);

	/* Translate the exclusive fence to the READ *and* WRITE engine */
	args->busy = busy_check_writer(rcu_dereference(obj->resv->fence_excl));

	/* Translate shared fences to READ set of engines */
	list = rcu_dereference(obj->resv->fence);
	if (list) {
		unsigned int shared_count = list->shared_count, i;

		for (i = 0; i < shared_count; ++i) {
			struct dma_fence *fence =
				rcu_dereference(list->shared[i]);

			args->busy |= busy_check_reader(fence);
		}
	}

	/* Only retry when we claimed busy; an idle result is always safe. */
	if (args->busy && read_seqcount_retry(&obj->resv->seq, seq))
		goto retry;

	err = 0;
out:
	rcu_read_unlock();
	return err;
}
4601
/* Legacy uABI entry point: simply forwards to the 20ms ring throttle. */
int
i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
			struct drm_file *file_priv)
{
	return i915_gem_ring_throttle(dev, file_priv);
}
4608
4609 int
i915_gem_madvise_ioctl(struct drm_device * dev,void * data,struct drm_file * file_priv)4610 i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
4611 struct drm_file *file_priv)
4612 {
4613 struct drm_i915_private *dev_priv = to_i915(dev);
4614 struct drm_i915_gem_madvise *args = data;
4615 struct drm_i915_gem_object *obj;
4616 int err;
4617
4618 switch (args->madv) {
4619 case I915_MADV_DONTNEED:
4620 case I915_MADV_WILLNEED:
4621 break;
4622 default:
4623 return -EINVAL;
4624 }
4625
4626 obj = i915_gem_object_lookup(file_priv, args->handle);
4627 if (!obj)
4628 return -ENOENT;
4629
4630 err = mutex_lock_interruptible(&obj->mm.lock);
4631 if (err)
4632 goto out;
4633
4634 if (i915_gem_object_has_pages(obj) &&
4635 i915_gem_object_is_tiled(obj) &&
4636 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
4637 if (obj->mm.madv == I915_MADV_WILLNEED) {
4638 GEM_BUG_ON(!obj->mm.quirked);
4639 __i915_gem_object_unpin_pages(obj);
4640 obj->mm.quirked = false;
4641 }
4642 if (args->madv == I915_MADV_WILLNEED) {
4643 GEM_BUG_ON(obj->mm.quirked);
4644 __i915_gem_object_pin_pages(obj);
4645 obj->mm.quirked = true;
4646 }
4647 }
4648
4649 if (obj->mm.madv != __I915_MADV_PURGED)
4650 obj->mm.madv = args->madv;
4651
4652 /* if the object is no longer attached, discard its backing storage */
4653 if (obj->mm.madv == I915_MADV_DONTNEED &&
4654 !i915_gem_object_has_pages(obj))
4655 i915_gem_object_truncate(obj);
4656
4657 args->retained = obj->mm.madv != __I915_MADV_PURGED;
4658 mutex_unlock(&obj->mm.lock);
4659
4660 out:
4661 i915_gem_object_put(obj);
4662 return err;
4663 }
4664
4665 static void
frontbuffer_retire(struct i915_gem_active * active,struct drm_i915_gem_request * request)4666 frontbuffer_retire(struct i915_gem_active *active,
4667 struct drm_i915_gem_request *request)
4668 {
4669 struct drm_i915_gem_object *obj =
4670 container_of(active, typeof(*obj), frontbuffer_write);
4671
4672 intel_fb_obj_flush(obj, ORIGIN_CS);
4673 }
4674
/**
 * i915_gem_object_init - common initialisation of a GEM object
 * @obj: freshly allocated object to initialise
 * @ops: backend operations (get/put pages, pwrite, release, ...)
 *
 * Sets up the locks, lists, reservation object and activity tracking that
 * every i915 GEM object needs, independent of its backing storage.
 * The lockinit() calls are the DragonFly replacements for Linux mutex
 * initialisation.
 */
void i915_gem_object_init(struct drm_i915_gem_object *obj,
			  const struct drm_i915_gem_object_ops *ops)
{
	lockinit(&obj->mm.lock, "i9goml", 0, LK_CANRECURSE);

	INIT_LIST_HEAD(&obj->vma_list);
	INIT_LIST_HEAD(&obj->lut_list);
	INIT_LIST_HEAD(&obj->batch_pool_link);

	obj->ops = ops;

	/* Objects own their reservation object unless sharing via dma-buf. */
	reservation_object_init(&obj->__builtin_resv);
	obj->resv = &obj->__builtin_resv;

	obj->frontbuffer_ggtt_origin = ORIGIN_GTT;
	init_request_active(&obj->frontbuffer_write, frontbuffer_retire);

	obj->mm.madv = I915_MADV_WILLNEED;
	INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN);
	lockinit(&obj->mm.get_page.lock, "i915ogpl", 0, LK_CANRECURSE);

	i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size);
}
4698
/* Default backend for shmem-backed objects created via
 * i915_gem_object_create(): struct-page backed and shrinkable.
 */
static const struct drm_i915_gem_object_ops i915_gem_object_ops = {
	.flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE |
		 I915_GEM_OBJECT_IS_SHRINKABLE,

	.get_pages = i915_gem_object_get_pages_gtt,
	.put_pages = i915_gem_object_put_pages_gtt,

	.pwrite = i915_gem_object_pwrite_gtt,
};
4708
/*
 * i915_gem_object_create_shmem - attach backing storage to a new object
 * @dev: drm device
 * @obj: base GEM object to initialise
 * @size: requested object size in bytes
 *
 * On Linux this would create a shmemfs (or gemfs) file to back the object;
 * on DragonFly the backing is provided by drm_gem_object_init() and the
 * shmem paths are compiled out.
 *
 * Returns 0 on success or a negative error code.
 */
static int i915_gem_object_create_shmem(struct drm_device *dev,
					struct drm_gem_object *obj,
					size_t size)
{
#ifndef __DragonFly__
	struct drm_i915_private *i915 = to_i915(dev);
	unsigned long flags = VM_NORESERVE;
	struct file *filp;
#endif

#ifndef __DragonFly__
	drm_gem_private_object_init(dev, obj, size);
#else
	drm_gem_object_init(dev, obj, size);
#endif

#ifndef __DragonFly__
	if (i915->mm.gemfs)
		filp = shmem_file_setup_with_mnt(i915->mm.gemfs, "i915", size,
						 flags);
	else
		filp = shmem_file_setup("i915", size, flags);

	if (IS_ERR(filp))
		return PTR_ERR(filp);

	obj->filp = filp;
#endif

	return 0;
}
4740
/**
 * i915_gem_object_create - allocate a new shmem-backed GEM object
 * @dev_priv: i915 device
 * @size: requested size in bytes
 *
 * Allocates the object, attaches backing storage and initialises the
 * CPU read/write domains and the default cache coherency for the device.
 *
 * Returns the new object, or an ERR_PTR() (-E2BIG for oversized requests,
 * -ENOMEM on allocation failure, or an error from the shmem setup).
 */
struct drm_i915_gem_object *
i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size)
{
	struct drm_i915_gem_object *obj;
#if 0
	struct address_space *mapping;
#endif
	unsigned int cache_level;
	gfp_t mask;
	int ret;

	/* There is a prevalence of the assumption that we fit the object's
	 * page count inside a 32bit _signed_ variable. Let's document this and
	 * catch if we ever need to fix it. In the meantime, if you do spot
	 * such a local variable, please consider fixing!
	 */
	if (size >> PAGE_SHIFT > INT_MAX)
		return ERR_PTR(-E2BIG);

	if (overflows_type(size, obj->base.size))
		return ERR_PTR(-E2BIG);

	obj = i915_gem_object_alloc(dev_priv);
	if (obj == NULL)
		return ERR_PTR(-ENOMEM);

	ret = i915_gem_object_create_shmem(&dev_priv->drm, &obj->base, size);
	if (ret)
		goto fail;

	mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
	if (IS_I965GM(dev_priv) || IS_I965G(dev_priv)) {
		/* 965gm cannot relocate objects above 4GiB. */
		mask &= ~__GFP_HIGHMEM;
		mask |= __GFP_DMA32;
	}

	/* The mapping gfp-mask tweak is a Linux-only path, compiled out here. */
#if 0
	mapping = obj->base.filp->f_mapping;
	mapping_set_gfp_mask(mapping, mask);
#endif

	i915_gem_object_init(obj, &i915_gem_object_ops);

	obj->base.write_domain = I915_GEM_DOMAIN_CPU;
	obj->base.read_domains = I915_GEM_DOMAIN_CPU;

	if (HAS_LLC(dev_priv))
		/* On some devices, we can have the GPU use the LLC (the CPU
		 * cache) for about a 10% performance improvement
		 * compared to uncached.  Graphics requests other than
		 * display scanout are coherent with the CPU in
		 * accessing this cache.  This means in this mode we
		 * don't need to clflush on the CPU side, and on the
		 * GPU side we only need to flush internal caches to
		 * get data visible to the CPU.
		 *
		 * However, we maintain the display planes as UC, and so
		 * need to rebind when first used as such.
		 */
		cache_level = I915_CACHE_LLC;
	else
		cache_level = I915_CACHE_NONE;

	i915_gem_object_set_cache_coherency(obj, cache_level);

	trace_i915_gem_object_create(obj);

	return obj;

fail:
	i915_gem_object_free(obj);
	return ERR_PTR(ret);
}
4815
/*
 * discard_backing_storage - can we skip writeback on final free?
 * @obj: object being freed
 *
 * Returns true when the pages are about to be released anyway so their
 * contents need not be preserved. Note that on DragonFly the filp
 * refcount check is compiled out, so this conservatively returns false
 * for filp-backed objects.
 */
static bool discard_backing_storage(struct drm_i915_gem_object *obj)
{
	/* If we are the last user of the backing storage (be it shmemfs
	 * pages or stolen etc), we know that the pages are going to be
	 * immediately released. In this case, we can then skip copying
	 * back the contents from the GPU.
	 */

	if (obj->mm.madv != I915_MADV_WILLNEED)
		return false;

	if (obj->base.filp == NULL)
		return true;

	/* At first glance, this looks racy, but then again so would be
	 * userspace racing mmap against close. However, the first external
	 * reference to the filp can only be obtained through the
	 * i915_gem_mmap_ioctl() which safeguards us against the user
	 * acquiring such a reference whilst we are in the middle of
	 * freeing the object.
	 */
#if 0
	return atomic_long_read(&obj->base.filp->f_count) == 1;
#else
	return false;
#endif
}
4843
/*
 * __i915_gem_free_objects - tear down a batch of deferred-freed objects
 * @i915: i915 device
 * @freed: llist of objects chained through obj->freed
 *
 * Runs from the free worker (or the synchronous flush): closes all VMA,
 * drops the pages, releases backend state and finally frees each object.
 * Takes struct_mutex per object only for the VMA/list surgery.
 */
static void __i915_gem_free_objects(struct drm_i915_private *i915,
				    struct llist_node *freed)
{
	struct drm_i915_gem_object *obj, *on;

	/* Hold a wakeref across the teardown: unbinding may touch the HW. */
	intel_runtime_pm_get(i915);
	llist_for_each_entry_safe(obj, on, freed, freed) {
		struct i915_vma *vma, *vn;

		trace_i915_gem_object_destroy(obj);

		mutex_lock(&i915->drm.struct_mutex);

		GEM_BUG_ON(i915_gem_object_is_active(obj));
		list_for_each_entry_safe(vma, vn,
					 &obj->vma_list, obj_link) {
			GEM_BUG_ON(i915_vma_is_active(vma));
			/* Force-drop any stale pin accounting before close. */
			vma->flags &= ~I915_VMA_PIN_MASK;
			i915_vma_close(vma);
		}
		GEM_BUG_ON(!list_empty(&obj->vma_list));
		GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma_tree));

		/* This serializes freeing with the shrinker. Since the free
		 * is delayed, first by RCU then by the workqueue, we want the
		 * shrinker to be able to free pages of unreferenced objects,
		 * or else we may oom whilst there are plenty of deferred
		 * freed objects.
		 */
		if (i915_gem_object_has_pages(obj)) {
			lockmgr(&i915->mm.obj_lock, LK_EXCLUSIVE);
			list_del_init(&obj->mm.link);
			lockmgr(&i915->mm.obj_lock, LK_RELEASE);
		}

		mutex_unlock(&i915->drm.struct_mutex);

		GEM_BUG_ON(obj->bind_count);
		GEM_BUG_ON(obj->userfault_count);
		GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits));
		GEM_BUG_ON(!list_empty(&obj->lut_list));

		if (obj->ops->release)
			obj->ops->release(obj);

		/* DragonFly: silently clear leaked page pins (Linux WARNs). */
#if 0
		if (WARN_ON(i915_gem_object_has_pinned_pages(obj)))
#else
		if (i915_gem_object_has_pinned_pages(obj))
#endif
			atomic_set(&obj->mm.pages_pin_count, 0);
		__i915_gem_object_put_pages(obj, I915_MM_NORMAL);
		GEM_BUG_ON(i915_gem_object_has_pages(obj));

		if (obj->base.import_attach)
			drm_prime_gem_destroy(&obj->base, NULL);

		reservation_object_fini(&obj->__builtin_resv);
		drm_gem_object_release(&obj->base);
		i915_gem_info_remove_obj(i915, obj->base.size);

		kfree(obj->bit_17);
		i915_gem_object_free(obj);

		/* Be a good citizen between objects; more remain on the list. */
		if (on)
			cond_resched();
	}
	intel_runtime_pm_put(i915);
}
4913
/*
 * i915_gem_flush_free_objects - synchronously free one pending object
 * @i915: i915 device
 *
 * Pops the oldest entry off the deferred free_list (if any) and frees it
 * immediately, keeping the list short without waiting for the worker.
 */
static void i915_gem_flush_free_objects(struct drm_i915_private *i915)
{
	struct llist_node *freed;

	/* Free the oldest, most stale object to keep the free_list short */
	freed = NULL;
	if (!llist_empty(&i915->mm.free_list)) { /* quick test for hotpath */
		/* Only one consumer of llist_del_first() allowed */
		lockmgr(&i915->mm.free_lock, LK_EXCLUSIVE);
		freed = llist_del_first(&i915->mm.free_list);
		lockmgr(&i915->mm.free_lock, LK_RELEASE);
	}
	if (unlikely(freed)) {
		/* Detach the single node so we free exactly one object. */
		freed->next = NULL;
		__i915_gem_free_objects(i915, freed);
	}
}
4931
/*
 * __i915_gem_free_work - worker draining the deferred object free_list
 * @work: embedded in drm_i915_private.mm.free_work
 *
 * Repeatedly takes the whole free_list and tears the objects down,
 * dropping mm.free_lock while doing the (potentially slow) frees.
 * Bails out early when resched is needed; the work will be rescheduled
 * by the next llist_add in __i915_gem_free_object_rcu().
 */
static void __i915_gem_free_work(struct work_struct *work)
{
	struct drm_i915_private *i915 =
		container_of(work, struct drm_i915_private, mm.free_work);
	struct llist_node *freed;

	/* All file-owned VMA should have been released by this point through
	 * i915_gem_close_object(), or earlier by i915_gem_context_close().
	 * However, the object may also be bound into the global GTT (e.g.
	 * older GPUs without per-process support, or for direct access through
	 * the GTT either for the user or for scanout). Those VMA still need to
	 * unbound now.
	 */

	lockmgr(&i915->mm.free_lock, LK_EXCLUSIVE);
	while ((freed = llist_del_all(&i915->mm.free_list))) {
		lockmgr(&i915->mm.free_lock, LK_RELEASE);

		__i915_gem_free_objects(i915, freed);
		if (need_resched())
			return;

		lockmgr(&i915->mm.free_lock, LK_EXCLUSIVE);
	}
	lockmgr(&i915->mm.free_lock, LK_RELEASE);
}
4958
__i915_gem_free_object_rcu(struct rcu_head * head)4959 static void __i915_gem_free_object_rcu(struct rcu_head *head)
4960 {
4961 struct drm_i915_gem_object *obj =
4962 container_of(head, typeof(*obj), rcu);
4963 struct drm_i915_private *i915 = to_i915(obj->base.dev);
4964
4965 /* We can't simply use call_rcu() from i915_gem_free_object()
4966 * as we need to block whilst unbinding, and the call_rcu
4967 * task may be called from softirq context. So we take a
4968 * detour through a worker.
4969 */
4970 if (llist_add(&obj->freed, &i915->mm.free_list))
4971 schedule_work(&i915->mm.free_work);
4972 }
4973
i915_gem_free_object(struct drm_gem_object * gem_obj)4974 void i915_gem_free_object(struct drm_gem_object *gem_obj)
4975 {
4976 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
4977
4978 if (obj->mm.quirked)
4979 __i915_gem_object_unpin_pages(obj);
4980
4981 if (discard_backing_storage(obj))
4982 obj->mm.madv = I915_MADV_DONTNEED;
4983
4984 /* Before we free the object, make sure any pure RCU-only
4985 * read-side critical sections are complete, e.g.
4986 * i915_gem_busy_ioctl(). For the corresponding synchronized
4987 * lookup see i915_gem_object_lookup_rcu().
4988 */
4989 call_rcu(&obj->rcu, __i915_gem_free_object_rcu);
4990 }
4991
__i915_gem_object_release_unless_active(struct drm_i915_gem_object * obj)4992 void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj)
4993 {
4994 lockdep_assert_held(&obj->base.dev->struct_mutex);
4995
4996 if (!i915_gem_object_has_active_reference(obj) &&
4997 i915_gem_object_is_active(obj))
4998 i915_gem_object_set_active_reference(obj);
4999 else
5000 i915_gem_object_put(obj);
5001 }
5002
assert_kernel_context_is_current(struct drm_i915_private * dev_priv)5003 static void assert_kernel_context_is_current(struct drm_i915_private *dev_priv)
5004 {
5005 struct intel_engine_cs *engine;
5006 enum intel_engine_id id;
5007
5008 for_each_engine(engine, dev_priv, id)
5009 GEM_BUG_ON(engine->last_retired_context &&
5010 !i915_gem_context_is_kernel(engine->last_retired_context));
5011 }
5012
i915_gem_sanitize(struct drm_i915_private * i915)5013 void i915_gem_sanitize(struct drm_i915_private *i915)
5014 {
5015 if (i915_terminally_wedged(&i915->gpu_error)) {
5016 mutex_lock(&i915->drm.struct_mutex);
5017 i915_gem_unset_wedged(i915);
5018 mutex_unlock(&i915->drm.struct_mutex);
5019 }
5020
5021 /*
5022 * If we inherit context state from the BIOS or earlier occupants
5023 * of the GPU, the GPU may be in an inconsistent state when we
5024 * try to take over. The only way to remove the earlier state
5025 * is by resetting. However, resetting on earlier gen is tricky as
5026 * it may impact the display and we are uncertain about the stability
5027 * of the reset, so this could be applied to even earlier gen.
5028 */
5029 if (INTEL_GEN(i915) >= 5) {
5030 int reset = intel_gpu_reset(i915, ALL_ENGINES);
5031 WARN_ON(reset && reset != -ENODEV);
5032 }
5033 }
5034
/* Flush all GPU work and quiesce GEM ahead of system suspend/hibernation.
 * Returns 0 on success or a negative error code; on error the device is
 * left unlocked with runtime pm released.
 */
int i915_gem_suspend(struct drm_i915_private *dev_priv)
{
	struct drm_device *dev = &dev_priv->drm;
	int ret;

	/* Hold the device awake and stop GT powersaving for the duration. */
	intel_runtime_pm_get(dev_priv);
	intel_suspend_gt_powersave(dev_priv);

	mutex_lock(&dev->struct_mutex);

	/* We have to flush all the executing contexts to main memory so
	 * that they can be saved in the hibernation image. To ensure the
	 * last context image is coherent, we have to switch away from it.
	 * That leaves the dev_priv->kernel_context still active when
	 * we actually suspend, and its image in memory may not match the GPU
	 * state. Fortunately, the kernel_context is disposable and we do
	 * not rely on its state.
	 */
	if (!i915_terminally_wedged(&dev_priv->gpu_error)) {
		ret = i915_gem_switch_to_kernel_context(dev_priv);
		if (ret)
			goto err_unlock;

		ret = i915_gem_wait_for_idle(dev_priv,
					     I915_WAIT_INTERRUPTIBLE |
					     I915_WAIT_LOCKED);
		/* -EIO means terminally wedged; carry on with the suspend. */
		if (ret && ret != -EIO)
			goto err_unlock;

		assert_kernel_context_is_current(dev_priv);
	}
	i915_gem_contexts_lost(dev_priv);
	mutex_unlock(&dev->struct_mutex);

	intel_guc_suspend(dev_priv);

	/* Stop the background hangcheck/retirement workers before idling. */
	cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
	cancel_delayed_work_sync(&dev_priv->gt.retire_work);

	/* As the idle_work is rearming if it detects a race, play safe and
	 * repeat the flush until it is definitely idle.
	 */
	drain_delayed_work(&dev_priv->gt.idle_work);

	/* Assert that we successfully flushed all the work and
	 * reset the GPU back to its idle, low power state.
	 */
	WARN_ON(dev_priv->gt.awake);
	if (WARN_ON(!intel_engines_are_idle(dev_priv)))
		i915_gem_set_wedged(dev_priv); /* no hope, discard everything */

	/*
	 * Neither the BIOS, ourselves or any other kernel
	 * expects the system to be in execlists mode on startup,
	 * so we need to reset the GPU back to legacy mode. And the only
	 * known way to disable logical contexts is through a GPU reset.
	 *
	 * So in order to leave the system in a known default configuration,
	 * always reset the GPU upon unload and suspend. Afterwards we then
	 * clean up the GEM state tracking, flushing off the requests and
	 * leaving the system in a known idle state.
	 *
	 * Note that is of the upmost importance that the GPU is idle and
	 * all stray writes are flushed *before* we dismantle the backing
	 * storage for the pinned objects.
	 *
	 * However, since we are uncertain that resetting the GPU on older
	 * machines is a good idea, we don't - just in case it leaves the
	 * machine in an unusable condition.
	 */
	i915_gem_sanitize(dev_priv);

	intel_runtime_pm_put(dev_priv);
	return 0;

err_unlock:
	mutex_unlock(&dev->struct_mutex);
	intel_runtime_pm_put(dev_priv);
	return ret;
}
5115
i915_gem_resume(struct drm_i915_private * dev_priv)5116 void i915_gem_resume(struct drm_i915_private *dev_priv)
5117 {
5118 struct drm_device *dev = &dev_priv->drm;
5119
5120 WARN_ON(dev_priv->gt.awake);
5121
5122 mutex_lock(&dev->struct_mutex);
5123 i915_gem_restore_gtt_mappings(dev_priv);
5124 i915_gem_restore_fences(dev_priv);
5125
5126 /* As we didn't flush the kernel context before suspend, we cannot
5127 * guarantee that the context image is complete. So let's just reset
5128 * it and start again.
5129 */
5130 dev_priv->gt.resume(dev_priv);
5131
5132 mutex_unlock(&dev->struct_mutex);
5133 }
5134
/* Program the hardware swizzling controls to match the detected bit-6
 * swizzle mode. No-op before gen5 or when swizzling is disabled.
 */
void i915_gem_init_swizzling(struct drm_i915_private *dev_priv)
{
	if (INTEL_GEN(dev_priv) < 5 ||
	    dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
		return;

	/* Enable tiled-surface swizzling in the display arbiter as well. */
	I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
				 DISP_TILE_SURFACE_SWIZZLING);

	if (IS_GEN5(dev_priv))
		return;

	/* Gen6+ additionally needs the global swizzle control bit... */
	I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
	/* ...and the per-generation arbiter swizzle mode selected. */
	if (IS_GEN6(dev_priv))
		I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB));
	else if (IS_GEN7(dev_priv))
		I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB));
	else if (IS_GEN8(dev_priv))
		I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW));
	else
		BUG(); /* later gens must not reach here with swizzling on */
}
5157
/* Zero all control registers of the ring at @base so it reads as idle. */
static void init_unused_ring(struct drm_i915_private *dev_priv, u32 base)
{
	I915_WRITE(RING_CTL(base), 0);
	I915_WRITE(RING_HEAD(base), 0);
	I915_WRITE(RING_TAIL(base), 0);
	I915_WRITE(RING_START(base), 0);
}
5165
init_unused_rings(struct drm_i915_private * dev_priv)5166 static void init_unused_rings(struct drm_i915_private *dev_priv)
5167 {
5168 if (IS_I830(dev_priv)) {
5169 init_unused_ring(dev_priv, PRB1_BASE);
5170 init_unused_ring(dev_priv, SRB0_BASE);
5171 init_unused_ring(dev_priv, SRB1_BASE);
5172 init_unused_ring(dev_priv, SRB2_BASE);
5173 init_unused_ring(dev_priv, SRB3_BASE);
5174 } else if (IS_GEN2(dev_priv)) {
5175 init_unused_ring(dev_priv, SRB0_BASE);
5176 init_unused_ring(dev_priv, SRB1_BASE);
5177 } else if (IS_GEN3(dev_priv)) {
5178 init_unused_ring(dev_priv, PRB1_BASE);
5179 init_unused_ring(dev_priv, PRB2_BASE);
5180 }
5181 }
5182
__i915_gem_restart_engines(void * data)5183 static int __i915_gem_restart_engines(void *data)
5184 {
5185 struct drm_i915_private *i915 = data;
5186 struct intel_engine_cs *engine;
5187 enum intel_engine_id id;
5188 int err;
5189
5190 for_each_engine(engine, i915, id) {
5191 err = engine->init_hw(engine);
5192 if (err)
5193 return err;
5194 }
5195
5196 return 0;
5197 }
5198
/* Program the GPU hardware state: workaround registers, swizzling, unused
 * rings, PPGTT, engines and firmware-backed units. Returns 0 or a negative
 * error code (-EIO when the GPU is terminally wedged).
 */
int i915_gem_init_hw(struct drm_i915_private *dev_priv)
{
	int ret;

	dev_priv->gt.last_init_time = ktime_get();

	/* Double layer security blanket, see i915_gem_init() */
	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);

	/* Program the eDRAM IDI hash mask on pre-gen9 parts with eLLC. */
	if (HAS_EDRAM(dev_priv) && INTEL_GEN(dev_priv) < 9)
		I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf));

	if (IS_HASWELL(dev_priv))
		I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev_priv) ?
			   LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED);

	/* Disable the PCH reset handshake on parts without a PCH. */
	if (HAS_PCH_NOP(dev_priv)) {
		if (IS_IVYBRIDGE(dev_priv)) {
			u32 temp = I915_READ(GEN7_MSG_CTL);
			temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK);
			I915_WRITE(GEN7_MSG_CTL, temp);
		} else if (INTEL_GEN(dev_priv) >= 7) {
			u32 temp = I915_READ(HSW_NDE_RSTWRN_OPT);
			temp &= ~RESET_PCH_HANDSHAKE_ENABLE;
			I915_WRITE(HSW_NDE_RSTWRN_OPT, temp);
		}
	}

	i915_gem_init_swizzling(dev_priv);

	/*
	 * At least 830 can leave some of the unused rings
	 * "active" (ie. head != tail) after resume which
	 * will prevent c3 entry. Makes sure all unused rings
	 * are totally idle.
	 */
	init_unused_rings(dev_priv);

	BUG_ON(!dev_priv->kernel_context);
	if (i915_terminally_wedged(&dev_priv->gpu_error)) {
		ret = -EIO;
		goto out;
	}

	ret = i915_ppgtt_init_hw(dev_priv);
	if (ret) {
		DRM_ERROR("PPGTT enable HW failed %d\n", ret);
		goto out;
	}

	/* Need to do basic initialisation of all rings first: */
	ret = __i915_gem_restart_engines(dev_priv);
	if (ret)
		goto out;

	intel_mocs_init_l3cc_table(dev_priv);

	/* We can't enable contexts until all firmware is loaded */
	ret = intel_uc_init_hw(dev_priv);
	if (ret)
		goto out;

out:
	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
	return ret;
}
5265
intel_sanitize_semaphores(struct drm_i915_private * dev_priv,int value)5266 bool intel_sanitize_semaphores(struct drm_i915_private *dev_priv, int value)
5267 {
5268 if (INTEL_INFO(dev_priv)->gen < 6)
5269 return false;
5270
5271 /* TODO: make semaphores and Execlists play nicely together */
5272 if (i915_modparams.enable_execlists)
5273 return false;
5274
5275 if (value >= 0)
5276 return value;
5277
5278 /* Enable semaphores on SNB when IO remapping is off */
5279 if (IS_GEN6(dev_priv) && intel_vtd_active())
5280 return false;
5281
5282 return true;
5283 }
5284
/* One-time GEM initialisation at driver load: selects the submission
 * backend, sets up the GGTT, contexts and engines, then programs the
 * hardware. Returns 0 or a negative error code; an -EIO from the hardware
 * bring-up wedges the GPU instead of failing the load.
 */
int i915_gem_init(struct drm_i915_private *dev_priv)
{
	int ret;

	/*
	 * We need to fallback to 4K pages since gvt gtt handling doesn't
	 * support huge page entries - we will need to check either hypervisor
	 * mm can support huge guest page or just do emulation in gvt.
	 */
	if (intel_vgpu_active(dev_priv))
		mkwrite_device_info(dev_priv)->page_sizes =
			I915_GTT_PAGE_SIZE_4K;

	dev_priv->mm.unordered_timeline = dma_fence_context_alloc(1);

	/* Select the legacy ring-buffer or execlists submission vfuncs. */
	if (!i915_modparams.enable_execlists) {
		dev_priv->gt.resume = intel_legacy_submission_resume;
		dev_priv->gt.cleanup_engine = intel_engine_cleanup;
	} else {
		dev_priv->gt.resume = intel_lr_context_resume;
		dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup;
	}

	ret = i915_gem_init_userptr(dev_priv);
	if (ret)
		return ret;

	/* This is just a security blanket to placate dragons.
	 * On some systems, we very sporadically observe that the first TLBs
	 * used by the CS may be stale, despite us poking the TLB reset. If
	 * we hold the forcewake during initialisation these problems
	 * just magically go away.
	 */
	mutex_lock(&dev_priv->drm.struct_mutex);
	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);

	ret = i915_gem_init_ggtt(dev_priv);
	if (ret)
		goto out_unlock;

	ret = i915_gem_contexts_init(dev_priv);
	if (ret)
		goto out_unlock;

	ret = intel_engines_init(dev_priv);
	if (ret)
		goto out_unlock;

	ret = i915_gem_init_hw(dev_priv);
	if (ret == -EIO) {
		/* Allow engine initialisation to fail by marking the GPU as
		 * wedged. But we only want to do this where the GPU is angry,
		 * for all other failure, such as an allocation failure, bail.
		 */
		if (!i915_terminally_wedged(&dev_priv->gpu_error)) {
			DRM_ERROR("Failed to initialize GPU, declaring it wedged\n");
			i915_gem_set_wedged(dev_priv);
		}
		ret = 0;
	}

out_unlock:
	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
	mutex_unlock(&dev_priv->drm.struct_mutex);

	return ret;
}
5352
/* Sanitize inherited GPU state as soon as mmio access becomes available. */
void i915_gem_init_mmio(struct drm_i915_private *i915)
{
	i915_gem_sanitize(i915);
}
5357
5358 void
i915_gem_cleanup_engines(struct drm_i915_private * dev_priv)5359 i915_gem_cleanup_engines(struct drm_i915_private *dev_priv)
5360 {
5361 struct intel_engine_cs *engine;
5362 enum intel_engine_id id;
5363
5364 for_each_engine(engine, dev_priv, id)
5365 dev_priv->gt.cleanup_engine(engine);
5366 }
5367
5368 void
i915_gem_load_init_fences(struct drm_i915_private * dev_priv)5369 i915_gem_load_init_fences(struct drm_i915_private *dev_priv)
5370 {
5371 int i;
5372
5373 if (INTEL_INFO(dev_priv)->gen >= 7 && !IS_VALLEYVIEW(dev_priv) &&
5374 !IS_CHERRYVIEW(dev_priv))
5375 dev_priv->num_fence_regs = 32;
5376 else if (INTEL_INFO(dev_priv)->gen >= 4 ||
5377 IS_I945G(dev_priv) || IS_I945GM(dev_priv) ||
5378 IS_G33(dev_priv) || IS_PINEVIEW(dev_priv))
5379 dev_priv->num_fence_regs = 16;
5380 else
5381 dev_priv->num_fence_regs = 8;
5382
5383 if (intel_vgpu_active(dev_priv))
5384 dev_priv->num_fence_regs =
5385 I915_READ(vgtif_reg(avail_rs.fence_num));
5386
5387 /* Initialize fence registers to zero */
5388 for (i = 0; i < dev_priv->num_fence_regs; i++) {
5389 struct drm_i915_fence_reg *fence = &dev_priv->fence_regs[i];
5390
5391 fence->i915 = dev_priv;
5392 fence->id = i;
5393 list_add_tail(&fence->link, &dev_priv->mm.fence_list);
5394 }
5395 i915_gem_restore_fences(dev_priv);
5396
5397 i915_gem_detect_bit_6_swizzle(dev_priv);
5398 }
5399
/* Allocate the GEM slab caches and initialise the mm bookkeeping at driver
 * load. Returns 0 or a negative error code; on failure every cache created
 * so far is destroyed via the unwind chain below.
 */
int
i915_gem_load_init(struct drm_i915_private *dev_priv)
{
	int err = -ENOMEM;

	dev_priv->objects = KMEM_CACHE(drm_i915_gem_object, SLAB_HWCACHE_ALIGN);
	if (!dev_priv->objects)
		goto err_out;

	dev_priv->vmas = KMEM_CACHE(i915_vma, SLAB_HWCACHE_ALIGN);
	if (!dev_priv->vmas)
		goto err_objects;

	dev_priv->luts = KMEM_CACHE(i915_lut_handle, 0);
	if (!dev_priv->luts)
		goto err_vmas;

	/* TYPESAFE_BY_RCU allows request lookup under rcu_read_lock(). */
	dev_priv->requests = KMEM_CACHE(drm_i915_gem_request,
					SLAB_HWCACHE_ALIGN |
					SLAB_RECLAIM_ACCOUNT |
					SLAB_TYPESAFE_BY_RCU);
	if (!dev_priv->requests)
		goto err_luts;

	dev_priv->dependencies = KMEM_CACHE(i915_dependency,
					    SLAB_HWCACHE_ALIGN |
					    SLAB_RECLAIM_ACCOUNT);
	if (!dev_priv->dependencies)
		goto err_requests;

	dev_priv->priorities = KMEM_CACHE(i915_priolist, SLAB_HWCACHE_ALIGN);
	if (!dev_priv->priorities)
		goto err_dependencies;

	/* The global timeline is created under struct_mutex. */
	mutex_lock(&dev_priv->drm.struct_mutex);
	INIT_LIST_HEAD(&dev_priv->gt.timelines);
	err = i915_gem_timeline_init__global(dev_priv);
	mutex_unlock(&dev_priv->drm.struct_mutex);
	if (err)
		goto err_priorities;

	INIT_WORK(&dev_priv->mm.free_work, __i915_gem_free_work);

	/* Locks and lists for object/free tracking. */
	lockinit(&dev_priv->mm.obj_lock, "i9dpmmo", 0, 0);
	lockinit(&dev_priv->mm.free_lock, "i9dpmmf", 0, 0);
	init_llist_head(&dev_priv->mm.free_list);
	INIT_LIST_HEAD(&dev_priv->mm.unbound_list);
	INIT_LIST_HEAD(&dev_priv->mm.bound_list);
	INIT_LIST_HEAD(&dev_priv->mm.fence_list);
	INIT_LIST_HEAD(&dev_priv->mm.userfault_list);

	INIT_DELAYED_WORK(&dev_priv->gt.retire_work,
			  i915_gem_retire_work_handler);
	INIT_DELAYED_WORK(&dev_priv->gt.idle_work,
			  i915_gem_idle_work_handler);
	init_waitqueue_head(&dev_priv->gpu_error.wait_queue);
	init_waitqueue_head(&dev_priv->gpu_error.reset_queue);

	atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0);

	lockinit(&dev_priv->fb_tracking.lock, "drmftl", 0, 0);

	return 0;

	/* Error unwind: destroy caches in reverse order of creation. */
err_priorities:
	kmem_cache_destroy(dev_priv->priorities);
err_dependencies:
	kmem_cache_destroy(dev_priv->dependencies);
err_requests:
	kmem_cache_destroy(dev_priv->requests);
err_luts:
	kmem_cache_destroy(dev_priv->luts);
err_vmas:
	kmem_cache_destroy(dev_priv->vmas);
err_objects:
	kmem_cache_destroy(dev_priv->objects);
err_out:
	return err;
}
5479
/* Undo i915_gem_load_init() at driver unload: drain pending frees, tear
 * down the global timeline, and destroy the slab caches.
 */
void i915_gem_load_cleanup(struct drm_i915_private *dev_priv)
{
	/* All objects must have been freed before the caches go away. */
	i915_gem_drain_freed_objects(dev_priv);
	WARN_ON(!llist_empty(&dev_priv->mm.free_list));
	WARN_ON(dev_priv->mm.object_count);

	mutex_lock(&dev_priv->drm.struct_mutex);
	i915_gem_timeline_fini(&dev_priv->gt.global_timeline);
	WARN_ON(!list_empty(&dev_priv->gt.timelines));
	mutex_unlock(&dev_priv->drm.struct_mutex);

	/* Destroy caches in reverse order of creation. */
	kmem_cache_destroy(dev_priv->priorities);
	kmem_cache_destroy(dev_priv->dependencies);
	kmem_cache_destroy(dev_priv->requests);
	kmem_cache_destroy(dev_priv->luts);
	kmem_cache_destroy(dev_priv->vmas);
	kmem_cache_destroy(dev_priv->objects);

	/* And ensure that our DESTROY_BY_RCU slabs are truly destroyed */
	rcu_barrier();

#if 0
	i915_gemfs_fini(dev_priv);
#endif
}
5505
/* First stage of hibernation: shrink the GEM footprint. */
int i915_gem_freeze(struct drm_i915_private *dev_priv)
{
	/*
	 * Discard all purgeable objects now; userspace recreates them as
	 * required after resuming.
	 */
	i915_gem_shrink_all(dev_priv);

	return 0;
}
5515
i915_gem_freeze_late(struct drm_i915_private * dev_priv)5516 int i915_gem_freeze_late(struct drm_i915_private *dev_priv)
5517 {
5518 struct drm_i915_gem_object *obj;
5519 struct list_head *phases[] = {
5520 &dev_priv->mm.unbound_list,
5521 &dev_priv->mm.bound_list,
5522 NULL
5523 }, **p;
5524
5525 /* Called just before we write the hibernation image.
5526 *
5527 * We need to update the domain tracking to reflect that the CPU
5528 * will be accessing all the pages to create and restore from the
5529 * hibernation, and so upon restoration those pages will be in the
5530 * CPU domain.
5531 *
5532 * To make sure the hibernation image contains the latest state,
5533 * we update that state just before writing out the image.
5534 *
5535 * To try and reduce the hibernation image, we manually shrink
5536 * the objects as well, see i915_gem_freeze()
5537 */
5538
5539 i915_gem_shrink(dev_priv, -1UL, NULL, I915_SHRINK_UNBOUND);
5540 i915_gem_drain_freed_objects(dev_priv);
5541
5542 lockmgr(&dev_priv->mm.obj_lock, LK_EXCLUSIVE);
5543 for (p = phases; *p; p++) {
5544 list_for_each_entry(obj, *p, mm.link)
5545 __start_cpu_write(obj);
5546 }
5547 lockmgr(&dev_priv->mm.obj_lock, LK_RELEASE);
5548
5549 return 0;
5550 }
5551
i915_gem_release(struct drm_device * dev,struct drm_file * file)5552 void i915_gem_release(struct drm_device *dev, struct drm_file *file)
5553 {
5554 struct drm_i915_file_private *file_priv = file->driver_priv;
5555 struct drm_i915_gem_request *request;
5556
5557 /* Clean up our request list when the client is going away, so that
5558 * later retire_requests won't dereference our soon-to-be-gone
5559 * file_priv.
5560 */
5561 lockmgr(&file_priv->mm.lock, LK_EXCLUSIVE);
5562 list_for_each_entry(request, &file_priv->mm.request_list, client_link)
5563 request->file_priv = NULL;
5564 lockmgr(&file_priv->mm.lock, LK_RELEASE);
5565 }
5566
5567 #ifdef __DragonFly__
5568 int
i915_gem_pager_ctor(void * handle,vm_ooffset_t size,vm_prot_t prot,vm_ooffset_t foff,struct ucred * cred,u_short * color)5569 i915_gem_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
5570 vm_ooffset_t foff, struct ucred *cred, u_short *color)
5571 {
5572 *color = 0; /* XXXKIB */
5573 return (0);
5574 }
5575
5576 void
i915_gem_pager_dtor(void * handle)5577 i915_gem_pager_dtor(void *handle)
5578 {
5579 struct drm_gem_object *obj = handle;
5580 struct drm_device *dev = obj->dev;
5581
5582 drm_gem_free_mmap_offset(obj);
5583 mutex_lock(&dev->struct_mutex);
5584 i915_gem_release_mmap(to_intel_bo(obj));
5585 drm_gem_object_unreference(obj);
5586 mutex_unlock(&dev->struct_mutex);
5587 }
5588 #endif
5589
i915_gem_open(struct drm_i915_private * i915,struct drm_file * file)5590 int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file)
5591 {
5592 struct drm_i915_file_private *file_priv;
5593 int ret;
5594
5595 DRM_DEBUG("\n");
5596
5597 file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
5598 if (!file_priv)
5599 return -ENOMEM;
5600
5601 file->driver_priv = file_priv;
5602 file_priv->dev_priv = i915;
5603 file_priv->file = file;
5604
5605 lockinit(&file_priv->mm.lock, "i915_priv", 0, 0);
5606 INIT_LIST_HEAD(&file_priv->mm.request_list);
5607
5608 file_priv->bsd_engine = -1;
5609
5610 ret = i915_gem_context_open(i915, file);
5611 if (ret)
5612 kfree(file_priv);
5613
5614 return ret;
5615 }
5616
/**
 * i915_gem_track_fb - update frontbuffer tracking
 * @old: current GEM buffer for the frontbuffer slots
 * @new: new GEM buffer for the frontbuffer slots
 * @frontbuffer_bits: bitmask of frontbuffer slots
 *
 * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them
 * from @old and setting them in @new. Both @old and @new can be NULL.
 */
void i915_gem_track_fb(struct drm_i915_gem_object *old,
		       struct drm_i915_gem_object *new,
		       unsigned frontbuffer_bits)
{
	/* Control of individual bits within the mask are guarded by
	 * the owning plane->mutex, i.e. we can never see concurrent
	 * manipulation of individual bits. But since the bitfield as a whole
	 * is updated using RMW, we need to use atomics in order to update
	 * the bits.
	 */
	BUILD_BUG_ON(INTEL_FRONTBUFFER_BITS_PER_PIPE * I915_MAX_PIPES >
		     sizeof(atomic_t) * BITS_PER_BYTE);

	/* @old must currently own the bits being transferred away. */
	if (old) {
		WARN_ON(!(atomic_read(&old->frontbuffer_bits) & frontbuffer_bits));
		atomic_andnot(frontbuffer_bits, &old->frontbuffer_bits);
	}

	/* ...and @new must not already own them. */
	if (new) {
		WARN_ON(atomic_read(&new->frontbuffer_bits) & frontbuffer_bits);
		atomic_or(frontbuffer_bits, &new->frontbuffer_bits);
	}
}
5649
/* Allocate a new GEM object and fill it with the supplied data.
 * Copies @size bytes from @data into a freshly created shmem-backed object,
 * one page at a time via the pagecache. Returns the object or an ERR_PTR.
 */
struct drm_i915_gem_object *
i915_gem_object_create_from_data(struct drm_i915_private *dev_priv,
				 const void *data, size_t size)
{
	struct drm_i915_gem_object *obj;
	struct vm_object *file;
	size_t offset;
	int err;

	obj = i915_gem_object_create(dev_priv, round_up(size, PAGE_SIZE));
	if (IS_ERR(obj))
		return obj;

	/* A fresh object starts in the CPU write domain, so plain memcpy
	 * through kmap below is coherent.
	 */
	GEM_BUG_ON(obj->base.write_domain != I915_GEM_DOMAIN_CPU);

	file = obj->base.filp;
	offset = 0;
	do {
		unsigned int len = min_t(typeof(size), size, PAGE_SIZE);
		struct page *page;
		void *pgdata, *vaddr;

		/* Obtain (and lock) the backing page for this range. */
		err = pagecache_write_begin(file, NULL,
					    offset, len, 0,
					    &page, &pgdata);
		if (err < 0)
			goto fail;

		vaddr = kmap(page);
		memcpy(vaddr, data, len);
		kunmap(page);

		/* On DragonFly the mapping argument is unused (NULL). */
#ifndef __DragonFly__
		err = pagecache_write_end(file, file->f_mapping,
#else
		err = pagecache_write_end(file, NULL,
#endif
					  offset, len, len,
					  page, pgdata);
		if (err < 0)
			goto fail;

		size -= len;
		data += len;
		offset += len;
	} while (size);

	return obj;

fail:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}
5704
/* Return the scatterlist entry containing page @n of @obj's backing store,
 * and the page offset of @n within that entry via @offset. The pages must
 * be pinned. Lookups are cached in a radixtree for O(1)-amortized access.
 */
struct scatterlist *
i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
		       unsigned int n,
		       unsigned int *offset)
{
	struct i915_gem_object_page_iter *iter = &obj->mm.get_page;
	struct scatterlist *sg;
	unsigned int idx, count;

	might_sleep();
	GEM_BUG_ON(n >= obj->base.size >> PAGE_SHIFT);
	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));

	/* As we iterate forward through the sg, we record each entry in a
	 * radixtree for quick repeated (backwards) lookups. If we have seen
	 * this index previously, we will have an entry for it.
	 *
	 * Initial lookup is O(N), but this is amortized to O(1) for
	 * sequential page access (where each new request is consecutive
	 * to the previous one). Repeated lookups are O(lg(obj->base.size)),
	 * i.e. O(1) with a large constant!
	 */
	if (n < READ_ONCE(iter->sg_idx))
		goto lookup;

	mutex_lock(&iter->lock);

	/* We prefer to reuse the last sg so that repeated lookup of this
	 * (or the subsequent) sg are fast - comparing against the last
	 * sg is faster than going through the radixtree.
	 */

	sg = iter->sg_pos;
	idx = iter->sg_idx;
	count = __sg_page_count(sg);

	/* Walk forward, populating the radixtree, until we cover page n. */
	while (idx + count <= n) {
		unsigned long exception, i;
		int ret;

		/* If we cannot allocate and insert this entry, or the
		 * individual pages from this range, cancel updating the
		 * sg_idx so that on this lookup we are forced to linearly
		 * scan onwards, but on future lookups we will try the
		 * insertion again (in which case we need to be careful of
		 * the error return reporting that we have already inserted
		 * this index).
		 */
		ret = radix_tree_insert(&iter->radix, idx, sg);
		if (ret && ret != -EEXIST)
			goto scan;

		/* Interior pages of a multi-page sg get exceptional entries
		 * pointing back at the base index of the range.
		 */
		exception =
			RADIX_TREE_EXCEPTIONAL_ENTRY |
			idx << RADIX_TREE_EXCEPTIONAL_SHIFT;
		for (i = 1; i < count; i++) {
			ret = radix_tree_insert(&iter->radix, idx + i,
						(void *)exception);
			if (ret && ret != -EEXIST)
				goto scan;
		}

		idx += count;
		sg = ____sg_next(sg);
		count = __sg_page_count(sg);
	}

scan:
	iter->sg_pos = sg;
	iter->sg_idx = idx;

	mutex_unlock(&iter->lock);

	if (unlikely(n < idx)) /* insertion completed by another thread */
		goto lookup;

	/* In case we failed to insert the entry into the radixtree, we need
	 * to look beyond the current sg.
	 */
	while (idx + count <= n) {
		idx += count;
		sg = ____sg_next(sg);
		count = __sg_page_count(sg);
	}

	*offset = n - idx;
	return sg;

lookup:
	rcu_read_lock();

	sg = radix_tree_lookup(&iter->radix, n);
	GEM_BUG_ON(!sg);

	/* If this index is in the middle of multi-page sg entry,
	 * the radixtree will contain an exceptional entry that points
	 * to the start of that range. We will return the pointer to
	 * the base page and the offset of this page within the
	 * sg entry's range.
	 */
	*offset = 0;
	if (unlikely(radix_tree_exception(sg))) {
		unsigned long base =
			(unsigned long)sg >> RADIX_TREE_EXCEPTIONAL_SHIFT;

		sg = radix_tree_lookup(&iter->radix, base);
		GEM_BUG_ON(!sg);

		*offset = n - base;
	}

	rcu_read_unlock();

	return sg;
}
5820
/* Return struct page @n of @obj's (pinned, struct-page backed) storage. */
struct page *
i915_gem_object_get_page(struct drm_i915_gem_object *obj, unsigned int n)
{
	unsigned int offset;
	struct scatterlist *sg;

	GEM_BUG_ON(!i915_gem_object_has_struct_page(obj));

	/* Locate the sg entry covering page n, then index into it. */
	sg = i915_gem_object_get_sg(obj, n, &offset);
	return nth_page(sg_page(sg), offset);
}
5832
5833 /* Like i915_gem_object_get_page(), but mark the returned page dirty */
5834 struct page *
i915_gem_object_get_dirty_page(struct drm_i915_gem_object * obj,unsigned int n)5835 i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
5836 unsigned int n)
5837 {
5838 struct page *page;
5839
5840 page = i915_gem_object_get_page(obj, n);
5841 if (!obj->mm.dirty)
5842 set_page_dirty(page);
5843
5844 return page;
5845 }
5846
5847 dma_addr_t
i915_gem_object_get_dma_address(struct drm_i915_gem_object * obj,unsigned long n)5848 i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
5849 unsigned long n)
5850 {
5851 struct scatterlist *sg;
5852 unsigned int offset;
5853
5854 sg = i915_gem_object_get_sg(obj, n, &offset);
5855 return sg_dma_address(sg) + (offset << PAGE_SHIFT);
5856 }
5857
/* Convert @obj to use physically contiguous backing storage (for hardware
 * that requires it), transferring its existing shmem pages. @align is the
 * required allocation alignment and must not exceed the object size.
 * Returns 0 or a negative error code; on failure the original page ops and
 * pages are restored.
 */
int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align)
{
	struct sg_table *pages;
	int err;

	if (align > obj->base.size)
		return -EINVAL;

	/* Already converted: nothing to do. */
	if (obj->ops == &i915_gem_phys_ops)
		return 0;

	/* Only plain shmem-backed objects can be converted. */
	if (obj->ops != &i915_gem_object_ops)
		return -EINVAL;

	err = i915_gem_object_unbind(obj);
	if (err)
		return err;

	mutex_lock(&obj->mm.lock);

	/* Refuse objects marked purgeable... */
	if (obj->mm.madv != I915_MADV_WILLNEED) {
		err = -EFAULT;
		goto err_unlock;
	}

	/* ...or holding a quirk page pin... */
	if (obj->mm.quirked) {
		err = -EFAULT;
		goto err_unlock;
	}

	/* ...or with a live kernel vmapping of the pages. */
	if (obj->mm.mapping) {
		err = -EBUSY;
		goto err_unlock;
	}

	/* Detach the current pages so they can be handed back below. */
	pages = fetch_and_zero(&obj->mm.pages);
	if (pages) {
		struct drm_i915_private *i915 = to_i915(obj->base.dev);

		__i915_gem_object_reset_page_iter(obj);

		lockmgr(&i915->mm.obj_lock, LK_EXCLUSIVE);
		list_del(&obj->mm.link);
		lockmgr(&i915->mm.obj_lock, LK_RELEASE);
	}

	obj->ops = &i915_gem_phys_ops;

	/* Acquire the new physically contiguous pages (copies the data). */
	err = ____i915_gem_object_get_pages(obj);
	if (err)
		goto err_xfer;

	/* Perma-pin (until release) the physical set of pages */
	__i915_gem_object_pin_pages(obj);

	/* Release the old shmem pages through their original ops. */
	if (!IS_ERR_OR_NULL(pages))
		i915_gem_object_ops.put_pages(obj, pages);
	mutex_unlock(&obj->mm.lock);
	return 0;

err_xfer:
	/* Roll back to the original ops and pages. */
	obj->ops = &i915_gem_object_ops;
	obj->mm.pages = pages;
err_unlock:
	mutex_unlock(&obj->mm.lock);
	return err;
}
5925
5926 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
5927 #include "selftests/scatterlist.c"
5928 #include "selftests/mock_gem_device.c"
5929 #include "selftests/huge_gem_object.c"
5930 #include "selftests/huge_pages.c"
5931 #include "selftests/i915_gem_object.c"
5932 #include "selftests/i915_gem_coherency.c"
5933 #endif
5934