/*
 * Copyright © 2008 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 * Copyright (c) 2011 The FreeBSD Foundation
 * All rights reserved.
 *
 * This software was developed by Konstantin Belousov under sponsorship from
 * the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

#include <machine/md_var.h>

#include <drm/drmP.h>
#include <drm/drm_vma_manager.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "i915_trace.h"
#include "intel_drv.h"
#include <linux/shmem_fs.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/pci.h>

static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj,
						   bool force);
static __must_check int
i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
			       bool readonly);
static void
i915_gem_object_retire(struct drm_i915_gem_object *obj);

static void i915_gem_write_fence(struct drm_device *dev, int reg,
				 struct drm_i915_gem_object *obj);
static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
					 struct drm_i915_fence_reg *fence,
					 bool enable);

static unsigned long i915_gem_shrink_all(struct drm_i915_private *dev_priv);

static bool cpu_cache_is_coherent(struct drm_device *dev,
				  enum i915_cache_level level)
{
	return HAS_LLC(dev) || level != I915_CACHE_NONE;
}

static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
	if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
		return true;

	return obj->pin_display;
}

static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
{
	if (obj->tiling_mode)
		i915_gem_release_mmap(obj);

	/* As we do not have an associated fence register, we will force
	 * a tiling change if we ever need to acquire one.
	 */
	obj->fence_dirty = false;
	obj->fence_reg = I915_FENCE_REG_NONE;
}

/* some bookkeeping */
static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
				  size_t size)
{
	spin_lock(&dev_priv->mm.object_stat_lock);
	dev_priv->mm.object_count++;
	dev_priv->mm.object_memory += size;
	spin_unlock(&dev_priv->mm.object_stat_lock);
}

static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
				     size_t size)
{
	spin_lock(&dev_priv->mm.object_stat_lock);
	dev_priv->mm.object_count--;
	dev_priv->mm.object_memory -= size;
	spin_unlock(&dev_priv->mm.object_stat_lock);
}

static int
i915_gem_wait_for_error(struct i915_gpu_error *error)
{
	int ret;

#define EXIT_COND (!i915_reset_in_progress(error) || \
		   i915_terminally_wedged(error))
	if (EXIT_COND)
		return 0;

	/*
	 * Only wait 10 seconds for the gpu reset to complete to avoid hanging
	 * userspace. If it takes that long something really bad is going on and
	 * we should simply try to bail out and fail as gracefully as possible.
	 */
	ret = wait_event_interruptible_timeout(error->reset_queue,
					       EXIT_COND,
					       10*HZ);
	if (ret == 0) {
		DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
		return -EIO;
	} else if (ret < 0) {
		return ret;
	}
#undef EXIT_COND

	return 0;
}

int i915_mutex_lock_interruptible(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret;

	ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
	if (ret)
		return ret;

	ret = mutex_lock_interruptible(&dev->struct_mutex);
	if (ret)
		return ret;

	WARN_ON(i915_verify_lists(dev));
	return 0;
}

static inline bool
i915_gem_object_is_inactive(struct drm_i915_gem_object *obj)
{
	return i915_gem_obj_bound_any(obj) && !obj->active;
}

int
i915_gem_init_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_init *args = data;

	if (drm_core_check_feature(dev, DRIVER_MODESET))
		return -ENODEV;

	if (args->gtt_start >= args->gtt_end ||
	    (args->gtt_end | args->gtt_start) & (PAGE_SIZE - 1))
		return -EINVAL;

	/* GEM with user mode setting was never supported on ilk and later. */
	if (INTEL_INFO(dev)->gen >= 5)
		return -ENODEV;

	mutex_lock(&dev->struct_mutex);
	kprintf("INITGLOBALGTT GTT_START %016jx\n", (uintmax_t)args->gtt_start);
	i915_gem_setup_global_gtt(dev, args->gtt_start, args->gtt_end,
				  args->gtt_end);
	dev_priv->gtt.mappable_end = args->gtt_end;
	mutex_unlock(&dev->struct_mutex);

	return 0;
}

int
i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
			    struct drm_file *file)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_get_aperture *args = data;
	struct drm_i915_gem_object *obj;
	size_t pinned;

	pinned = 0;
	mutex_lock(&dev->struct_mutex);
	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list)
		if (i915_gem_obj_is_pinned(obj))
			pinned += i915_gem_obj_ggtt_size(obj);
	mutex_unlock(&dev->struct_mutex);

	args->aper_size = dev_priv->gtt.base.total;
	args->aper_available_size = args->aper_size - pinned;

	return 0;
}

static void i915_gem_object_detach_phys(struct drm_i915_gem_object *obj)
{
	drm_dma_handle_t *phys = obj->phys_handle;

	if (!phys)
		return;

	if (obj->madv == I915_MADV_WILLNEED) {
		struct vm_object *mapping = obj->base.vm_obj;
		char *vaddr = phys->vaddr;
		int i;

		for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
			struct vm_page *page = shmem_read_mapping_page(mapping, i);
			if (!IS_ERR(page)) {
				char *dst = kmap_atomic(page);
				memcpy(dst, vaddr, PAGE_SIZE);
				drm_clflush_virt_range(dst, PAGE_SIZE);
				kunmap_atomic(dst);

				set_page_dirty(page);
				mark_page_accessed(page);
#if 0
				page_cache_release(page);
#endif
			}
			vaddr += PAGE_SIZE;
		}
		i915_gem_chipset_flush(obj->base.dev);
	}

#ifdef CONFIG_X86
	set_memory_wb((unsigned long)phys->vaddr, phys->size / PAGE_SIZE);
#endif
	drm_pci_free(obj->base.dev, phys);
	obj->phys_handle = NULL;
}

int
i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
			    int align)
{
	drm_dma_handle_t *phys;
	struct vm_object *mapping;
	char *vaddr;
	int i;

	if (obj->phys_handle) {
		if ((unsigned long)obj->phys_handle->vaddr & (align - 1))
			return -EBUSY;

		return 0;
	}

	if (obj->madv != I915_MADV_WILLNEED)
		return -EFAULT;

#if 0
	if (obj->base.filp == NULL)
		return -EINVAL;
#endif

	/* create a new object */
	phys = drm_pci_alloc(obj->base.dev, obj->base.size, align);
	if (!phys)
		return -ENOMEM;

	vaddr = phys->vaddr;
#ifdef CONFIG_X86
	set_memory_wc((unsigned long)vaddr, phys->size / PAGE_SIZE);
#endif
	mapping = obj->base.vm_obj;
	for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
		struct vm_page *page;
		char *src;

		page = shmem_read_mapping_page(mapping, i);
		if (IS_ERR(page)) {
#ifdef CONFIG_X86
			set_memory_wb((unsigned long)phys->vaddr, phys->size / PAGE_SIZE);
#endif
			drm_pci_free(obj->base.dev, phys);
			return PTR_ERR(page);
		}

		src = kmap_atomic(page);
		memcpy(vaddr, src, PAGE_SIZE);
		kunmap_atomic(src);

		mark_page_accessed(page);
#if 0
		page_cache_release(page);
#endif

		vaddr += PAGE_SIZE;
	}

	obj->phys_handle = phys;
	return 0;
}

static int
i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pwrite *args,
		     struct drm_file *file_priv)
{
	struct drm_device *dev = obj->base.dev;
	void *vaddr = (char *)obj->phys_handle->vaddr + args->offset;
	char __user *user_data = to_user_ptr(args->data_ptr);

	if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) {
		unsigned long unwritten;

		/* The physical object once assigned is fixed for the lifetime
		 * of the obj, so we can safely drop the lock and continue
		 * to access vaddr.
		 */
		mutex_unlock(&dev->struct_mutex);
		unwritten = copy_from_user(vaddr, user_data, args->size);
		mutex_lock(&dev->struct_mutex);
		if (unwritten)
			return -EFAULT;
	}

	i915_gem_chipset_flush(dev);
	return 0;
}

void *i915_gem_object_alloc(struct drm_device *dev)
{
	return kmalloc(sizeof(struct drm_i915_gem_object),
		       M_DRM, M_WAITOK | M_ZERO);
}

void i915_gem_object_free(struct drm_i915_gem_object *obj)
{
	kfree(obj);
}

static int
i915_gem_create(struct drm_file *file,
		struct drm_device *dev,
		uint64_t size,
		uint32_t *handle_p)
{
	struct drm_i915_gem_object *obj;
	int ret;
	u32 handle;

	size = roundup(size, PAGE_SIZE);
	if (size == 0)
		return -EINVAL;

	/* Allocate the new object */
	obj = i915_gem_alloc_object(dev, size);
	if (obj == NULL)
		return -ENOMEM;

	ret = drm_gem_handle_create(file, &obj->base, &handle);
	/* drop reference from allocate - handle holds it now */
	drm_gem_object_unreference_unlocked(&obj->base);
	if (ret)
		return ret;

	*handle_p = handle;
	return 0;
}

int
i915_gem_dumb_create(struct drm_file *file,
		     struct drm_device *dev,
		     struct drm_mode_create_dumb *args)
{
	/* have to work out size/pitch and return them */
	args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64);
	args->size = args->pitch * args->height;
	return i915_gem_create(file, dev,
			       args->size, &args->handle);
}

/**
 * Creates a new mm object and returns a handle to it.
 */
int
i915_gem_create_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_gem_create *args = data;

	return i915_gem_create(file, dev,
			       args->size, &args->handle);
}

static inline int
__copy_to_user_swizzled(char __user *cpu_vaddr,
			const char *gpu_vaddr, int gpu_offset,
			int length)
{
	int ret, cpu_offset = 0;

	while (length > 0) {
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
		int this_length = min(cacheline_end - gpu_offset, length);
		int swizzled_gpu_offset = gpu_offset ^ 64;

		ret = __copy_to_user(cpu_vaddr + cpu_offset,
				     gpu_vaddr + swizzled_gpu_offset,
				     this_length);
		if (ret)
			return ret + length;

		cpu_offset += this_length;
		gpu_offset += this_length;
		length -= this_length;
	}

	return 0;
}

static inline int
__copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
			  const char __user *cpu_vaddr,
			  int length)
{
	int ret, cpu_offset = 0;

	while (length > 0) {
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
		int this_length = min(cacheline_end - gpu_offset, length);
		int swizzled_gpu_offset = gpu_offset ^ 64;

		ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
				       cpu_vaddr + cpu_offset,
				       this_length);
		if (ret)
			return ret + length;

		cpu_offset += this_length;
		gpu_offset += this_length;
		length -= this_length;
	}

	return 0;
}

/*
 * Pins the specified object's pages and synchronizes the object with
 * GPU accesses. Sets needs_clflush to non-zero if the caller should
 * flush the object from the CPU cache.
 */
int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
				    int *needs_clflush)
{
	int ret;

	*needs_clflush = 0;

#if 0
	if (!obj->base.filp)
		return -EINVAL;
#endif

	if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) {
		/* If we're not in the cpu read domain, set ourself into the gtt
		 * read domain and manually flush cachelines (if required). This
		 * optimizes for the case when the gpu will dirty the data
		 * anyway again before the next pread happens. */
		*needs_clflush = !cpu_cache_is_coherent(obj->base.dev,
							obj->cache_level);
		ret = i915_gem_object_wait_rendering(obj, true);
		if (ret)
			return ret;

		i915_gem_object_retire(obj);
	}

	ret = i915_gem_object_get_pages(obj);
	if (ret)
		return ret;

	i915_gem_object_pin_pages(obj);

	return ret;
}

/* Per-page copy function for the shmem pread fastpath.
 * Flushes invalid cachelines before reading the target if
 * needs_clflush is set. */
static int
shmem_pread_fast(struct vm_page *page, int shmem_page_offset, int page_length,
		 char __user *user_data,
		 bool page_do_bit17_swizzling, bool needs_clflush)
{
	char *vaddr;
	int ret;

	if (unlikely(page_do_bit17_swizzling))
		return -EINVAL;

	vaddr = kmap_atomic(page);
	if (needs_clflush)
		drm_clflush_virt_range(vaddr + shmem_page_offset,
				       page_length);
	ret = __copy_to_user_inatomic(user_data,
				      vaddr + shmem_page_offset,
				      page_length);
	kunmap_atomic(vaddr);

	return ret ? -EFAULT : 0;
}
static void
shmem_clflush_swizzled_range(char *addr, unsigned long length,
			     bool swizzled)
{
	if (unlikely(swizzled)) {
		unsigned long start = (unsigned long) addr;
		unsigned long end = (unsigned long) addr + length;

		/* For swizzling simply ensure that we always flush both
		 * channels. Lame, but simple and it works. Swizzled
		 * pwrite/pread is far from a hotpath - current userspace
		 * doesn't use it at all. */
		start = round_down(start, 128);
		end = round_up(end, 128);

		drm_clflush_virt_range((void *)start, end - start);
	} else {
		drm_clflush_virt_range(addr, length);
	}
}

/* Only difference to the fast-path function is that this can handle bit17
 * and uses non-atomic copy and kmap functions. */
static int
shmem_pread_slow(struct vm_page *page, int shmem_page_offset, int page_length,
		 char __user *user_data,
		 bool page_do_bit17_swizzling, bool needs_clflush)
{
	char *vaddr;
	int ret;

	vaddr = kmap(page);
	if (needs_clflush)
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
					     page_length,
					     page_do_bit17_swizzling);

	if (page_do_bit17_swizzling)
		ret = __copy_to_user_swizzled(user_data,
					      vaddr, shmem_page_offset,
					      page_length);
	else
		ret = __copy_to_user(user_data,
				     vaddr + shmem_page_offset,
				     page_length);
	kunmap(page);

	return ret ? -EFAULT : 0;
}

static int
i915_gem_shmem_pread(struct drm_device *dev,
		     struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pread *args,
		     struct drm_file *file)
{
	char __user *user_data;
	ssize_t remain;
	loff_t offset;
	int shmem_page_offset, page_length, ret = 0;
	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
	int prefaulted = 0;
	int needs_clflush = 0;
	int i;

	user_data = to_user_ptr(args->data_ptr);
	remain = args->size;

	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);

	ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
	if (ret)
		return ret;

	offset = args->offset;

	for (i = 0; i < (obj->base.size >> PAGE_SHIFT); i++) {
		struct vm_page *page = obj->pages[i];

		if (remain <= 0)
			break;

		/* Operation in this page
		 *
		 * shmem_page_offset = offset within page in shmem file
		 * page_length = bytes to copy for this page
		 */
		shmem_page_offset = offset_in_page(offset);
		page_length = remain;
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - shmem_page_offset;

		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
			(page_to_phys(page) & (1 << 17)) != 0;

		ret = shmem_pread_fast(page, shmem_page_offset, page_length,
				       user_data, page_do_bit17_swizzling,
				       needs_clflush);
		if (ret == 0)
			goto next_page;

		mutex_unlock(&dev->struct_mutex);

		if (likely(!i915.prefault_disable) && !prefaulted) {
			ret = fault_in_multipages_writeable(user_data, remain);
			/* Userspace is tricking us, but we've already clobbered
			 * its pages with the prefault and promised to write the
			 * data up to the first fault. Hence ignore any errors
			 * and just continue. */
			(void)ret;
			prefaulted = 1;
		}

		ret = shmem_pread_slow(page, shmem_page_offset, page_length,
				       user_data, page_do_bit17_swizzling,
				       needs_clflush);

		mutex_lock(&dev->struct_mutex);

		if (ret)
			goto out;

next_page:
		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

out:
	i915_gem_object_unpin_pages(obj);

	return ret;
}

/**
 * Reads data from the object referenced by handle.
 *
 * On error, the contents of *data are undefined.
 */
int
i915_gem_pread_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file)
{
	struct drm_i915_gem_pread *args = data;
	struct drm_i915_gem_object *obj;
	int ret = 0;

	if (args->size == 0)
		return 0;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Bounds check source. */
	if (args->offset > obj->base.size ||
	    args->size > obj->base.size - args->offset) {
		ret = -EINVAL;
		goto out;
	}

	trace_i915_gem_object_pread(obj, args->offset, args->size);

	ret = i915_gem_shmem_pread(dev, obj, args, file);

out:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}
/* This is the fast write path which cannot handle
 * page faults in the source data
 */

static inline int
fast_user_write(struct io_mapping *mapping,
		loff_t page_base, int page_offset,
		char __user *user_data,
		int length)
{
	void __iomem *vaddr_atomic;
	void *vaddr;
	unsigned long unwritten;

	vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
	/* We can use the cpu mem copy function because this is X86. */
	vaddr = (char __force*)vaddr_atomic + page_offset;
	unwritten = __copy_from_user_inatomic_nocache(vaddr,
						      user_data, length);
	io_mapping_unmap_atomic(vaddr_atomic);
	return unwritten;
}

/**
 * This is the fast pwrite path, where we copy the data directly from the
 * user into the GTT, uncached.
 */
static int
i915_gem_gtt_pwrite_fast(struct drm_device *dev,
			 struct drm_i915_gem_object *obj,
			 struct drm_i915_gem_pwrite *args,
			 struct drm_file *file)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	ssize_t remain;
	loff_t offset, page_base;
	char __user *user_data;
	int page_offset, page_length, ret;

	ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK);
	if (ret)
		goto out;

	ret = i915_gem_object_set_to_gtt_domain(obj, true);
	if (ret)
		goto out_unpin;

	ret = i915_gem_object_put_fence(obj);
	if (ret)
		goto out_unpin;

	user_data = to_user_ptr(args->data_ptr);
	remain = args->size;

	offset = i915_gem_obj_ggtt_offset(obj) + args->offset;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		page_base = offset & ~PAGE_MASK;
		page_offset = offset_in_page(offset);
		page_length = remain;
		if ((page_offset + remain) > PAGE_SIZE)
			page_length = PAGE_SIZE - page_offset;

		/* If we get a fault while copying data, then (presumably) our
		 * source page isn't available. Return the error and we'll
		 * retry in the slow path.
		 */
		if (fast_user_write(dev_priv->gtt.mappable, page_base,
				    page_offset, user_data, page_length)) {
			ret = -EFAULT;
			goto out_unpin;
		}

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

out_unpin:
	i915_gem_object_ggtt_unpin(obj);
out:
	return ret;
}

/* Per-page copy function for the shmem pwrite fastpath.
 * Flushes invalid cachelines before writing to the target if
 * needs_clflush_before is set and flushes out any written cachelines after
 * writing if needs_clflush is set. */
static int
shmem_pwrite_fast(struct vm_page *page, int shmem_page_offset, int page_length,
		  char __user *user_data,
		  bool page_do_bit17_swizzling,
		  bool needs_clflush_before,
		  bool needs_clflush_after)
{
	char *vaddr;
	int ret;

	if (unlikely(page_do_bit17_swizzling))
		return -EINVAL;

	vaddr = kmap_atomic(page);
	if (needs_clflush_before)
		drm_clflush_virt_range(vaddr + shmem_page_offset,
				       page_length);
	ret = __copy_from_user_inatomic(vaddr + shmem_page_offset,
					user_data, page_length);
	if (needs_clflush_after)
		drm_clflush_virt_range(vaddr + shmem_page_offset,
				       page_length);
	kunmap_atomic(vaddr);

	return ret ? -EFAULT : 0;
}

/* Only difference to the fast-path function is that this can handle bit17
 * and uses non-atomic copy and kmap functions. */
static int
shmem_pwrite_slow(struct vm_page *page, int shmem_page_offset, int page_length,
		  char __user *user_data,
		  bool page_do_bit17_swizzling,
		  bool needs_clflush_before,
		  bool needs_clflush_after)
{
	char *vaddr;
	int ret;

	vaddr = kmap(page);
	if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
					     page_length,
					     page_do_bit17_swizzling);
	if (page_do_bit17_swizzling)
		ret = __copy_from_user_swizzled(vaddr, shmem_page_offset,
						user_data,
						page_length);
	else
		ret = __copy_from_user(vaddr + shmem_page_offset,
				       user_data,
				       page_length);
	if (needs_clflush_after)
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
					     page_length,
					     page_do_bit17_swizzling);
	kunmap(page);

	return ret ? -EFAULT : 0;
}
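/*
 * Illustrative note (added commentary, not from the original sources): the
 * shmem pwrite path below only clflushes before a write when that write does
 * not cover whole cachelines.  The test
 *
 *	(shmem_page_offset | page_length) & (cpu_clflush_line_size - 1)
 *
 * is non-zero when either the start offset or the length is misaligned with
 * respect to the cacheline size; e.g. with 64-byte cachelines, offset 0 and
 * length 256 need no pre-flush, while offset 32 or length 100 do.
 */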
static int
i915_gem_shmem_pwrite(struct drm_device *dev,
		      struct drm_i915_gem_object *obj,
		      struct drm_i915_gem_pwrite *args,
		      struct drm_file *file)
{
	ssize_t remain;
	loff_t offset;
	char __user *user_data;
	int shmem_page_offset, page_length, ret = 0;
	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
	int hit_slowpath = 0;
	int needs_clflush_after = 0;
	int needs_clflush_before = 0;
	int i;

	user_data = to_user_ptr(args->data_ptr);
	remain = args->size;

	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);

	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
		/* If we're not in the cpu write domain, set ourself into the gtt
		 * write domain and manually flush cachelines (if required). This
		 * optimizes for the case when the gpu will use the data
		 * right away and we therefore have to clflush anyway. */
		needs_clflush_after = cpu_write_needs_clflush(obj);
		ret = i915_gem_object_wait_rendering(obj, false);
		if (ret)
			return ret;

		i915_gem_object_retire(obj);
	}
	/* Same trick applies to invalidate partially written cachelines read
	 * before writing. */
	if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0)
		needs_clflush_before =
			!cpu_cache_is_coherent(dev, obj->cache_level);

	ret = i915_gem_object_get_pages(obj);
	if (ret)
		return ret;

	i915_gem_object_pin_pages(obj);

	offset = args->offset;
	obj->dirty = 1;

	VM_OBJECT_LOCK(obj->base.vm_obj);
	vm_object_pip_add(obj->base.vm_obj, 1);
	for (i = 0; i < (obj->base.size >> PAGE_SHIFT); i++) {
		struct vm_page *page = obj->pages[i];
		int partial_cacheline_write;

		if (i < offset >> PAGE_SHIFT)
			continue;

		if (remain <= 0)
			break;

		/* Operation in this page
		 *
		 * shmem_page_offset = offset within page in shmem file
		 * page_length = bytes to copy for this page
		 */
		shmem_page_offset = offset_in_page(offset);

		page_length = remain;
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - shmem_page_offset;

		/* If we don't overwrite a cacheline completely we need to be
		 * careful to have up-to-date data by first clflushing. Don't
		 * overcomplicate things and flush the entire patch. */
		partial_cacheline_write = needs_clflush_before &&
			((shmem_page_offset | page_length)
				& (cpu_clflush_line_size - 1));

		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
			(page_to_phys(page) & (1 << 17)) != 0;

		ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
					user_data, page_do_bit17_swizzling,
					partial_cacheline_write,
					needs_clflush_after);
		if (ret == 0)
			goto next_page;

		hit_slowpath = 1;
		mutex_unlock(&dev->struct_mutex);
		ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
					user_data, page_do_bit17_swizzling,
					partial_cacheline_write,
					needs_clflush_after);

		mutex_lock(&dev->struct_mutex);

		if (ret)
			goto out;

next_page:
		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}
	vm_object_pip_wakeup(obj->base.vm_obj);
	VM_OBJECT_UNLOCK(obj->base.vm_obj);

out:
	i915_gem_object_unpin_pages(obj);

	if (hit_slowpath) {
		/*
		 * Fixup: Flush cpu caches in case we didn't flush the dirty
		 * cachelines in-line while writing and the object moved
		 * out of the cpu write domain while we've dropped the lock.
		 */
		if (!needs_clflush_after &&
		    obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
			if (i915_gem_clflush_object(obj, obj->pin_display))
				i915_gem_chipset_flush(dev);
		}
	}

	if (needs_clflush_after)
		i915_gem_chipset_flush(dev);

	return ret;
}

/**
 * Writes data to the object referenced by handle.
 *
 * On error, the contents of the buffer that were to be modified are undefined.
 */
int
i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_gem_pwrite *args = data;
	struct drm_i915_gem_object *obj;
	int ret;

	if (args->size == 0)
		return 0;

	if (likely(!i915.prefault_disable)) {
		ret = fault_in_multipages_readable(to_user_ptr(args->data_ptr),
						   args->size);
		if (ret)
			return -EFAULT;
	}

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Bounds check destination. */
	if (args->offset > obj->base.size ||
	    args->size > obj->base.size - args->offset) {
		ret = -EINVAL;
		goto out;
	}

	trace_i915_gem_object_pwrite(obj, args->offset, args->size);

	ret = -EFAULT;
	/* We can only do the GTT pwrite on untiled buffers, as otherwise
	 * it would end up going through the fenced access, and we'll get
	 * different detiling behavior between reading and writing.
	 * pread/pwrite currently are reading and writing from the CPU
	 * perspective, requiring manual detiling by the client.
	 */
	if (obj->phys_handle) {
		ret = i915_gem_phys_pwrite(obj, args, file);
		goto out;
	}

	if (obj->tiling_mode == I915_TILING_NONE &&
	    obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
	    cpu_write_needs_clflush(obj)) {
		ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
		/* Note that the gtt paths might fail with non-page-backed user
		 * pointers (e.g. gtt mappings when moving data between
		 * textures). Fallback to the shmem path in that case. */
	}

	if (ret == -EFAULT || ret == -ENOSPC)
		ret = i915_gem_shmem_pwrite(dev, obj, args, file);

out:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

int
i915_gem_check_wedge(struct i915_gpu_error *error,
		     bool interruptible)
{
	if (i915_reset_in_progress(error)) {
		/* Non-interruptible callers can't handle -EAGAIN, hence return
		 * -EIO unconditionally for these. */
		if (!interruptible)
			return -EIO;

		/* Recovery complete, but the reset failed ... */
		if (i915_terminally_wedged(error))
			return -EIO;

		/*
		 * Check if GPU Reset is in progress - we need intel_ring_begin
		 * to work properly to reinit the hw state while the gpu is
		 * still marked as reset-in-progress. Handle this with a flag.
		 */
		if (!error->reload_in_reset)
			return -EAGAIN;
	}

	return 0;
}

/*
 * Compare seqno against outstanding lazy request. Emit a request if they are
 * equal.
 */
int
i915_gem_check_olr(struct intel_engine_cs *ring, u32 seqno)
{
	int ret;

	BUG_ON(!mutex_is_locked(&ring->dev->struct_mutex));

	ret = 0;
	if (seqno == ring->outstanding_lazy_seqno)
		ret = i915_add_request(ring, NULL);

	return ret;
}

#if 0
static void fake_irq(unsigned long data)
{
	wake_up_process((struct task_struct *)data);
}

static bool missed_irq(struct drm_i915_private *dev_priv,
		       struct intel_engine_cs *ring)
{
	return test_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings);
}

static bool can_wait_boost(struct drm_i915_file_private *file_priv)
{
	if (file_priv == NULL)
		return true;

	return !atomic_xchg(&file_priv->rps_wait_boost, true);
}
#endif

/**
 * __wait_seqno - wait until execution of seqno has finished
 * @ring: the ring expected to report seqno
 * @seqno: duh!
 * @reset_counter: reset sequence associated with the given seqno
 * @interruptible: do an interruptible wait (normally yes)
 * @timeout: in - how long to wait (NULL forever); out - how much time remaining
 *
 * Note: It is of utmost importance that the passed in seqno and reset_counter
 * values have been read by the caller in an smp safe manner. Where read-side
 * locks are involved, it is sufficient to read the reset_counter before
 * unlocking the lock that protects the seqno. For lockless tricks, the
 * reset_counter _must_ be read before, and an appropriate smp_rmb must be
 * inserted.
 *
 * Returns 0 if the seqno was found within the allotted time. Else returns the
 * errno with remaining time filled in timeout argument.
 */
static int __wait_seqno(struct intel_engine_cs *ring, u32 seqno,
			unsigned reset_counter,
			bool interruptible,
			struct timespec *timeout,
			struct drm_i915_file_private *file_priv)
{
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
	struct timespec before, now, wait_time={1,0};
	unsigned long timeout_jiffies;
	long end;
	bool wait_forever = true;
	int ret;

	WARN(!intel_irqs_enabled(dev_priv), "IRQs disabled");

	if (i915_seqno_passed(ring->get_seqno(ring, true), seqno))
		return 0;

	if (timeout != NULL) {
		wait_time = *timeout;
		wait_forever = false;
	}

	timeout_jiffies = timespec_to_jiffies_timeout(&wait_time);

	if (WARN_ON(!ring->irq_get(ring)))
		return -ENODEV;

	/* Record current time in case interrupted by signal, or wedged */
	trace_i915_gem_request_wait_begin(ring, seqno);
	getrawmonotonic(&before);

#define EXIT_COND \
	(i915_seqno_passed(ring->get_seqno(ring, false), seqno) || \
	 i915_reset_in_progress(&dev_priv->gpu_error) || \
	 reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter))
	do {
		if (interruptible)
			end = wait_event_interruptible_timeout(ring->irq_queue,
							       EXIT_COND,
							       timeout_jiffies);
		else
			end = wait_event_timeout(ring->irq_queue, EXIT_COND,
						 timeout_jiffies);

		/* We need to check whether any gpu reset happened in between
		 * the caller grabbing the seqno and now ... */
		if (reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter))
			end = -EAGAIN;

		/* ... but upgrade the -EAGAIN to an -EIO if the gpu is truly
		 * gone. */
		ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
		if (ret)
			end = ret;
	} while (end == 0 && wait_forever);

	getrawmonotonic(&now);

	ring->irq_put(ring);
	trace_i915_gem_request_wait_end(ring, seqno);
#undef EXIT_COND

	if (timeout) {
		struct timespec sleep_time = timespec_sub(now, before);
		*timeout = timespec_sub(*timeout, sleep_time);
		if (!timespec_valid(timeout)) /* i.e. negative time remains */
			set_normalized_timespec(timeout, 0, 0);
	}

	switch (end) {
	case -EIO:
	case -EAGAIN: /* Wedged */
	case -ERESTARTSYS: /* Signal */
		return (int)end;
	case 0: /* Timeout */
		return -ETIMEDOUT; /* -ETIME on Linux */
	default: /* Completed */
		WARN_ON(end < 0); /* We're not aware of other errors */
		return 0;
	}
}

/**
 * Waits for a sequence number to be signaled, and cleans up the
 * request and object lists appropriately for that event.
 */
int
i915_wait_seqno(struct intel_engine_cs *ring, uint32_t seqno)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	bool interruptible = dev_priv->mm.interruptible;
	int ret;

	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
	BUG_ON(seqno == 0);

	ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
	if (ret)
		return ret;

	ret = i915_gem_check_olr(ring, seqno);
	if (ret)
		return ret;

	return __wait_seqno(ring, seqno,
			    atomic_read(&dev_priv->gpu_error.reset_counter),
			    interruptible, NULL, NULL);
}

static int
i915_gem_object_wait_rendering__tail(struct drm_i915_gem_object *obj,
				     struct intel_engine_cs *ring)
{
	if (!obj->active)
		return 0;

	/* Manually manage the write flush as we may have not yet
	 * retired the buffer.
	 *
	 * Note that the last_write_seqno is always the earlier of
	 * the two (read/write) seqno, so if we have successfully waited,
	 * we know we have passed the last write.
	 */
	obj->last_write_seqno = 0;

	return 0;
}

/**
 * Ensures that all rendering to the object has completed and the object is
 * safe to unbind from the GTT or access from the CPU.
 */
static __must_check int
i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
			       bool readonly)
{
	struct intel_engine_cs *ring = obj->ring;
	u32 seqno;
	int ret;

	seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
	if (seqno == 0)
		return 0;

	ret = i915_wait_seqno(ring, seqno);
	if (ret)
		return ret;

	return i915_gem_object_wait_rendering__tail(obj, ring);
}
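/*
 * Illustrative note (added commentary, not from the original sources): the
 * nonblocking variant below picks its wait target the same way as the
 * blocking one above: for read-only access it waits only on last_write_seqno
 * (outstanding GPU writes), while for writes it waits on last_read_seqno,
 * which also covers any outstanding GPU reads of the buffer.
 */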
/* A nonblocking variant of the above wait. This is a highly dangerous routine
 * as the object state may change during this call.
 */
static __must_check int
i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
					    struct drm_i915_file_private *file_priv,
					    bool readonly)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *ring = obj->ring;
	unsigned reset_counter;
	u32 seqno;
	int ret;

	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
	BUG_ON(!dev_priv->mm.interruptible);

	seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
	if (seqno == 0)
		return 0;

	ret = i915_gem_check_wedge(&dev_priv->gpu_error, true);
	if (ret)
		return ret;

	ret = i915_gem_check_olr(ring, seqno);
	if (ret)
		return ret;

	reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
	mutex_unlock(&dev->struct_mutex);
	ret = __wait_seqno(ring, seqno, reset_counter, true, NULL, file_priv);
	mutex_lock(&dev->struct_mutex);
	if (ret)
		return ret;

	return i915_gem_object_wait_rendering__tail(obj, ring);
}

/**
 * Called when user space prepares to use an object with the CPU, either
 * through the mmap ioctl's mapping or a GTT mapping.
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file)
{
	struct drm_i915_gem_set_domain *args = data;
	struct drm_i915_gem_object *obj;
	uint32_t read_domains = args->read_domains;
	uint32_t write_domain = args->write_domain;
	int ret;

	/* Only handle setting domains to types used by the CPU. */
	if (write_domain & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	if (read_domains & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	/* Having something in the write domain implies it's in the read
	 * domain, and only that read domain. Enforce that in the request.
	 */
	if (write_domain != 0 && read_domains != write_domain)
		return -EINVAL;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Try to flush the object off the GPU without holding the lock.
	 * We will repeat the flush holding the lock in the normal manner
	 * to catch cases where we are gazumped.
	 */
	ret = i915_gem_object_wait_rendering__nonblocking(obj,
							  file->driver_priv,
							  !write_domain);
	if (ret)
		goto unref;

	if (read_domains & I915_GEM_DOMAIN_GTT) {
		ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);

		/* Silently promote "you're not bound, there was nothing to do"
		 * to success, since the client was just asking us to
		 * make sure everything was done.
		 */
		if (ret == -EINVAL)
			ret = 0;
	} else {
		ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
	}

unref:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/**
 * Called when user space has done writes to this buffer
 */
int
i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file)
{
	struct drm_i915_gem_sw_finish *args = data;
	struct drm_i915_gem_object *obj;
	int ret = 0;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Pinned buffers may be scanout, so flush the cache */
	if (obj->pin_display)
		i915_gem_object_flush_cpu_write_domain(obj, true);

	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/**
 * Maps the contents of an object, returning the address it is mapped
 * into.
 *
 * While the mapping holds a reference on the contents of the object, it doesn't
 * imply a ref on the object itself.
 */
int
i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_gem_mmap *args = data;
	struct drm_gem_object *obj;
	unsigned long addr;
	struct proc *p = curproc;
	vm_map_t map = &p->p_vmspace->vm_map;
	vm_size_t size;
	int error = 0, rv;

	obj = drm_gem_object_lookup(dev, file, args->handle);
	if (obj == NULL)
		return -ENOENT;

	if (args->size == 0)
		goto out;

	size = round_page(args->size);
	if (map->size + size > p->p_rlimit[RLIMIT_VMEM].rlim_cur) {
		error = -ENOMEM;
		goto out;
	}

	/*
	 * Call hint to ensure that NULL is not returned as a valid address
	 * and to reduce vm_map traversals. XXX causes instability, use a
	 * fixed low address as the start point instead to avoid the NULL
	 * return issue.
	 */
	addr = PAGE_SIZE;

	/*
	 * Use 256KB alignment. It is unclear why this matters for a
	 * virtual address but it appears to fix a number of application/X
	 * crashes and kms console switching is much faster.
	 */
	vm_object_hold(obj->vm_obj);
	vm_object_reference_locked(obj->vm_obj);
	vm_object_drop(obj->vm_obj);

	rv = vm_map_find(map, obj->vm_obj, NULL,
			 args->offset, &addr, args->size,
			 256 * 1024, /* align */
			 TRUE, /* fitit */
			 VM_MAPTYPE_NORMAL, /* maptype */
			 VM_PROT_READ | VM_PROT_WRITE, /* prot */
			 VM_PROT_READ | VM_PROT_WRITE, /* max */
			 MAP_SHARED /* cow */);
	if (rv != KERN_SUCCESS) {
		vm_object_deallocate(obj->vm_obj);
		error = -vm_mmap_to_errno(rv);
	} else {
		args->addr_ptr = (uint64_t)addr;
	}
out:
	drm_gem_object_unreference(obj);
	return (error);
}

/**
 * i915_gem_fault - fault a page into the GTT
 *
 * vm_obj is locked on entry and expected to be locked on return.
 *
 * The vm_pager has placemarked the object with an anonymous memory page
 * which we must replace atomically to avoid races against concurrent faults
 * on the same page. XXX we currently are unable to do this atomically.
 *
 * If we are to return an error we should not touch the anonymous page,
 * the caller will deallocate it.
 *
 * XXX Most GEM calls appear to be interruptable, but we can't hard loop
 * in that case. Release all resources and wait 1 tick before retrying.
 * This is a huge problem which needs to be fixed by getting rid of most
 * of the interruptability. The linux code does not retry but does appear
 * to have some sort of mechanism (VM_FAULT_NOPAGE ?) for the higher level
 * to be able to retry.
 *
 * --
 *
 * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped
 * from userspace. The fault handler takes care of binding the object to
 * the GTT (if needed), allocating and programming a fence register (again,
 * only if needed based on whether the old reg is still valid or the object
 * is tiled) and inserting a new PTE into the faulting process.
 *
 * Note that the faulting process may involve evicting existing objects
 * from the GTT and/or fence registers to make room. So performance may
 * suffer if the GTT working set is large or there are few fence registers
 * left.
 *
 * vm_obj is locked on entry and expected to be locked on return. The VM
 * pager has placed an anonymous memory page at (obj,offset) which we have
 * to replace.
 */
int i915_gem_fault(vm_object_t vm_obj, vm_ooffset_t offset, int prot, vm_page_t *mres)
{
	struct drm_i915_gem_object *obj = to_intel_bo(vm_obj->handle);
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned long page_offset;
	vm_page_t m, oldm = NULL;
	int ret = 0;
	int didpip = 0;
	bool write = !!(prot & VM_PROT_WRITE);

	intel_runtime_pm_get(dev_priv);

	/* We don't use vmf->pgoff since that has the fake offset */
	page_offset = (unsigned long)offset;

retry:
	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		goto out;

	trace_i915_gem_object_fault(obj, page_offset, true, write);

	/* Try to flush the object off the GPU first without holding the lock.
	 * Upon reacquiring the lock, we will perform our sanity checks and then
	 * repeat the flush holding the lock in the normal manner to catch cases
	 * where we are gazumped.
	 */
	ret = i915_gem_object_wait_rendering__nonblocking(obj, NULL, !write);
	if (ret)
		goto unlock;

	/* Access to snoopable pages through the GTT is incoherent. */
	if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) {
		ret = -EFAULT;
		goto unlock;
	}

	/* Now bind it into the GTT if needed */
	ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE);
	if (ret)
		goto unlock;

	ret = i915_gem_object_set_to_gtt_domain(obj, write);
	if (ret)
		goto unpin;

	ret = i915_gem_object_get_fence(obj);
	if (ret)
		goto unpin;

	/*
	 * START FREEBSD MAGIC
	 *
	 * Add a pip count to avoid destruction and certain other
	 * complex operations (such as collapses?) while unlocked.
	 */
	if (didpip == 0) {
		vm_object_pip_add(vm_obj, 1);
		didpip = 1;
	}

	/*
	 * XXX We must currently remove the placeholder page now to avoid
	 * a deadlock against a concurrent i915_gem_release_mmap().
	 * Otherwise concurrent operation will block on the busy page
	 * while holding locks which we need to obtain.
	 */
	if (*mres != NULL) {
		oldm = *mres;
		if ((oldm->flags & PG_BUSY) == 0)
			kprintf("i915_gem_fault: Page was not busy\n");
		else
			vm_page_remove(oldm);
		*mres = NULL;
	} else {
		oldm = NULL;
	}

	VM_OBJECT_UNLOCK(vm_obj);
	ret = 0;
	m = NULL;

	/*
	 * Since the object lock was dropped, another thread might have
	 * faulted on the same GTT address and instantiated the mapping.
	 * Recheck.
	 */
	VM_OBJECT_LOCK(vm_obj);
	m = vm_page_lookup(vm_obj, OFF_TO_IDX(offset));
	if (m != NULL) {
		/*
		 * Try to busy the page, retry on failure (non-zero ret).
		 */
		if (vm_page_busy_try(m, false)) {
			kprintf("i915_gem_fault: PG_BUSY\n");
			VM_OBJECT_UNLOCK(vm_obj);
			mutex_unlock(&dev->struct_mutex);
			int dummy;
			tsleep(&dummy, 0, "delay", 1); /* XXX */
			VM_OBJECT_LOCK(vm_obj);
			goto retry;
		}
		goto have_page;
	}
	/*
	 * END FREEBSD MAGIC
	 */

	/*
	 * Object must be unlocked here to avoid deadlock during
	 * other GEM calls. All goto targets expect the object to
	 * be locked.
	 */
	VM_OBJECT_UNLOCK(vm_obj);

	obj->fault_mappable = true;

	/*
	 * Relock object for insertion, leave locked for return.
	 */
	VM_OBJECT_LOCK(vm_obj);
	m = vm_phys_fictitious_to_vm_page(dev_priv->gtt.mappable_base +
					  i915_gem_obj_ggtt_offset(obj) +
					  offset);
	if (m == NULL) {
		ret = -EFAULT;
		goto unpin;
	}
	KASSERT((m->flags & PG_FICTITIOUS) != 0, ("not fictitious %p", m));
	KASSERT(m->wire_count == 1, ("wire_count not 1 %p", m));

	/*
	 * Try to busy the page. Fails on non-zero return.
	 */
	if (vm_page_busy_try(m, false)) {
		VM_OBJECT_UNLOCK(vm_obj);
		kprintf("i915_gem_fault: PG_BUSY(2)\n");
		i915_gem_object_ggtt_unpin(obj);
		mutex_unlock(&dev->struct_mutex);
		int dummy;
		tsleep(&dummy, 0, "delay", 1); /* XXX */
		VM_OBJECT_LOCK(vm_obj);
		goto retry;
	}
	m->valid = VM_PAGE_BITS_ALL;

	/*
	 * Finally, remap it using the new GTT offset.
	 *
	 * (object expected to be in a locked state)
	 */
	vm_page_insert(m, vm_obj, OFF_TO_IDX(offset));
have_page:
	*mres = m;

	i915_gem_object_ggtt_unpin(obj);
	mutex_unlock(&dev->struct_mutex);
	if (oldm != NULL)
		vm_page_free(oldm);
	if (didpip)
		vm_object_pip_wakeup(vm_obj);
	return (VM_PAGER_OK);

	/*
	 * ALTERNATIVE ERROR RETURN.
	 *
	 * OBJECT EXPECTED TO BE LOCKED.
	 */
unpin:
	i915_gem_object_ggtt_unpin(obj);
unlock:
	mutex_unlock(&dev->struct_mutex);
out:
	switch (ret) {
	case -EIO:
		/*
		 * We eat errors when the gpu is terminally wedged to avoid
		 * userspace unduly crashing (gl has no provisions for mmaps to
		 * fail). But any other -EIO isn't ours (e.g. swap in failure)
		 * and so needs to be reported.
		 */
		if (!i915_terminally_wedged(&dev_priv->gpu_error)) {
//			ret = VM_FAULT_SIGBUS;
			break;
		}
		/* fall through */
	case -EAGAIN:
		/*
		 * EAGAIN means the gpu is hung and we'll wait for the error
		 * handler to reset everything when re-faulting in
		 * i915_mutex_lock_interruptible.
		 */
		/* fall through */
	case -ERESTARTSYS:
	case -EINTR:
		VM_OBJECT_UNLOCK(vm_obj);
		int dummy;
		tsleep(&dummy, 0, "delay", 1); /* XXX */
		VM_OBJECT_LOCK(vm_obj);
		goto retry;
	default:
		WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret);
		ret = VM_PAGER_ERROR;
		break;
	}

	intel_runtime_pm_put(dev_priv);

	/*
	 * Error return. We already NULL'd out *mres so we should be able
	 * to free (oldm) here even though we are returning an error and the
	 * caller usually handles the freeing.
	 */
	if (oldm != NULL)
		vm_page_free(oldm);
	if (didpip)
		vm_object_pip_wakeup(vm_obj);

	return ret;
}
/**
 * i915_gem_release_mmap - remove physical page mappings
 * @obj: obj in question
 *
 * Preserve the reservation of the mmapping with the DRM core code, but
 * relinquish ownership of the pages back to the system.
 *
 * It is vital that we remove the page mapping if we have mapped a tiled
 * object through the GTT and then lose the fence register due to
 * resource pressure. Similarly if the object has been moved out of the
 * aperture, then pages mapped into userspace must be revoked. Removing the
 * mapping will then trigger a page fault on the next user access, allowing
 * fixup by i915_gem_fault().
 */
void
i915_gem_release_mmap(struct drm_i915_gem_object *obj)
{
	vm_object_t devobj;
	vm_page_t m;
	int i, page_count;

	if (!obj->fault_mappable)
		return;

	devobj = cdev_pager_lookup(obj);
	if (devobj != NULL) {
		page_count = OFF_TO_IDX(obj->base.size);

		VM_OBJECT_LOCK(devobj);
		for (i = 0; i < page_count; i++) {
			m = vm_page_lookup_busy_wait(devobj, i, TRUE, "915unm");
			if (m == NULL)
				continue;
			cdev_pager_free_page(devobj, m);
		}
		VM_OBJECT_UNLOCK(devobj);
		vm_object_deallocate(devobj);
	}

	obj->fault_mappable = false;
}

void
i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv)
{
	struct drm_i915_gem_object *obj;

	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list)
		i915_gem_release_mmap(obj);
}

uint32_t
i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
{
	uint32_t gtt_size;

	if (INTEL_INFO(dev)->gen >= 4 ||
	    tiling_mode == I915_TILING_NONE)
		return size;

	/* Previous chips need a power-of-two fence region when tiling */
	if (INTEL_INFO(dev)->gen == 3)
		gtt_size = 1024*1024;
	else
		gtt_size = 512*1024;

	while (gtt_size < size)
		gtt_size <<= 1;

	return gtt_size;
}

/**
 * i915_gem_get_gtt_alignment - return required GTT alignment for an object
 * @obj: object to check
 *
 * Return the required GTT alignment for an object, taking into account
 * potential fence register mapping.
 */
uint32_t
i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size,
			   int tiling_mode, bool fenced)
{
	/*
	 * Minimum alignment is 4k (GTT page size), but might be greater
	 * if a fence register is needed for the object.
	 */
	if (INTEL_INFO(dev)->gen >= 4 || (!fenced && IS_G33(dev)) ||
	    tiling_mode == I915_TILING_NONE)
		return 4096;

	/*
	 * Previous chips need to be aligned to the size of the smallest
	 * fence register that can contain the object.
	 */
	return i915_gem_get_gtt_size(dev, size, tiling_mode);
}

static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
	int ret;

#if 0
	if (drm_vma_node_has_offset(&obj->base.vma_node))
		return 0;
#endif

	dev_priv->mm.shrinker_no_lock_stealing = true;

	ret = drm_gem_create_mmap_offset(&obj->base);
	if (ret != -ENOSPC)
		goto out;

	/* Badly fragmented mmap space? The only way we can recover
	 * space is by destroying unwanted objects. We can't randomly release
	 * mmap_offsets as userspace expects them to be persistent for the
	 * lifetime of the objects. The closest we can is to release the
	 * offsets on purgeable objects by truncating it and marking it purged,
	 * which prevents userspace from ever using that object again.
	 */
	i915_gem_shrink(dev_priv,
			obj->base.size >> PAGE_SHIFT,
			I915_SHRINK_BOUND |
			I915_SHRINK_UNBOUND |
			I915_SHRINK_PURGEABLE);
	ret = drm_gem_create_mmap_offset(&obj->base);
	if (ret != -ENOSPC)
		goto out;

	i915_gem_shrink_all(dev_priv);
	ret = drm_gem_create_mmap_offset(&obj->base);
out:
	dev_priv->mm.shrinker_no_lock_stealing = false;

	return ret;
}

static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
{
	drm_gem_free_mmap_offset(&obj->base);
}

int
i915_gem_mmap_gtt(struct drm_file *file,
		  struct drm_device *dev,
		  uint32_t handle,
		  uint64_t *offset)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj;
	int ret;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	if (obj->base.size > dev_priv->gtt.mappable_end) {
		ret = -E2BIG;
		goto out;
	}

	if (obj->madv != I915_MADV_WILLNEED) {
		DRM_DEBUG("Attempting to mmap a purgeable buffer\n");
		ret = -EFAULT;
		goto out;
	}

	ret = i915_gem_object_create_mmap_offset(obj);
	if (ret)
		goto out;

	*offset = DRM_GEM_MAPPING_OFF(obj->base.map_list.key) |
	    DRM_GEM_MAPPING_KEY;

out:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/**
 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
 * @dev: DRM device
 * @data: GTT mapping ioctl data
 * @file: GEM object info
 *
 * Simply returns the fake offset to userspace so it can mmap it.
 * The mmap call will end up in drm_gem_mmap(), which will set things
 * up so we can get faults in the handler above.
 *
 * The fault handler will take care of binding the object into the GTT
 * (since it may have been evicted to make room for something), allocating
 * a fence register, and mapping the appropriate aperture address into
 * userspace.
 */
int
i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
			struct drm_file *file)
{
	struct drm_i915_gem_mmap_gtt *args = data;

	return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
}

static inline int
i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj)
{
	return obj->madv == I915_MADV_DONTNEED;
}

/* Immediately discard the backing storage */
static void
i915_gem_object_truncate(struct drm_i915_gem_object *obj)
{
	vm_object_t vm_obj;

	vm_obj = obj->base.vm_obj;
	VM_OBJECT_LOCK(vm_obj);
	vm_object_page_remove(vm_obj, 0, 0, false);
	VM_OBJECT_UNLOCK(vm_obj);

	obj->madv = __I915_MADV_PURGED;
}

/* Try to discard unwanted pages */
static void
i915_gem_object_invalidate(struct drm_i915_gem_object *obj)
{
#if 0
	struct address_space *mapping;
#endif

	switch (obj->madv) {
	case I915_MADV_DONTNEED:
		i915_gem_object_truncate(obj);
	case __I915_MADV_PURGED:
		return;
	}

#if 0
	if (obj->base.filp == NULL)
		return;

	mapping = file_inode(obj->base.filp)->i_mapping,
	invalidate_mapping_pages(mapping, 0, (loff_t)-1);
#endif
}

static void
i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
{
	int page_count = obj->base.size / PAGE_SIZE;
	int i, ret;

	if (!obj->pages)
		return;

	BUG_ON(obj->madv == __I915_MADV_PURGED);

	ret = i915_gem_object_set_to_cpu_domain(obj, true);
	if (ret) {
		/* In the event of a disaster, abandon all caches and
		 * hope for the best.
		 */
		WARN_ON(ret != -EIO);
		i915_gem_clflush_object(obj, true);
		obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
	}

	if (i915_gem_object_needs_bit17_swizzle(obj))
		i915_gem_object_save_bit_17_swizzle(obj);

	if (obj->madv == I915_MADV_DONTNEED)
		obj->dirty = 0;

	for (i = 0; i < page_count; i++) {
		struct vm_page *page = obj->pages[i];

		if (obj->dirty)
			set_page_dirty(page);

		if (obj->madv == I915_MADV_WILLNEED)
			mark_page_accessed(page);

		vm_page_busy_wait(obj->pages[i], FALSE, "i915gem");
		vm_page_unwire(obj->pages[i], 1);
		vm_page_wakeup(obj->pages[i]);
	}
	obj->dirty = 0;

	kfree(obj->pages);
	obj->pages = NULL;
}

int
i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
{
	const struct drm_i915_gem_object_ops *ops = obj->ops;

	if (obj->pages == NULL)
		return 0;

	if (obj->pages_pin_count)
		return -EBUSY;

	BUG_ON(i915_gem_obj_bound_any(obj));

	/* ->put_pages might need to allocate memory for the bit17 swizzle
	 * array, hence protect them from being reaped by removing them from gtt
	 * lists early. */
*/ 2099 list_del(&obj->global_list); 2100 2101 ops->put_pages(obj); 2102 obj->pages = NULL; 2103 2104 i915_gem_object_invalidate(obj); 2105 2106 return 0; 2107 } 2108 2109 unsigned long 2110 i915_gem_shrink(struct drm_i915_private *dev_priv, 2111 long target, unsigned flags) 2112 { 2113 const bool purgeable_only = flags & I915_SHRINK_PURGEABLE; 2114 unsigned long count = 0; 2115 2116 /* 2117 * As we may completely rewrite the (un)bound list whilst unbinding 2118 * (due to retiring requests) we have to strictly process only 2119 * one element of the list at the time, and recheck the list 2120 * on every iteration. 2121 * 2122 * In particular, we must hold a reference whilst removing the 2123 * object as we may end up waiting for and/or retiring the objects. 2124 * This might release the final reference (held by the active list) 2125 * and result in the object being freed from under us. This is 2126 * similar to the precautions the eviction code must take whilst 2127 * removing objects. 2128 * 2129 * Also note that although these lists do not hold a reference to 2130 * the object we can safely grab one here: The final object 2131 * unreferencing and the bound_list are both protected by the 2132 * dev->struct_mutex and so we won't ever be able to observe an 2133 * object on the bound_list with a reference count equals 0. 2134 */ 2135 if (flags & I915_SHRINK_UNBOUND) { 2136 struct list_head still_in_list; 2137 2138 INIT_LIST_HEAD(&still_in_list); 2139 while (count < target && !list_empty(&dev_priv->mm.unbound_list)) { 2140 struct drm_i915_gem_object *obj; 2141 2142 obj = list_first_entry(&dev_priv->mm.unbound_list, 2143 typeof(*obj), global_list); 2144 list_move_tail(&obj->global_list, &still_in_list); 2145 2146 if (!i915_gem_object_is_purgeable(obj) && purgeable_only) 2147 continue; 2148 2149 drm_gem_object_reference(&obj->base); 2150 2151 if (i915_gem_object_put_pages(obj) == 0) 2152 count += obj->base.size >> PAGE_SHIFT; 2153 2154 drm_gem_object_unreference(&obj->base); 2155 } 2156 list_splice(&still_in_list, &dev_priv->mm.unbound_list); 2157 } 2158 2159 if (flags & I915_SHRINK_BOUND) { 2160 struct list_head still_in_list; 2161 2162 INIT_LIST_HEAD(&still_in_list); 2163 while (count < target && !list_empty(&dev_priv->mm.bound_list)) { 2164 struct drm_i915_gem_object *obj; 2165 struct i915_vma *vma, *v; 2166 2167 obj = list_first_entry(&dev_priv->mm.bound_list, 2168 typeof(*obj), global_list); 2169 list_move_tail(&obj->global_list, &still_in_list); 2170 2171 if (!i915_gem_object_is_purgeable(obj) && purgeable_only) 2172 continue; 2173 2174 drm_gem_object_reference(&obj->base); 2175 2176 list_for_each_entry_safe(vma, v, &obj->vma_list, vma_link) 2177 if (i915_vma_unbind(vma)) 2178 break; 2179 2180 if (i915_gem_object_put_pages(obj) == 0) 2181 count += obj->base.size >> PAGE_SHIFT; 2182 2183 drm_gem_object_unreference(&obj->base); 2184 } 2185 list_splice(&still_in_list, &dev_priv->mm.bound_list); 2186 } 2187 2188 return count; 2189 } 2190 2191 static unsigned long 2192 i915_gem_shrink_all(struct drm_i915_private *dev_priv) 2193 { 2194 i915_gem_evict_everything(dev_priv->dev); 2195 return i915_gem_shrink(dev_priv, LONG_MAX, 2196 I915_SHRINK_BOUND | I915_SHRINK_UNBOUND); 2197 } 2198 2199 static int 2200 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) 2201 { 2202 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2203 int page_count, i, j; 2204 vm_object_t vm_obj; 2205 struct vm_page *page; 2206 2207 /* Assert that the object is not currently in any GPU domain. 
As it 2208 * wasn't in the GTT, there shouldn't be any way it could have been in 2209 * a GPU cache 2210 */ 2211 BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS); 2212 BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS); 2213 2214 page_count = obj->base.size / PAGE_SIZE; 2215 obj->pages = kmalloc(page_count * sizeof(vm_page_t), M_DRM, 2216 M_WAITOK); 2217 2218 /* Get the list of pages out of our struct file. They'll be pinned 2219 * at this point until we release them. 2220 * 2221 * Fail silently without starting the shrinker 2222 */ 2223 vm_obj = obj->base.vm_obj; 2224 VM_OBJECT_LOCK(vm_obj); 2225 for (i = 0; i < page_count; i++) { 2226 page = shmem_read_mapping_page(vm_obj, i); 2227 if (IS_ERR(page)) { 2228 i915_gem_shrink(dev_priv, 2229 page_count, 2230 I915_SHRINK_BOUND | 2231 I915_SHRINK_UNBOUND | 2232 I915_SHRINK_PURGEABLE); 2233 page = shmem_read_mapping_page(vm_obj, i); 2234 } 2235 if (IS_ERR(page)) { 2236 /* We've tried hard to allocate the memory by reaping 2237 * our own buffer, now let the real VM do its job and 2238 * go down in flames if truly OOM. 2239 */ 2240 2241 i915_gem_shrink_all(dev_priv); 2242 page = shmem_read_mapping_page(vm_obj, i); 2243 if (IS_ERR(page)) 2244 goto err_pages; 2245 } 2246 #ifdef CONFIG_SWIOTLB 2247 if (swiotlb_nr_tbl()) { 2248 st->nents++; 2249 sg_set_page(sg, page, PAGE_SIZE, 0); 2250 sg = sg_next(sg); 2251 continue; 2252 } 2253 #endif 2254 obj->pages[i] = page; 2255 } 2256 #ifdef CONFIG_SWIOTLB 2257 if (!swiotlb_nr_tbl()) 2258 #endif 2259 VM_OBJECT_UNLOCK(vm_obj); 2260 2261 if (i915_gem_object_needs_bit17_swizzle(obj)) 2262 i915_gem_object_do_bit_17_swizzle(obj); 2263 2264 return 0; 2265 2266 err_pages: 2267 for (j = 0; j < i; j++) { 2268 page = obj->pages[j]; 2269 vm_page_busy_wait(page, FALSE, "i915gem"); 2270 vm_page_unwire(page, 0); 2271 vm_page_wakeup(page); 2272 } 2273 VM_OBJECT_UNLOCK(vm_obj); 2274 kfree(obj->pages); 2275 obj->pages = NULL; 2276 return (-EIO); 2277 } 2278 2279 /* Ensure that the associated pages are gathered from the backing storage 2280 * and pinned into our object. i915_gem_object_get_pages() may be called 2281 * multiple times before they are released by a single call to 2282 * i915_gem_object_put_pages() - once the pages are no longer referenced 2283 * either as a result of memory pressure (reaping pages under the shrinker) 2284 * or as the object is itself released. 2285 */ 2286 int 2287 i915_gem_object_get_pages(struct drm_i915_gem_object *obj) 2288 { 2289 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2290 const struct drm_i915_gem_object_ops *ops = obj->ops; 2291 int ret; 2292 2293 if (obj->pages) 2294 return 0; 2295 2296 if (obj->madv != I915_MADV_WILLNEED) { 2297 DRM_DEBUG("Attempting to obtain a purgeable object\n"); 2298 return -EFAULT; 2299 } 2300 2301 BUG_ON(obj->pages_pin_count); 2302 2303 ret = ops->get_pages(obj); 2304 if (ret) 2305 return ret; 2306 2307 list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list); 2308 return 0; 2309 } 2310 2311 static void 2312 i915_gem_object_move_to_active(struct drm_i915_gem_object *obj, 2313 struct intel_engine_cs *ring) 2314 { 2315 u32 seqno = intel_ring_get_seqno(ring); 2316 2317 BUG_ON(ring == NULL); 2318 if (obj->ring != ring && obj->last_write_seqno) { 2319 /* Keep the seqno relative to the current ring */ 2320 obj->last_write_seqno = seqno; 2321 } 2322 obj->ring = ring; 2323 2324 /* Add a reference if we're newly entering the active list. 
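 *
 * (Illustrative lifecycle, for reference: execbuffer calls
 * i915_vma_move_to_active(), which ends up here and takes the reference;
 * once the GPU passes last_read_seqno, retirement calls
 * i915_gem_object_move_to_inactive() below and drops it again.)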
*/ 2325 if (!obj->active) { 2326 drm_gem_object_reference(&obj->base); 2327 obj->active = 1; 2328 } 2329 2330 list_move_tail(&obj->ring_list, &ring->active_list); 2331 2332 obj->last_read_seqno = seqno; 2333 } 2334 2335 void i915_vma_move_to_active(struct i915_vma *vma, 2336 struct intel_engine_cs *ring) 2337 { 2338 list_move_tail(&vma->mm_list, &vma->vm->active_list); 2339 return i915_gem_object_move_to_active(vma->obj, ring); 2340 } 2341 2342 static void 2343 i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj) 2344 { 2345 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2346 struct i915_address_space *vm; 2347 struct i915_vma *vma; 2348 2349 BUG_ON(obj->base.write_domain & ~I915_GEM_GPU_DOMAINS); 2350 BUG_ON(!obj->active); 2351 2352 list_for_each_entry(vm, &dev_priv->vm_list, global_link) { 2353 vma = i915_gem_obj_to_vma(obj, vm); 2354 if (vma && !list_empty(&vma->mm_list)) 2355 list_move_tail(&vma->mm_list, &vm->inactive_list); 2356 } 2357 2358 intel_fb_obj_flush(obj, true); 2359 2360 list_del_init(&obj->ring_list); 2361 obj->ring = NULL; 2362 2363 obj->last_read_seqno = 0; 2364 obj->last_write_seqno = 0; 2365 obj->base.write_domain = 0; 2366 2367 obj->last_fenced_seqno = 0; 2368 2369 obj->active = 0; 2370 drm_gem_object_unreference(&obj->base); 2371 2372 WARN_ON(i915_verify_lists(dev)); 2373 } 2374 2375 static void 2376 i915_gem_object_retire(struct drm_i915_gem_object *obj) 2377 { 2378 struct intel_engine_cs *ring = obj->ring; 2379 2380 if (ring == NULL) 2381 return; 2382 2383 if (i915_seqno_passed(ring->get_seqno(ring, true), 2384 obj->last_read_seqno)) 2385 i915_gem_object_move_to_inactive(obj); 2386 } 2387 2388 static int 2389 i915_gem_init_seqno(struct drm_device *dev, u32 seqno) 2390 { 2391 struct drm_i915_private *dev_priv = dev->dev_private; 2392 struct intel_engine_cs *ring; 2393 int ret, i, j; 2394 2395 /* Carefully retire all requests without writing to the rings */ 2396 for_each_ring(ring, dev_priv, i) { 2397 ret = intel_ring_idle(ring); 2398 if (ret) 2399 return ret; 2400 } 2401 i915_gem_retire_requests(dev); 2402 2403 /* Finally reset hw state */ 2404 for_each_ring(ring, dev_priv, i) { 2405 intel_ring_init_seqno(ring, seqno); 2406 2407 for (j = 0; j < ARRAY_SIZE(ring->semaphore.sync_seqno); j++) 2408 ring->semaphore.sync_seqno[j] = 0; 2409 } 2410 2411 return 0; 2412 } 2413 2414 int i915_gem_set_seqno(struct drm_device *dev, u32 seqno) 2415 { 2416 struct drm_i915_private *dev_priv = dev->dev_private; 2417 int ret; 2418 2419 if (seqno == 0) 2420 return -EINVAL; 2421 2422 /* HWS page needs to be set less than what we 2423 * will inject to ring 2424 */ 2425 ret = i915_gem_init_seqno(dev, seqno - 1); 2426 if (ret) 2427 return ret; 2428 2429 /* Carefully set the last_seqno value so that wrap 2430 * detection still works 2431 */ 2432 dev_priv->next_seqno = seqno; 2433 dev_priv->last_seqno = seqno - 1; 2434 if (dev_priv->last_seqno == 0) 2435 dev_priv->last_seqno--; 2436 2437 return 0; 2438 } 2439 2440 int 2441 i915_gem_get_seqno(struct drm_device *dev, u32 *seqno) 2442 { 2443 struct drm_i915_private *dev_priv = dev->dev_private; 2444 2445 /* reserve 0 for non-seqno */ 2446 if (dev_priv->next_seqno == 0) { 2447 int ret = i915_gem_init_seqno(dev, 0); 2448 if (ret) 2449 return ret; 2450 2451 dev_priv->next_seqno = 1; 2452 } 2453 2454 *seqno = dev_priv->last_seqno = dev_priv->next_seqno++; 2455 return 0; 2456 } 2457 2458 int __i915_add_request(struct intel_engine_cs *ring, 2459 struct drm_file *file, 2460 struct drm_i915_gem_object *obj, 2461 u32 
*out_seqno) 2462 { 2463 struct drm_i915_private *dev_priv = ring->dev->dev_private; 2464 struct drm_i915_gem_request *request; 2465 struct intel_ringbuffer *ringbuf; 2466 u32 request_ring_position, request_start; 2467 int ret; 2468 2469 request = ring->preallocated_lazy_request; 2470 if (WARN_ON(request == NULL)) 2471 return -ENOMEM; 2472 2473 if (i915.enable_execlists) { 2474 struct intel_context *ctx = request->ctx; 2475 ringbuf = ctx->engine[ring->id].ringbuf; 2476 } else 2477 ringbuf = ring->buffer; 2478 2479 request_start = intel_ring_get_tail(ringbuf); 2480 /* 2481 * Emit any outstanding flushes - execbuf can fail to emit the flush 2482 * after having emitted the batchbuffer command. Hence we need to fix 2483 * things up similar to emitting the lazy request. The difference here 2484 * is that the flush _must_ happen before the next request, no matter 2485 * what. 2486 */ 2487 if (i915.enable_execlists) { 2488 ret = logical_ring_flush_all_caches(ringbuf); 2489 if (ret) 2490 return ret; 2491 } else { 2492 ret = intel_ring_flush_all_caches(ring); 2493 if (ret) 2494 return ret; 2495 } 2496 2497 /* Record the position of the start of the request so that 2498 * should we detect the updated seqno part-way through the 2499 * GPU processing the request, we never over-estimate the 2500 * position of the head. 2501 */ 2502 request_ring_position = intel_ring_get_tail(ringbuf); 2503 2504 if (i915.enable_execlists) { 2505 ret = ring->emit_request(ringbuf); 2506 if (ret) 2507 return ret; 2508 } else { 2509 ret = ring->add_request(ring); 2510 if (ret) 2511 return ret; 2512 } 2513 2514 request->seqno = intel_ring_get_seqno(ring); 2515 request->ring = ring; 2516 request->head = request_start; 2517 request->tail = request_ring_position; 2518 2519 /* Whilst this request exists, batch_obj will be on the 2520 * active_list, and so will hold the active reference. Only when this 2521 * request is retired will the batch_obj be moved onto the 2522 * inactive_list and lose its active reference. Hence we do not need 2523 * to explicitly hold another reference here. 2524 */ 2525 request->batch_obj = obj; 2526 2527 if (!i915.enable_execlists) { 2528 /* Hold a reference to the current context so that we can inspect 2529 * it later in case a hangcheck error event fires.
2530 */ 2531 request->ctx = ring->last_context; 2532 if (request->ctx) 2533 i915_gem_context_reference(request->ctx); 2534 } 2535 2536 request->emitted_jiffies = jiffies; 2537 list_add_tail(&request->list, &ring->request_list); 2538 request->file_priv = NULL; 2539 2540 if (file) { 2541 struct drm_i915_file_private *file_priv = file->driver_priv; 2542 2543 spin_lock(&file_priv->mm.lock); 2544 request->file_priv = file_priv; 2545 list_add_tail(&request->client_list, 2546 &file_priv->mm.request_list); 2547 spin_unlock(&file_priv->mm.lock); 2548 } 2549 2550 trace_i915_gem_request_add(ring, request->seqno); 2551 ring->outstanding_lazy_seqno = 0; 2552 ring->preallocated_lazy_request = NULL; 2553 2554 if (!dev_priv->ums.mm_suspended) { 2555 i915_queue_hangcheck(ring->dev); 2556 2557 cancel_delayed_work_sync(&dev_priv->mm.idle_work); 2558 queue_delayed_work(dev_priv->wq, 2559 &dev_priv->mm.retire_work, 2560 round_jiffies_up_relative(HZ)); 2561 intel_mark_busy(dev_priv->dev); 2562 } 2563 2564 if (out_seqno) 2565 *out_seqno = request->seqno; 2566 return 0; 2567 } 2568 2569 static inline void 2570 i915_gem_request_remove_from_client(struct drm_i915_gem_request *request) 2571 { 2572 struct drm_i915_file_private *file_priv = request->file_priv; 2573 2574 if (!file_priv) 2575 return; 2576 2577 spin_lock(&file_priv->mm.lock); 2578 list_del(&request->client_list); 2579 request->file_priv = NULL; 2580 spin_unlock(&file_priv->mm.lock); 2581 } 2582 2583 static bool i915_context_is_banned(struct drm_i915_private *dev_priv, 2584 const struct intel_context *ctx) 2585 { 2586 unsigned long elapsed; 2587 2588 elapsed = get_seconds() - ctx->hang_stats.guilty_ts; 2589 2590 if (ctx->hang_stats.banned) 2591 return true; 2592 2593 if (elapsed <= DRM_I915_CTX_BAN_PERIOD) { 2594 if (!i915_gem_context_is_default(ctx)) { 2595 DRM_DEBUG("context hanging too fast, banning!\n"); 2596 return true; 2597 } else if (i915_stop_ring_allow_ban(dev_priv)) { 2598 if (i915_stop_ring_allow_warn(dev_priv)) 2599 DRM_ERROR("gpu hanging too fast, banning!\n"); 2600 return true; 2601 } 2602 } 2603 2604 return false; 2605 } 2606 2607 static void i915_set_reset_status(struct drm_i915_private *dev_priv, 2608 struct intel_context *ctx, 2609 const bool guilty) 2610 { 2611 struct i915_ctx_hang_stats *hs; 2612 2613 if (WARN_ON(!ctx)) 2614 return; 2615 2616 hs = &ctx->hang_stats; 2617 2618 if (guilty) { 2619 hs->banned = i915_context_is_banned(dev_priv, ctx); 2620 hs->batch_active++; 2621 hs->guilty_ts = get_seconds(); 2622 } else { 2623 hs->batch_pending++; 2624 } 2625 } 2626 2627 static void i915_gem_free_request(struct drm_i915_gem_request *request) 2628 { 2629 list_del(&request->list); 2630 i915_gem_request_remove_from_client(request); 2631 2632 if (request->ctx) 2633 i915_gem_context_unreference(request->ctx); 2634 2635 kfree(request); 2636 } 2637 2638 struct drm_i915_gem_request * 2639 i915_gem_find_active_request(struct intel_engine_cs *ring) 2640 { 2641 struct drm_i915_gem_request *request; 2642 u32 completed_seqno; 2643 2644 completed_seqno = ring->get_seqno(ring, false); 2645 2646 list_for_each_entry(request, &ring->request_list, list) { 2647 if (i915_seqno_passed(completed_seqno, request->seqno)) 2648 continue; 2649 2650 return request; 2651 } 2652 2653 return NULL; 2654 } 2655 2656 static void i915_gem_reset_ring_status(struct drm_i915_private *dev_priv, 2657 struct intel_engine_cs *ring) 2658 { 2659 struct drm_i915_gem_request *request; 2660 bool ring_hung; 2661 2662 request = i915_gem_find_active_request(ring); 2663 2664 if (request == 
NULL) 2665 return; 2666 2667 ring_hung = ring->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG; 2668 2669 i915_set_reset_status(dev_priv, request->ctx, ring_hung); 2670 2671 list_for_each_entry_continue(request, &ring->request_list, list) 2672 i915_set_reset_status(dev_priv, request->ctx, false); 2673 } 2674 2675 static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv, 2676 struct intel_engine_cs *ring) 2677 { 2678 while (!list_empty(&ring->active_list)) { 2679 struct drm_i915_gem_object *obj; 2680 2681 obj = list_first_entry(&ring->active_list, 2682 struct drm_i915_gem_object, 2683 ring_list); 2684 2685 i915_gem_object_move_to_inactive(obj); 2686 } 2687 2688 /* 2689 * We must free the requests after all the corresponding objects have 2690 * been moved off the active lists, which is the same order the normal 2691 * retire_requests function uses. This is important if objects hold 2692 * implicit references on things like e.g. ppgtt address spaces through 2693 * the request. 2694 */ 2695 while (!list_empty(&ring->request_list)) { 2696 struct drm_i915_gem_request *request; 2697 2698 request = list_first_entry(&ring->request_list, 2699 struct drm_i915_gem_request, 2700 list); 2701 2702 i915_gem_free_request(request); 2703 } 2704 2705 while (!list_empty(&ring->execlist_queue)) { 2706 struct intel_ctx_submit_request *submit_req; 2707 2708 submit_req = list_first_entry(&ring->execlist_queue, 2709 struct intel_ctx_submit_request, 2710 execlist_link); 2711 list_del(&submit_req->execlist_link); 2712 intel_runtime_pm_put(dev_priv); 2713 i915_gem_context_unreference(submit_req->ctx); 2714 kfree(submit_req); 2715 } 2716 2717 /* These may not have been flushed before the reset, do so now */ 2718 kfree(ring->preallocated_lazy_request); 2719 ring->preallocated_lazy_request = NULL; 2720 ring->outstanding_lazy_seqno = 0; 2721 } 2722 2723 void i915_gem_restore_fences(struct drm_device *dev) 2724 { 2725 struct drm_i915_private *dev_priv = dev->dev_private; 2726 int i; 2727 2728 for (i = 0; i < dev_priv->num_fence_regs; i++) { 2729 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i]; 2730 2731 /* 2732 * Commit delayed tiling changes if we have an object still 2733 * attached to the fence, otherwise just clear the fence. 2734 */ 2735 if (reg->obj) { 2736 i915_gem_object_update_fence(reg->obj, reg, 2737 reg->obj->tiling_mode); 2738 } else { 2739 i915_gem_write_fence(dev, i, NULL); 2740 } 2741 } 2742 } 2743 2744 void i915_gem_reset(struct drm_device *dev) 2745 { 2746 struct drm_i915_private *dev_priv = dev->dev_private; 2747 struct intel_engine_cs *ring; 2748 int i; 2749 2750 /* 2751 * Before we free the objects from the requests, we need to inspect 2752 * them to find the guilty party. As the requests only borrow 2753 * their reference to the objects, the inspection must be done first. 2754 */ 2755 for_each_ring(ring, dev_priv, i) 2756 i915_gem_reset_ring_status(dev_priv, ring); 2757 2758 for_each_ring(ring, dev_priv, i) 2759 i915_gem_reset_ring_cleanup(dev_priv, ring); 2760 2761 i915_gem_context_reset(dev); 2762 2763 i915_gem_restore_fences(dev); 2764 } 2765 2766 /** 2767 * This function clears the request list as sequence numbers are passed.
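 *
 * Retirement relies on i915_seqno_passed(), which does the comparison with
 * signed 32-bit arithmetic so that seqno wraparound is handled. A sketch of
 * that existing helper, shown here purely for illustration:
 *
 *	static inline bool
 *	i915_seqno_passed(uint32_t seq1, uint32_t seq2)
 *	{
 *		return (int32_t)(seq1 - seq2) >= 0;
 *	}
 *
 * so e.g. seq1 == 3 is treated as having passed seq2 == 0xfffffffe.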
2768 */ 2769 void 2770 i915_gem_retire_requests_ring(struct intel_engine_cs *ring) 2771 { 2772 uint32_t seqno; 2773 2774 if (list_empty(&ring->request_list)) 2775 return; 2776 2777 WARN_ON(i915_verify_lists(ring->dev)); 2778 2779 seqno = ring->get_seqno(ring, true); 2780 2781 /* Move any buffers on the active list that are no longer referenced 2782 * by the ringbuffer to the flushing/inactive lists as appropriate, 2783 * before we free the context associated with the requests. 2784 */ 2785 while (!list_empty(&ring->active_list)) { 2786 struct drm_i915_gem_object *obj; 2787 2788 obj = list_first_entry(&ring->active_list, 2789 struct drm_i915_gem_object, 2790 ring_list); 2791 2792 if (!i915_seqno_passed(seqno, obj->last_read_seqno)) 2793 break; 2794 2795 i915_gem_object_move_to_inactive(obj); 2796 } 2797 2798 2799 while (!list_empty(&ring->request_list)) { 2800 struct drm_i915_gem_request *request; 2801 struct intel_ringbuffer *ringbuf; 2802 2803 request = list_first_entry(&ring->request_list, 2804 struct drm_i915_gem_request, 2805 list); 2806 2807 if (!i915_seqno_passed(seqno, request->seqno)) 2808 break; 2809 2810 trace_i915_gem_request_retire(ring, request->seqno); 2811 2812 /* This is one of the few common intersection points 2813 * between legacy ringbuffer submission and execlists: 2814 * we need to tell them apart in order to find the correct 2815 * ringbuffer to which the request belongs to. 2816 */ 2817 if (i915.enable_execlists) { 2818 struct intel_context *ctx = request->ctx; 2819 ringbuf = ctx->engine[ring->id].ringbuf; 2820 } else 2821 ringbuf = ring->buffer; 2822 2823 /* We know the GPU must have read the request to have 2824 * sent us the seqno + interrupt, so use the position 2825 * of tail of the request to update the last known position 2826 * of the GPU head. 2827 */ 2828 ringbuf->last_retired_head = request->tail; 2829 2830 i915_gem_free_request(request); 2831 } 2832 2833 if (unlikely(ring->trace_irq_seqno && 2834 i915_seqno_passed(seqno, ring->trace_irq_seqno))) { 2835 ring->irq_put(ring); 2836 ring->trace_irq_seqno = 0; 2837 } 2838 2839 WARN_ON(i915_verify_lists(ring->dev)); 2840 } 2841 2842 bool 2843 i915_gem_retire_requests(struct drm_device *dev) 2844 { 2845 struct drm_i915_private *dev_priv = dev->dev_private; 2846 struct intel_engine_cs *ring; 2847 bool idle = true; 2848 int i; 2849 2850 for_each_ring(ring, dev_priv, i) { 2851 i915_gem_retire_requests_ring(ring); 2852 idle &= list_empty(&ring->request_list); 2853 } 2854 2855 if (idle) 2856 mod_delayed_work(dev_priv->wq, 2857 &dev_priv->mm.idle_work, 2858 msecs_to_jiffies(100)); 2859 2860 return idle; 2861 } 2862 2863 static void 2864 i915_gem_retire_work_handler(struct work_struct *work) 2865 { 2866 struct drm_i915_private *dev_priv = 2867 container_of(work, typeof(*dev_priv), mm.retire_work.work); 2868 struct drm_device *dev = dev_priv->dev; 2869 bool idle; 2870 2871 /* Come back later if the device is busy... 
*/ 2872 idle = false; 2873 if (mutex_trylock(&dev->struct_mutex)) { 2874 idle = i915_gem_retire_requests(dev); 2875 mutex_unlock(&dev->struct_mutex); 2876 } 2877 if (!idle) 2878 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 2879 round_jiffies_up_relative(HZ)); 2880 } 2881 2882 static void 2883 i915_gem_idle_work_handler(struct work_struct *work) 2884 { 2885 struct drm_i915_private *dev_priv = 2886 container_of(work, typeof(*dev_priv), mm.idle_work.work); 2887 2888 intel_mark_idle(dev_priv->dev); 2889 } 2890 2891 /** 2892 * Ensures that an object will eventually get non-busy by flushing any required 2893 * write domains, emitting any outstanding lazy request and retiring any 2894 * completed requests. 2895 */ 2896 static int 2897 i915_gem_object_flush_active(struct drm_i915_gem_object *obj) 2898 { 2899 int ret; 2900 2901 if (obj->active) { 2902 ret = i915_gem_check_olr(obj->ring, obj->last_read_seqno); 2903 if (ret) 2904 return ret; 2905 2906 i915_gem_retire_requests_ring(obj->ring); 2907 } 2908 2909 return 0; 2910 } 2911 2912 /** 2913 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT 2914 * @DRM_IOCTL_ARGS: standard ioctl arguments 2915 * 2916 * Returns 0 if successful, else an error is returned with the remaining time in 2917 * the timeout parameter. 2918 * -ETIME: object is still busy after timeout 2919 * -ERESTARTSYS: signal interrupted the wait 2920 * -ENOENT: object doesn't exist 2921 * Also possible, but rare: 2922 * -EAGAIN: GPU wedged 2923 * -ENOMEM: damn 2924 * -ENODEV: Internal IRQ fail 2925 * -E?: The add request failed 2926 * 2927 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any 2928 * non-zero timeout parameter the wait ioctl will wait for the given number of 2929 * nanoseconds on an object becoming unbusy. Since the wait itself does so 2930 * without holding struct_mutex the object may become re-busied before this 2931 * function completes. A similar but shorter race condition exists in the busy 2932 * ioctl. 2933 */ 2934 int 2935 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) 2936 { 2937 struct drm_i915_private *dev_priv = dev->dev_private; 2938 struct drm_i915_gem_wait *args = data; 2939 struct drm_i915_gem_object *obj; 2940 struct intel_engine_cs *ring = NULL; 2941 struct timespec timeout_stack, *timeout = NULL; 2942 unsigned reset_counter; 2943 u32 seqno = 0; 2944 int ret = 0; 2945 2946 if (args->timeout_ns >= 0) { 2947 timeout_stack = ns_to_timespec(args->timeout_ns); 2948 timeout = &timeout_stack; 2949 } 2950 2951 ret = i915_mutex_lock_interruptible(dev); 2952 if (ret) 2953 return ret; 2954 2955 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->bo_handle)); 2956 if (&obj->base == NULL) { 2957 mutex_unlock(&dev->struct_mutex); 2958 return -ENOENT; 2959 } 2960 2961 /* Need to make sure the object gets inactive eventually.
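 *
 * (For reference, a minimal userspace sketch of the ioctl documented above,
 * not part of this driver; "fd" and "handle" are assumed to exist:
 *
 *	struct drm_i915_gem_wait wait = { .bo_handle = handle, .timeout_ns = 0 };
 *	ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
 *
 * A zero timeout just polls busyness, matching the busy ioctl; a positive
 * timeout_ns blocks until the object goes idle or the time runs out.)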
*/ 2962 ret = i915_gem_object_flush_active(obj); 2963 if (ret) 2964 goto out; 2965 2966 if (obj->active) { 2967 seqno = obj->last_read_seqno; 2968 ring = obj->ring; 2969 } 2970 2971 if (seqno == 0) 2972 goto out; 2973 2974 /* Do this after OLR check to make sure we make forward progress polling 2975 * on this IOCTL with a 0 timeout (like busy ioctl) 2976 */ 2977 if (!args->timeout_ns) { 2978 ret = -ETIMEDOUT; 2979 goto out; 2980 } 2981 2982 drm_gem_object_unreference(&obj->base); 2983 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter); 2984 mutex_unlock(&dev->struct_mutex); 2985 2986 ret = __wait_seqno(ring, seqno, reset_counter, true, timeout, file->driver_priv); 2987 if (timeout) 2988 args->timeout_ns = timespec_to_ns(timeout); 2989 return ret; 2990 2991 out: 2992 drm_gem_object_unreference(&obj->base); 2993 mutex_unlock(&dev->struct_mutex); 2994 return ret; 2995 } 2996 2997 /** 2998 * i915_gem_object_sync - sync an object to a ring. 2999 * 3000 * @obj: object which may be in use on another ring. 3001 * @to: ring we wish to use the object on. May be NULL. 3002 * 3003 * This code is meant to abstract object synchronization with the GPU. 3004 * Calling with NULL implies synchronizing the object with the CPU 3005 * rather than a particular GPU ring. 3006 * 3007 * Returns 0 if successful, else propagates up the lower layer error. 3008 */ 3009 int 3010 i915_gem_object_sync(struct drm_i915_gem_object *obj, 3011 struct intel_engine_cs *to) 3012 { 3013 struct intel_engine_cs *from = obj->ring; 3014 u32 seqno; 3015 int ret, idx; 3016 3017 if (from == NULL || to == from) 3018 return 0; 3019 3020 if (to == NULL || !i915_semaphore_is_enabled(obj->base.dev)) 3021 return i915_gem_object_wait_rendering(obj, false); 3022 3023 idx = intel_ring_sync_index(from, to); 3024 3025 seqno = obj->last_read_seqno; 3026 /* Optimization: Avoid semaphore sync when we are sure we already 3027 * waited for an object with higher seqno */ 3028 if (seqno <= from->semaphore.sync_seqno[idx]) 3029 return 0; 3030 3031 ret = i915_gem_check_olr(obj->ring, seqno); 3032 if (ret) 3033 return ret; 3034 3035 trace_i915_gem_ring_sync_to(from, to, seqno); 3036 ret = to->semaphore.sync_to(to, from, seqno); 3037 if (!ret) 3038 /* We use last_read_seqno because sync_to() 3039 * might have just caused seqno wrap under 3040 * the radar. 
3041 */ 3042 from->semaphore.sync_seqno[idx] = obj->last_read_seqno; 3043 3044 return ret; 3045 } 3046 3047 static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj) 3048 { 3049 u32 old_write_domain, old_read_domains; 3050 3051 /* Force a pagefault for domain tracking on next user access */ 3052 i915_gem_release_mmap(obj); 3053 3054 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) 3055 return; 3056 3057 /* Wait for any direct GTT access to complete */ 3058 mb(); 3059 3060 old_read_domains = obj->base.read_domains; 3061 old_write_domain = obj->base.write_domain; 3062 3063 obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT; 3064 obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT; 3065 3066 trace_i915_gem_object_change_domain(obj, 3067 old_read_domains, 3068 old_write_domain); 3069 } 3070 3071 int i915_vma_unbind(struct i915_vma *vma) 3072 { 3073 struct drm_i915_gem_object *obj = vma->obj; 3074 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 3075 int ret; 3076 3077 if (list_empty(&vma->vma_link)) 3078 return 0; 3079 3080 if (!drm_mm_node_allocated(&vma->node)) { 3081 i915_gem_vma_destroy(vma); 3082 return 0; 3083 } 3084 3085 if (vma->pin_count) 3086 return -EBUSY; 3087 3088 BUG_ON(obj->pages == NULL); 3089 3090 ret = i915_gem_object_finish_gpu(obj); 3091 if (ret) 3092 return ret; 3093 /* Continue on if we fail due to EIO, the GPU is hung so we 3094 * should be safe and we need to cleanup or else we might 3095 * cause memory corruption through use-after-free. 3096 */ 3097 3098 /* Throw away the active reference before moving to the unbound list */ 3099 i915_gem_object_retire(obj); 3100 3101 if (i915_is_ggtt(vma->vm)) { 3102 i915_gem_object_finish_gtt(obj); 3103 3104 /* release the fence reg _after_ flushing */ 3105 ret = i915_gem_object_put_fence(obj); 3106 if (ret) 3107 return ret; 3108 } 3109 3110 trace_i915_vma_unbind(vma); 3111 3112 vma->unbind_vma(vma); 3113 3114 list_del_init(&vma->mm_list); 3115 if (i915_is_ggtt(vma->vm)) 3116 obj->map_and_fenceable = false; 3117 3118 drm_mm_remove_node(&vma->node); 3119 i915_gem_vma_destroy(vma); 3120 3121 /* Since the unbound list is global, only move to that list if 3122 * no more VMAs exist. */ 3123 if (list_empty(&obj->vma_list)) { 3124 i915_gem_gtt_finish_object(obj); 3125 list_move_tail(&obj->global_list, &dev_priv->mm.unbound_list); 3126 } 3127 3128 /* And finally now the object is completely decoupled from this vma, 3129 * we can drop its hold on the backing storage and allow it to be 3130 * reaped by the shrinker. 3131 */ 3132 i915_gem_object_unpin_pages(obj); 3133 3134 return 0; 3135 } 3136 3137 int i915_gpu_idle(struct drm_device *dev) 3138 { 3139 struct drm_i915_private *dev_priv = dev->dev_private; 3140 struct intel_engine_cs *ring; 3141 int ret, i; 3142 3143 /* Flush everything onto the inactive list. 
*/ 3144 for_each_ring(ring, dev_priv, i) { 3145 if (!i915.enable_execlists) { 3146 ret = i915_switch_context(ring, ring->default_context); 3147 if (ret) 3148 return ret; 3149 } 3150 3151 ret = intel_ring_idle(ring); 3152 if (ret) 3153 return ret; 3154 } 3155 3156 return 0; 3157 } 3158 3159 static void i965_write_fence_reg(struct drm_device *dev, int reg, 3160 struct drm_i915_gem_object *obj) 3161 { 3162 struct drm_i915_private *dev_priv = dev->dev_private; 3163 int fence_reg; 3164 int fence_pitch_shift; 3165 3166 if (INTEL_INFO(dev)->gen >= 6) { 3167 fence_reg = FENCE_REG_SANDYBRIDGE_0; 3168 fence_pitch_shift = SANDYBRIDGE_FENCE_PITCH_SHIFT; 3169 } else { 3170 fence_reg = FENCE_REG_965_0; 3171 fence_pitch_shift = I965_FENCE_PITCH_SHIFT; 3172 } 3173 3174 fence_reg += reg * 8; 3175 3176 /* To w/a incoherency with non-atomic 64-bit register updates, 3177 * we split the 64-bit update into two 32-bit writes. In order 3178 * for a partial fence not to be evaluated between writes, we 3179 * precede the update with write to turn off the fence register, 3180 * and only enable the fence as the last step. 3181 * 3182 * For extra levels of paranoia, we make sure each step lands 3183 * before applying the next step. 3184 */ 3185 I915_WRITE(fence_reg, 0); 3186 POSTING_READ(fence_reg); 3187 3188 if (obj) { 3189 u32 size = i915_gem_obj_ggtt_size(obj); 3190 uint64_t val; 3191 3192 val = (uint64_t)((i915_gem_obj_ggtt_offset(obj) + size - 4096) & 3193 0xfffff000) << 32; 3194 val |= i915_gem_obj_ggtt_offset(obj) & 0xfffff000; 3195 val |= (uint64_t)((obj->stride / 128) - 1) << fence_pitch_shift; 3196 if (obj->tiling_mode == I915_TILING_Y) 3197 val |= 1 << I965_FENCE_TILING_Y_SHIFT; 3198 val |= I965_FENCE_REG_VALID; 3199 3200 I915_WRITE(fence_reg + 4, val >> 32); 3201 POSTING_READ(fence_reg + 4); 3202 3203 I915_WRITE(fence_reg + 0, val); 3204 POSTING_READ(fence_reg); 3205 } else { 3206 I915_WRITE(fence_reg + 4, 0); 3207 POSTING_READ(fence_reg + 4); 3208 } 3209 } 3210 3211 static void i915_write_fence_reg(struct drm_device *dev, int reg, 3212 struct drm_i915_gem_object *obj) 3213 { 3214 struct drm_i915_private *dev_priv = dev->dev_private; 3215 u32 val; 3216 3217 if (obj) { 3218 u32 size = i915_gem_obj_ggtt_size(obj); 3219 int pitch_val; 3220 int tile_width; 3221 3222 WARN((i915_gem_obj_ggtt_offset(obj) & ~I915_FENCE_START_MASK) || 3223 (size & -size) != size || 3224 (i915_gem_obj_ggtt_offset(obj) & (size - 1)), 3225 "object 0x%08lx [fenceable? 
%d] not 1M or pot-size (0x%08x) aligned\n", 3226 i915_gem_obj_ggtt_offset(obj), obj->map_and_fenceable, size); 3227 3228 if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev)) 3229 tile_width = 128; 3230 else 3231 tile_width = 512; 3232 3233 /* Note: pitch better be a power of two tile widths */ 3234 pitch_val = obj->stride / tile_width; 3235 pitch_val = ffs(pitch_val) - 1; 3236 3237 val = i915_gem_obj_ggtt_offset(obj); 3238 if (obj->tiling_mode == I915_TILING_Y) 3239 val |= 1 << I830_FENCE_TILING_Y_SHIFT; 3240 val |= I915_FENCE_SIZE_BITS(size); 3241 val |= pitch_val << I830_FENCE_PITCH_SHIFT; 3242 val |= I830_FENCE_REG_VALID; 3243 } else 3244 val = 0; 3245 3246 if (reg < 8) 3247 reg = FENCE_REG_830_0 + reg * 4; 3248 else 3249 reg = FENCE_REG_945_8 + (reg - 8) * 4; 3250 3251 I915_WRITE(reg, val); 3252 POSTING_READ(reg); 3253 } 3254 3255 static void i830_write_fence_reg(struct drm_device *dev, int reg, 3256 struct drm_i915_gem_object *obj) 3257 { 3258 struct drm_i915_private *dev_priv = dev->dev_private; 3259 uint32_t val; 3260 3261 if (obj) { 3262 u32 size = i915_gem_obj_ggtt_size(obj); 3263 uint32_t pitch_val; 3264 3265 WARN((i915_gem_obj_ggtt_offset(obj) & ~I830_FENCE_START_MASK) || 3266 (size & -size) != size || 3267 (i915_gem_obj_ggtt_offset(obj) & (size - 1)), 3268 "object 0x%08lx not 512K or pot-size 0x%08x aligned\n", 3269 i915_gem_obj_ggtt_offset(obj), size); 3270 3271 pitch_val = obj->stride / 128; 3272 pitch_val = ffs(pitch_val) - 1; 3273 3274 val = i915_gem_obj_ggtt_offset(obj); 3275 if (obj->tiling_mode == I915_TILING_Y) 3276 val |= 1 << I830_FENCE_TILING_Y_SHIFT; 3277 val |= I830_FENCE_SIZE_BITS(size); 3278 val |= pitch_val << I830_FENCE_PITCH_SHIFT; 3279 val |= I830_FENCE_REG_VALID; 3280 } else 3281 val = 0; 3282 3283 I915_WRITE(FENCE_REG_830_0 + reg * 4, val); 3284 POSTING_READ(FENCE_REG_830_0 + reg * 4); 3285 } 3286 3287 inline static bool i915_gem_object_needs_mb(struct drm_i915_gem_object *obj) 3288 { 3289 return obj && obj->base.read_domains & I915_GEM_DOMAIN_GTT; 3290 } 3291 3292 static void i915_gem_write_fence(struct drm_device *dev, int reg, 3293 struct drm_i915_gem_object *obj) 3294 { 3295 struct drm_i915_private *dev_priv = dev->dev_private; 3296 3297 /* Ensure that all CPU reads are completed before installing a fence 3298 * and all writes before removing the fence. 3299 */ 3300 if (i915_gem_object_needs_mb(dev_priv->fence_regs[reg].obj)) 3301 mb(); 3302 3303 WARN(obj && (!obj->stride || !obj->tiling_mode), 3304 "bogus fence setup with stride: 0x%x, tiling mode: %i\n", 3305 obj->stride, obj->tiling_mode); 3306 3307 switch (INTEL_INFO(dev)->gen) { 3308 case 8: 3309 case 7: 3310 case 6: 3311 case 5: 3312 case 4: i965_write_fence_reg(dev, reg, obj); break; 3313 case 3: i915_write_fence_reg(dev, reg, obj); break; 3314 case 2: i830_write_fence_reg(dev, reg, obj); break; 3315 default: BUG(); 3316 } 3317 3318 /* And similarly be paranoid that no direct access to this region 3319 * is reordered to before the fence is installed. 
3320 */ 3321 if (i915_gem_object_needs_mb(obj)) 3322 mb(); 3323 } 3324 3325 static inline int fence_number(struct drm_i915_private *dev_priv, 3326 struct drm_i915_fence_reg *fence) 3327 { 3328 return fence - dev_priv->fence_regs; 3329 } 3330 3331 static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj, 3332 struct drm_i915_fence_reg *fence, 3333 bool enable) 3334 { 3335 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 3336 int reg = fence_number(dev_priv, fence); 3337 3338 i915_gem_write_fence(obj->base.dev, reg, enable ? obj : NULL); 3339 3340 if (enable) { 3341 obj->fence_reg = reg; 3342 fence->obj = obj; 3343 list_move_tail(&fence->lru_list, &dev_priv->mm.fence_list); 3344 } else { 3345 obj->fence_reg = I915_FENCE_REG_NONE; 3346 fence->obj = NULL; 3347 list_del_init(&fence->lru_list); 3348 } 3349 obj->fence_dirty = false; 3350 } 3351 3352 static int 3353 i915_gem_object_wait_fence(struct drm_i915_gem_object *obj) 3354 { 3355 if (obj->last_fenced_seqno) { 3356 int ret = i915_wait_seqno(obj->ring, obj->last_fenced_seqno); 3357 if (ret) 3358 return ret; 3359 3360 obj->last_fenced_seqno = 0; 3361 } 3362 3363 return 0; 3364 } 3365 3366 int 3367 i915_gem_object_put_fence(struct drm_i915_gem_object *obj) 3368 { 3369 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 3370 struct drm_i915_fence_reg *fence; 3371 int ret; 3372 3373 ret = i915_gem_object_wait_fence(obj); 3374 if (ret) 3375 return ret; 3376 3377 if (obj->fence_reg == I915_FENCE_REG_NONE) 3378 return 0; 3379 3380 fence = &dev_priv->fence_regs[obj->fence_reg]; 3381 3382 if (WARN_ON(fence->pin_count)) 3383 return -EBUSY; 3384 3385 i915_gem_object_fence_lost(obj); 3386 i915_gem_object_update_fence(obj, fence, false); 3387 3388 return 0; 3389 } 3390 3391 static struct drm_i915_fence_reg * 3392 i915_find_fence_reg(struct drm_device *dev) 3393 { 3394 struct drm_i915_private *dev_priv = dev->dev_private; 3395 struct drm_i915_fence_reg *reg, *avail; 3396 int i; 3397 3398 /* First try to find a free reg */ 3399 avail = NULL; 3400 for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) { 3401 reg = &dev_priv->fence_regs[i]; 3402 if (!reg->obj) 3403 return reg; 3404 3405 if (!reg->pin_count) 3406 avail = reg; 3407 } 3408 3409 if (avail == NULL) 3410 goto deadlock; 3411 3412 /* None available, try to steal one or wait for a user to finish */ 3413 list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) { 3414 if (reg->pin_count) 3415 continue; 3416 3417 return reg; 3418 } 3419 3420 deadlock: 3421 /* Wait for completion of pending flips which consume fences */ 3422 if (intel_has_pending_fb_unpin(dev)) 3423 return ERR_PTR(-EAGAIN); 3424 3425 return ERR_PTR(-EDEADLK); 3426 } 3427 3428 /** 3429 * i915_gem_object_get_fence - set up fencing for an object 3430 * @obj: object to map through a fence reg 3431 * 3432 * When mapping objects through the GTT, userspace wants to be able to write 3433 * to them without having to worry about swizzling if the object is tiled. 3434 * This function walks the fence regs looking for a free one for @obj, 3435 * stealing one if it can't find any. 3436 * 3437 * It then sets up the reg based on the object's properties: address, pitch 3438 * and tiling format. 3439 * 3440 * For an untiled surface, this removes any existing fence. 
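 *
 * As a worked example of the pitch encoding performed by the per-gen
 * helpers above (numbers chosen purely for illustration): a gen3 X-tiled
 * object with a 2048 byte stride uses 512 byte wide tiles, so
 * pitch_val = ffs(2048 / 512) - 1 = 2 is what lands in the
 * I830_FENCE_PITCH_SHIFT field of the fence register.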
3441 */ 3442 int 3443 i915_gem_object_get_fence(struct drm_i915_gem_object *obj) 3444 { 3445 struct drm_device *dev = obj->base.dev; 3446 struct drm_i915_private *dev_priv = dev->dev_private; 3447 bool enable = obj->tiling_mode != I915_TILING_NONE; 3448 struct drm_i915_fence_reg *reg; 3449 int ret; 3450 3451 /* Have we updated the tiling parameters upon the object and so 3452 * will need to serialise the write to the associated fence register? 3453 */ 3454 if (obj->fence_dirty) { 3455 ret = i915_gem_object_wait_fence(obj); 3456 if (ret) 3457 return ret; 3458 } 3459 3460 /* Just update our place in the LRU if our fence is getting reused. */ 3461 if (obj->fence_reg != I915_FENCE_REG_NONE) { 3462 reg = &dev_priv->fence_regs[obj->fence_reg]; 3463 if (!obj->fence_dirty) { 3464 list_move_tail(&reg->lru_list, 3465 &dev_priv->mm.fence_list); 3466 return 0; 3467 } 3468 } else if (enable) { 3469 if (WARN_ON(!obj->map_and_fenceable)) 3470 return -EINVAL; 3471 3472 reg = i915_find_fence_reg(dev); 3473 if (IS_ERR(reg)) 3474 return PTR_ERR(reg); 3475 3476 if (reg->obj) { 3477 struct drm_i915_gem_object *old = reg->obj; 3478 3479 ret = i915_gem_object_wait_fence(old); 3480 if (ret) 3481 return ret; 3482 3483 i915_gem_object_fence_lost(old); 3484 } 3485 } else 3486 return 0; 3487 3488 i915_gem_object_update_fence(obj, reg, enable); 3489 3490 return 0; 3491 } 3492 3493 static bool i915_gem_valid_gtt_space(struct i915_vma *vma, 3494 unsigned long cache_level) 3495 { 3496 struct drm_mm_node *gtt_space = &vma->node; 3497 struct drm_mm_node *other; 3498 3499 /* 3500 * On some machines we have to be careful when putting differing types 3501 * of snoopable memory together to avoid the prefetcher crossing memory 3502 * domains and dying. During vm initialisation, we decide whether or not 3503 * these constraints apply and set the drm_mm.color_adjust 3504 * appropriately.
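 *
 * Concretely (layout shown only as an illustration), with color_adjust set
 * the check below accepts
 *
 *	| snooped obj | hole | uncached obj |
 *
 * but rejects two differently-coloured nodes that touch directly, since the
 * prefetcher could then wander from one type of memory into the other.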
3505 */ 3506 if (vma->vm->mm.color_adjust == NULL) 3507 return true; 3508 3509 if (!drm_mm_node_allocated(gtt_space)) 3510 return true; 3511 3512 if (list_empty(&gtt_space->node_list)) 3513 return true; 3514 3515 other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list); 3516 if (other->allocated && !other->hole_follows && other->color != cache_level) 3517 return false; 3518 3519 other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list); 3520 if (other->allocated && !gtt_space->hole_follows && other->color != cache_level) 3521 return false; 3522 3523 return true; 3524 } 3525 3526 static void i915_gem_verify_gtt(struct drm_device *dev) 3527 { 3528 #if WATCH_GTT 3529 struct drm_i915_private *dev_priv = dev->dev_private; 3530 struct drm_i915_gem_object *obj; 3531 int err = 0; 3532 3533 list_for_each_entry(obj, &dev_priv->mm.gtt_list, global_list) { 3534 if (obj->gtt_space == NULL) { 3535 printk(KERN_ERR "object found on GTT list with no space reserved\n"); 3536 err++; 3537 continue; 3538 } 3539 3540 if (obj->cache_level != obj->gtt_space->color) { 3541 printk(KERN_ERR "object reserved space [%08lx, %08lx] with wrong color, cache_level=%x, color=%lx\n", 3542 i915_gem_obj_ggtt_offset(obj), 3543 i915_gem_obj_ggtt_offset(obj) + i915_gem_obj_ggtt_size(obj), 3544 obj->cache_level, 3545 obj->gtt_space->color); 3546 err++; 3547 continue; 3548 } 3549 3550 if (!i915_gem_valid_gtt_space(dev, 3551 obj->gtt_space, 3552 obj->cache_level)) { 3553 printk(KERN_ERR "invalid GTT space found at [%08lx, %08lx] - color=%x\n", 3554 i915_gem_obj_ggtt_offset(obj), 3555 i915_gem_obj_ggtt_offset(obj) + i915_gem_obj_ggtt_size(obj), 3556 obj->cache_level); 3557 err++; 3558 continue; 3559 } 3560 } 3561 3562 WARN_ON(err); 3563 #endif 3564 } 3565 3566 /** 3567 * Finds free space in the GTT aperture and binds the object there. 3568 */ 3569 static struct i915_vma * 3570 i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, 3571 struct i915_address_space *vm, 3572 unsigned alignment, 3573 uint64_t flags) 3574 { 3575 struct drm_device *dev = obj->base.dev; 3576 struct drm_i915_private *dev_priv = dev->dev_private; 3577 u32 size, fence_size, fence_alignment, unfenced_alignment; 3578 unsigned long start = 3579 flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0; 3580 unsigned long end = 3581 flags & PIN_MAPPABLE ? dev_priv->gtt.mappable_end : vm->total; 3582 struct i915_vma *vma; 3583 int ret; 3584 3585 fence_size = i915_gem_get_gtt_size(dev, 3586 obj->base.size, 3587 obj->tiling_mode); 3588 fence_alignment = i915_gem_get_gtt_alignment(dev, 3589 obj->base.size, 3590 obj->tiling_mode, true); 3591 unfenced_alignment = 3592 i915_gem_get_gtt_alignment(dev, 3593 obj->base.size, 3594 obj->tiling_mode, false); 3595 3596 if (alignment == 0) 3597 alignment = flags & PIN_MAPPABLE ? fence_alignment : 3598 unfenced_alignment; 3599 if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) { 3600 DRM_DEBUG("Invalid object alignment requested %u\n", alignment); 3601 return ERR_PTR(-EINVAL); 3602 } 3603 3604 size = flags & PIN_MAPPABLE ? fence_size : obj->base.size; 3605 3606 /* If the object is bigger than the entire aperture, reject it early 3607 * before evicting everything in a vain attempt to find space. 3608 */ 3609 if (obj->base.size > end) { 3610 DRM_DEBUG("Attempting to bind an object larger than the aperture: object=%zd > %s aperture=%lu\n", 3611 obj->base.size, 3612 flags & PIN_MAPPABLE ? 
"mappable" : "total", 3613 end); 3614 return ERR_PTR(-E2BIG); 3615 } 3616 3617 ret = i915_gem_object_get_pages(obj); 3618 if (ret) 3619 return ERR_PTR(ret); 3620 3621 i915_gem_object_pin_pages(obj); 3622 3623 vma = i915_gem_obj_lookup_or_create_vma(obj, vm); 3624 if (IS_ERR(vma)) 3625 goto err_unpin; 3626 3627 search_free: 3628 ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node, 3629 size, alignment, 3630 obj->cache_level, 3631 start, end, 3632 DRM_MM_SEARCH_DEFAULT, 3633 DRM_MM_CREATE_DEFAULT); 3634 if (ret) { 3635 ret = i915_gem_evict_something(dev, vm, size, alignment, 3636 obj->cache_level, 3637 start, end, 3638 flags); 3639 if (ret == 0) 3640 goto search_free; 3641 3642 goto err_free_vma; 3643 } 3644 if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) { 3645 ret = -EINVAL; 3646 goto err_remove_node; 3647 } 3648 3649 ret = i915_gem_gtt_prepare_object(obj); 3650 if (ret) 3651 goto err_remove_node; 3652 3653 list_move_tail(&obj->global_list, &dev_priv->mm.bound_list); 3654 list_add_tail(&vma->mm_list, &vm->inactive_list); 3655 3656 if (i915_is_ggtt(vm)) { 3657 bool mappable, fenceable; 3658 3659 fenceable = (vma->node.size == fence_size && 3660 (vma->node.start & (fence_alignment - 1)) == 0); 3661 3662 mappable = (vma->node.start + obj->base.size <= 3663 dev_priv->gtt.mappable_end); 3664 3665 obj->map_and_fenceable = mappable && fenceable; 3666 } 3667 3668 WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable); 3669 3670 trace_i915_vma_bind(vma, flags); 3671 vma->bind_vma(vma, obj->cache_level, 3672 flags & (PIN_MAPPABLE | PIN_GLOBAL) ? GLOBAL_BIND : 0); 3673 3674 i915_gem_verify_gtt(dev); 3675 return vma; 3676 3677 err_remove_node: 3678 drm_mm_remove_node(&vma->node); 3679 err_free_vma: 3680 i915_gem_vma_destroy(vma); 3681 vma = ERR_PTR(ret); 3682 err_unpin: 3683 i915_gem_object_unpin_pages(obj); 3684 return vma; 3685 } 3686 3687 bool 3688 i915_gem_clflush_object(struct drm_i915_gem_object *obj, 3689 bool force) 3690 { 3691 /* If we don't have a page list set up, then we're not pinned 3692 * to GPU, and we can ignore the cache flush because it'll happen 3693 * again at bind time. 3694 */ 3695 if (obj->pages == NULL) 3696 return false; 3697 3698 /* 3699 * Stolen memory is always coherent with the GPU as it is explicitly 3700 * marked as wc by the system, or the system is cache-coherent. 3701 */ 3702 if (obj->stolen) 3703 return false; 3704 3705 /* If the GPU is snooping the contents of the CPU cache, 3706 * we do not need to manually clear the CPU cache lines. However, 3707 * the caches are only snooped when the render cache is 3708 * flushed/invalidated. As we always have to emit invalidations 3709 * and flushes when moving into and out of the RENDER domain, correct 3710 * snooping behaviour occurs naturally as the result of our domain 3711 * tracking. 3712 */ 3713 if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) 3714 return false; 3715 3716 trace_i915_gem_object_clflush(obj); 3717 drm_clflush_pages(obj->pages, obj->base.size / PAGE_SIZE); 3718 3719 return true; 3720 } 3721 3722 /** Flushes the GTT write domain for the object if it's dirty. */ 3723 static void 3724 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) 3725 { 3726 uint32_t old_write_domain; 3727 3728 if (obj->base.write_domain != I915_GEM_DOMAIN_GTT) 3729 return; 3730 3731 /* No actual flushing is required for the GTT write domain. Writes 3732 * to it immediately go to main memory as far as we know, so there's 3733 * no chipset flush. 
It also doesn't land in render cache. 3734 * 3735 * However, we do have to enforce the order so that all writes through 3736 * the GTT land before any writes to the device, such as updates to 3737 * the GATT itself. 3738 */ 3739 wmb(); 3740 3741 old_write_domain = obj->base.write_domain; 3742 obj->base.write_domain = 0; 3743 3744 intel_fb_obj_flush(obj, false); 3745 3746 3747 3748 trace_i915_gem_object_change_domain(obj, 3749 obj->base.read_domains, 3750 old_write_domain); 3751 } 3752 3753 /** Flushes the CPU write domain for the object if it's dirty. */ 3754 static void 3755 i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj, 3756 bool force) 3757 { 3758 uint32_t old_write_domain; 3759 3760 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) 3761 return; 3762 3763 if (i915_gem_clflush_object(obj, force)) 3764 i915_gem_chipset_flush(obj->base.dev); 3765 3766 old_write_domain = obj->base.write_domain; 3767 obj->base.write_domain = 0; 3768 3769 trace_i915_gem_object_change_domain(obj, 3770 obj->base.read_domains, 3771 old_write_domain); 3772 } 3773 3774 /** 3775 * Moves a single object to the GTT read, and possibly write domain. 3776 * 3777 * This function returns when the move is complete, including waiting on 3778 * flushes to occur. 3779 */ 3780 int 3781 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) 3782 { 3783 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 3784 struct i915_vma *vma = i915_gem_obj_to_ggtt(obj); 3785 uint32_t old_write_domain, old_read_domains; 3786 int ret; 3787 3788 /* Not valid to be called on unbound objects. */ 3789 if (vma == NULL) 3790 return -EINVAL; 3791 3792 if (obj->base.write_domain == I915_GEM_DOMAIN_GTT) 3793 return 0; 3794 3795 ret = i915_gem_object_wait_rendering(obj, !write); 3796 if (ret) 3797 return ret; 3798 3799 i915_gem_object_retire(obj); 3800 i915_gem_object_flush_cpu_write_domain(obj, false); 3801 3802 /* Serialise direct access to this object with the barriers for 3803 * coherent writes from the GPU, by effectively invalidating the 3804 * GTT domain upon first access. 3805 */ 3806 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) 3807 mb(); 3808 3809 old_write_domain = obj->base.write_domain; 3810 old_read_domains = obj->base.read_domains; 3811 3812 /* It should now be out of any other write domains, and we can update 3813 * the domain values for our changes. 
3814 */ 3815 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0); 3816 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 3817 if (write) { 3818 obj->base.read_domains = I915_GEM_DOMAIN_GTT; 3819 obj->base.write_domain = I915_GEM_DOMAIN_GTT; 3820 obj->dirty = 1; 3821 } 3822 3823 if (write) 3824 intel_fb_obj_invalidate(obj, NULL); 3825 3826 trace_i915_gem_object_change_domain(obj, 3827 old_read_domains, 3828 old_write_domain); 3829 3830 /* And bump the LRU for this access */ 3831 if (i915_gem_object_is_inactive(obj)) 3832 list_move_tail(&vma->mm_list, 3833 &dev_priv->gtt.base.inactive_list); 3834 3835 return 0; 3836 } 3837 3838 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, 3839 enum i915_cache_level cache_level) 3840 { 3841 struct drm_device *dev = obj->base.dev; 3842 struct i915_vma *vma, *next; 3843 int ret; 3844 3845 if (obj->cache_level == cache_level) 3846 return 0; 3847 3848 if (i915_gem_obj_is_pinned(obj)) { 3849 DRM_DEBUG("can not change the cache level of pinned objects\n"); 3850 return -EBUSY; 3851 } 3852 3853 list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) { 3854 if (!i915_gem_valid_gtt_space(vma, cache_level)) { 3855 ret = i915_vma_unbind(vma); 3856 if (ret) 3857 return ret; 3858 } 3859 } 3860 3861 if (i915_gem_obj_bound_any(obj)) { 3862 ret = i915_gem_object_finish_gpu(obj); 3863 if (ret) 3864 return ret; 3865 3866 i915_gem_object_finish_gtt(obj); 3867 3868 /* Before SandyBridge, you could not use tiling or fence 3869 * registers with snooped memory, so relinquish any fences 3870 * currently pointing to our region in the aperture. 3871 */ 3872 if (INTEL_INFO(dev)->gen < 6) { 3873 ret = i915_gem_object_put_fence(obj); 3874 if (ret) 3875 return ret; 3876 } 3877 3878 list_for_each_entry(vma, &obj->vma_list, vma_link) 3879 if (drm_mm_node_allocated(&vma->node)) 3880 vma->bind_vma(vma, cache_level, 3881 obj->has_global_gtt_mapping ? GLOBAL_BIND : 0); 3882 } 3883 3884 list_for_each_entry(vma, &obj->vma_list, vma_link) 3885 vma->node.color = cache_level; 3886 obj->cache_level = cache_level; 3887 3888 if (cpu_write_needs_clflush(obj)) { 3889 u32 old_read_domains, old_write_domain; 3890 3891 /* If we're coming from LLC cached, then we haven't 3892 * actually been tracking whether the data is in the 3893 * CPU cache or not, since we only allow one bit set 3894 * in obj->write_domain and have been skipping the clflushes. 3895 * Just set it to the CPU cache for now. 
3896 */ 3897 i915_gem_object_retire(obj); 3898 WARN_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU); 3899 3900 old_read_domains = obj->base.read_domains; 3901 old_write_domain = obj->base.write_domain; 3902 3903 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 3904 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 3905 3906 trace_i915_gem_object_change_domain(obj, 3907 old_read_domains, 3908 old_write_domain); 3909 } 3910 3911 i915_gem_verify_gtt(dev); 3912 return 0; 3913 } 3914 3915 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, 3916 struct drm_file *file) 3917 { 3918 struct drm_i915_gem_caching *args = data; 3919 struct drm_i915_gem_object *obj; 3920 int ret; 3921 3922 ret = i915_mutex_lock_interruptible(dev); 3923 if (ret) 3924 return ret; 3925 3926 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 3927 if (&obj->base == NULL) { 3928 ret = -ENOENT; 3929 goto unlock; 3930 } 3931 3932 switch (obj->cache_level) { 3933 case I915_CACHE_LLC: 3934 case I915_CACHE_L3_LLC: 3935 args->caching = I915_CACHING_CACHED; 3936 break; 3937 3938 case I915_CACHE_WT: 3939 args->caching = I915_CACHING_DISPLAY; 3940 break; 3941 3942 default: 3943 args->caching = I915_CACHING_NONE; 3944 break; 3945 } 3946 3947 drm_gem_object_unreference(&obj->base); 3948 unlock: 3949 mutex_unlock(&dev->struct_mutex); 3950 return ret; 3951 } 3952 3953 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, 3954 struct drm_file *file) 3955 { 3956 struct drm_i915_gem_caching *args = data; 3957 struct drm_i915_gem_object *obj; 3958 enum i915_cache_level level; 3959 int ret; 3960 3961 switch (args->caching) { 3962 case I915_CACHING_NONE: 3963 level = I915_CACHE_NONE; 3964 break; 3965 case I915_CACHING_CACHED: 3966 level = I915_CACHE_LLC; 3967 break; 3968 case I915_CACHING_DISPLAY: 3969 level = HAS_WT(dev) ? I915_CACHE_WT : I915_CACHE_NONE; 3970 break; 3971 default: 3972 return -EINVAL; 3973 } 3974 3975 ret = i915_mutex_lock_interruptible(dev); 3976 if (ret) 3977 return ret; 3978 3979 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 3980 if (&obj->base == NULL) { 3981 ret = -ENOENT; 3982 goto unlock; 3983 } 3984 3985 ret = i915_gem_object_set_cache_level(obj, level); 3986 3987 drm_gem_object_unreference(&obj->base); 3988 unlock: 3989 mutex_unlock(&dev->struct_mutex); 3990 return ret; 3991 } 3992 3993 static bool is_pin_display(struct drm_i915_gem_object *obj) 3994 { 3995 struct i915_vma *vma; 3996 3997 vma = i915_gem_obj_to_ggtt(obj); 3998 if (!vma) 3999 return false; 4000 4001 /* There are 3 sources that pin objects: 4002 * 1. The display engine (scanouts, sprites, cursors); 4003 * 2. Reservations for execbuffer; 4004 * 3. The user. 4005 * 4006 * We can ignore reservations as we hold the struct_mutex and 4007 * are only called outside of the reservation path. The user 4008 * can only increment pin_count once, and so if after 4009 * subtracting the potential reference by the user, any pin_count 4010 * remains, it must be due to another use by the display engine. 4011 */ 4012 return vma->pin_count - !!obj->user_pin_count; 4013 } 4014 4015 /* 4016 * Prepare buffer for display plane (scanout, cursors, etc). 4017 * Can be called from an uninterruptible phase (modesetting) and allows 4018 * any flushes to be pipelined (for pageflips). 
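 *
 * A rough sketch of how a scanout path is expected to use this helper pair
 * (illustrative only; the real modesetting code layers alignment and fence
 * handling on top):
 *
 *	ret = i915_gem_object_pin_to_display_plane(obj, alignment, pipelined);
 *	if (ret == 0)
 *		ret = i915_gem_object_get_fence(obj);
 *	...
 *	i915_gem_object_unpin_from_display_plane(obj);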
4019 */ 4020 int 4021 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, 4022 u32 alignment, 4023 struct intel_engine_cs *pipelined) 4024 { 4025 u32 old_read_domains, old_write_domain; 4026 bool was_pin_display; 4027 int ret; 4028 4029 if (pipelined != obj->ring) { 4030 ret = i915_gem_object_sync(obj, pipelined); 4031 if (ret) 4032 return ret; 4033 } 4034 4035 /* Mark the pin_display early so that we account for the 4036 * display coherency whilst setting up the cache domains. 4037 */ 4038 was_pin_display = obj->pin_display; 4039 obj->pin_display = true; 4040 4041 /* The display engine is not coherent with the LLC cache on gen6. As 4042 * a result, we make sure that the pinning that is about to occur is 4043 * done with uncached PTEs. This is lowest common denominator for all 4044 * chipsets. 4045 * 4046 * However for gen6+, we could do better by using the GFDT bit instead 4047 * of uncaching, which would allow us to flush all the LLC-cached data 4048 * with that bit in the PTE to main memory with just one PIPE_CONTROL. 4049 */ 4050 ret = i915_gem_object_set_cache_level(obj, 4051 HAS_WT(obj->base.dev) ? I915_CACHE_WT : I915_CACHE_NONE); 4052 if (ret) 4053 goto err_unpin_display; 4054 4055 /* As the user may map the buffer once pinned in the display plane 4056 * (e.g. libkms for the bootup splash), we have to ensure that we 4057 * always use map_and_fenceable for all scanout buffers. 4058 */ 4059 ret = i915_gem_obj_ggtt_pin(obj, alignment, PIN_MAPPABLE); 4060 if (ret) 4061 goto err_unpin_display; 4062 4063 i915_gem_object_flush_cpu_write_domain(obj, true); 4064 4065 old_write_domain = obj->base.write_domain; 4066 old_read_domains = obj->base.read_domains; 4067 4068 /* It should now be out of any other write domains, and we can update 4069 * the domain values for our changes. 4070 */ 4071 obj->base.write_domain = 0; 4072 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 4073 4074 trace_i915_gem_object_change_domain(obj, 4075 old_read_domains, 4076 old_write_domain); 4077 4078 return 0; 4079 4080 err_unpin_display: 4081 WARN_ON(was_pin_display != is_pin_display(obj)); 4082 obj->pin_display = was_pin_display; 4083 return ret; 4084 } 4085 4086 void 4087 i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj) 4088 { 4089 i915_gem_object_ggtt_unpin(obj); 4090 obj->pin_display = is_pin_display(obj); 4091 } 4092 4093 int 4094 i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj) 4095 { 4096 int ret; 4097 4098 if ((obj->base.read_domains & I915_GEM_GPU_DOMAINS) == 0) 4099 return 0; 4100 4101 ret = i915_gem_object_wait_rendering(obj, false); 4102 if (ret) 4103 return ret; 4104 4105 /* Ensure that we invalidate the GPU's caches and TLBs. */ 4106 obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS; 4107 return 0; 4108 } 4109 4110 /** 4111 * Moves a single object to the CPU read, and possibly write domain. 4112 * 4113 * This function returns when the move is complete, including waiting on 4114 * flushes to occur. 
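 * A read-only move (@write false) only has to wait for any outstanding GPU
 * write to retire, whereas a write move waits for all rendering to finish
 * before the object is handed to the CPU exclusively.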
4115 */ 4116 int 4117 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) 4118 { 4119 uint32_t old_write_domain, old_read_domains; 4120 int ret; 4121 4122 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) 4123 return 0; 4124 4125 ret = i915_gem_object_wait_rendering(obj, !write); 4126 if (ret) 4127 return ret; 4128 4129 i915_gem_object_retire(obj); 4130 i915_gem_object_flush_gtt_write_domain(obj); 4131 4132 old_write_domain = obj->base.write_domain; 4133 old_read_domains = obj->base.read_domains; 4134 4135 /* Flush the CPU cache if it's still invalid. */ 4136 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) { 4137 i915_gem_clflush_object(obj, false); 4138 4139 obj->base.read_domains |= I915_GEM_DOMAIN_CPU; 4140 } 4141 4142 /* It should now be out of any other write domains, and we can update 4143 * the domain values for our changes. 4144 */ 4145 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0); 4146 4147 /* If we're writing through the CPU, then the GPU read domains will 4148 * need to be invalidated at next use. 4149 */ 4150 if (write) { 4151 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 4152 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 4153 } 4154 4155 if (write) 4156 intel_fb_obj_invalidate(obj, NULL); 4157 4158 trace_i915_gem_object_change_domain(obj, 4159 old_read_domains, 4160 old_write_domain); 4161 4162 return 0; 4163 } 4164 4165 /* Throttle our rendering by waiting until the ring has completed our requests 4166 * emitted over 20 msec ago. 4167 * 4168 * Note that if we were to use the current jiffies each time around the loop, 4169 * we wouldn't escape the function with any frames outstanding if the time to 4170 * render a frame was over 20ms. 4171 * 4172 * This should get us reasonable parallelism between CPU and GPU but also 4173 * relatively low latency when blocking on a particular request to finish. 
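 *
 * For example, a client rendering at 60Hz emits a request roughly every
 * 16.7ms; as long as the GPU keeps up, every request older than 20ms has
 * already retired and the wait below returns immediately. Only a client
 * that has fallen more than ~20ms behind actually blocks here.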
4174 */ 4175 static int 4176 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) 4177 { 4178 struct drm_i915_private *dev_priv = dev->dev_private; 4179 struct drm_i915_file_private *file_priv = file->driver_priv; 4180 unsigned long recent_enough = jiffies - msecs_to_jiffies(20); 4181 struct drm_i915_gem_request *request; 4182 struct intel_engine_cs *ring = NULL; 4183 unsigned reset_counter; 4184 u32 seqno = 0; 4185 int ret; 4186 4187 ret = i915_gem_wait_for_error(&dev_priv->gpu_error); 4188 if (ret) 4189 return ret; 4190 4191 ret = i915_gem_check_wedge(&dev_priv->gpu_error, false); 4192 if (ret) 4193 return ret; 4194 4195 spin_lock(&file_priv->mm.lock); 4196 list_for_each_entry(request, &file_priv->mm.request_list, client_list) { 4197 if (time_after_eq(request->emitted_jiffies, recent_enough)) 4198 break; 4199 4200 ring = request->ring; 4201 seqno = request->seqno; 4202 } 4203 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter); 4204 spin_unlock(&file_priv->mm.lock); 4205 4206 if (seqno == 0) 4207 return 0; 4208 4209 ret = __wait_seqno(ring, seqno, reset_counter, true, NULL, NULL); 4210 if (ret == 0) 4211 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0); 4212 4213 return ret; 4214 } 4215 4216 static bool 4217 i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags) 4218 { 4219 struct drm_i915_gem_object *obj = vma->obj; 4220 4221 if (alignment && 4222 vma->node.start & (alignment - 1)) 4223 return true; 4224 4225 if (flags & PIN_MAPPABLE && !obj->map_and_fenceable) 4226 return true; 4227 4228 if (flags & PIN_OFFSET_BIAS && 4229 vma->node.start < (flags & PIN_OFFSET_MASK)) 4230 return true; 4231 4232 return false; 4233 } 4234 4235 int 4236 i915_gem_object_pin(struct drm_i915_gem_object *obj, 4237 struct i915_address_space *vm, 4238 uint32_t alignment, 4239 uint64_t flags) 4240 { 4241 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 4242 struct i915_vma *vma; 4243 int ret; 4244 4245 if (WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base)) 4246 return -ENODEV; 4247 4248 if (WARN_ON(flags & (PIN_GLOBAL | PIN_MAPPABLE) && !i915_is_ggtt(vm))) 4249 return -EINVAL; 4250 4251 vma = i915_gem_obj_to_vma(obj, vm); 4252 if (vma) { 4253 if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT)) 4254 return -EBUSY; 4255 4256 if (i915_vma_misplaced(vma, alignment, flags)) { 4257 WARN(vma->pin_count, 4258 "bo is already pinned with incorrect alignment:" 4259 " offset=%lx, req.alignment=%x, req.map_and_fenceable=%d," 4260 " obj->map_and_fenceable=%d\n", 4261 i915_gem_obj_offset(obj, vm), alignment, 4262 !!(flags & PIN_MAPPABLE), 4263 obj->map_and_fenceable); 4264 ret = i915_vma_unbind(vma); 4265 if (ret) 4266 return ret; 4267 4268 vma = NULL; 4269 } 4270 } 4271 4272 if (vma == NULL || !drm_mm_node_allocated(&vma->node)) { 4273 vma = i915_gem_object_bind_to_vm(obj, vm, alignment, flags); 4274 if (IS_ERR(vma)) 4275 return PTR_ERR(vma); 4276 } 4277 4278 if (flags & PIN_GLOBAL && !obj->has_global_gtt_mapping) 4279 vma->bind_vma(vma, obj->cache_level, GLOBAL_BIND); 4280 4281 vma->pin_count++; 4282 if (flags & PIN_MAPPABLE) 4283 obj->pin_mappable |= true; 4284 4285 return 0; 4286 } 4287 4288 void 4289 i915_gem_object_ggtt_unpin(struct drm_i915_gem_object *obj) 4290 { 4291 struct i915_vma *vma = i915_gem_obj_to_ggtt(obj); 4292 4293 BUG_ON(!vma); 4294 BUG_ON(vma->pin_count == 0); 4295 BUG_ON(!i915_gem_obj_ggtt_bound(obj)); 4296 4297 if (--vma->pin_count == 0) 4298 obj->pin_mappable = false; 4299 } 4300 4301 bool 4302 
i915_gem_object_pin_fence(struct drm_i915_gem_object *obj) 4303 { 4304 if (obj->fence_reg != I915_FENCE_REG_NONE) { 4305 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 4306 struct i915_vma *ggtt_vma = i915_gem_obj_to_ggtt(obj); 4307 4308 WARN_ON(!ggtt_vma || 4309 dev_priv->fence_regs[obj->fence_reg].pin_count > 4310 ggtt_vma->pin_count); 4311 dev_priv->fence_regs[obj->fence_reg].pin_count++; 4312 return true; 4313 } else 4314 return false; 4315 } 4316 4317 void 4318 i915_gem_object_unpin_fence(struct drm_i915_gem_object *obj) 4319 { 4320 if (obj->fence_reg != I915_FENCE_REG_NONE) { 4321 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 4322 WARN_ON(dev_priv->fence_regs[obj->fence_reg].pin_count <= 0); 4323 dev_priv->fence_regs[obj->fence_reg].pin_count--; 4324 } 4325 } 4326 4327 int 4328 i915_gem_pin_ioctl(struct drm_device *dev, void *data, 4329 struct drm_file *file) 4330 { 4331 struct drm_i915_gem_pin *args = data; 4332 struct drm_i915_gem_object *obj; 4333 int ret; 4334 4335 if (INTEL_INFO(dev)->gen >= 6) 4336 return -ENODEV; 4337 4338 ret = i915_mutex_lock_interruptible(dev); 4339 if (ret) 4340 return ret; 4341 4342 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 4343 if (&obj->base == NULL) { 4344 ret = -ENOENT; 4345 goto unlock; 4346 } 4347 4348 if (obj->madv != I915_MADV_WILLNEED) { 4349 DRM_DEBUG("Attempting to pin a purgeable buffer\n"); 4350 ret = -EFAULT; 4351 goto out; 4352 } 4353 4354 if (obj->pin_filp != NULL && obj->pin_filp != file) { 4355 DRM_DEBUG("Already pinned in i915_gem_pin_ioctl(): %d\n", 4356 args->handle); 4357 ret = -EINVAL; 4358 goto out; 4359 } 4360 4361 if (obj->user_pin_count == ULONG_MAX) { 4362 ret = -EBUSY; 4363 goto out; 4364 } 4365 4366 if (obj->user_pin_count == 0) { 4367 ret = i915_gem_obj_ggtt_pin(obj, args->alignment, PIN_MAPPABLE); 4368 if (ret) 4369 goto out; 4370 } 4371 4372 obj->user_pin_count++; 4373 obj->pin_filp = file; 4374 4375 args->offset = i915_gem_obj_ggtt_offset(obj); 4376 out: 4377 drm_gem_object_unreference(&obj->base); 4378 unlock: 4379 mutex_unlock(&dev->struct_mutex); 4380 return ret; 4381 } 4382 4383 int 4384 i915_gem_unpin_ioctl(struct drm_device *dev, void *data, 4385 struct drm_file *file) 4386 { 4387 struct drm_i915_gem_pin *args = data; 4388 struct drm_i915_gem_object *obj; 4389 int ret; 4390 4391 ret = i915_mutex_lock_interruptible(dev); 4392 if (ret) 4393 return ret; 4394 4395 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 4396 if (&obj->base == NULL) { 4397 ret = -ENOENT; 4398 goto unlock; 4399 } 4400 4401 if (obj->pin_filp != file) { 4402 DRM_DEBUG("Not pinned by caller in i915_gem_pin_ioctl(): %d\n", 4403 args->handle); 4404 ret = -EINVAL; 4405 goto out; 4406 } 4407 obj->user_pin_count--; 4408 if (obj->user_pin_count == 0) { 4409 obj->pin_filp = NULL; 4410 i915_gem_object_ggtt_unpin(obj); 4411 } 4412 4413 out: 4414 drm_gem_object_unreference(&obj->base); 4415 unlock: 4416 mutex_unlock(&dev->struct_mutex); 4417 return ret; 4418 } 4419 4420 int 4421 i915_gem_busy_ioctl(struct drm_device *dev, void *data, 4422 struct drm_file *file) 4423 { 4424 struct drm_i915_gem_busy *args = data; 4425 struct drm_i915_gem_object *obj; 4426 int ret; 4427 4428 ret = i915_mutex_lock_interruptible(dev); 4429 if (ret) 4430 return ret; 4431 4432 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 4433 if (&obj->base == NULL) { 4434 ret = -ENOENT; 4435 goto unlock; 4436 } 4437 4438 /* Count all active objects as busy, even if they are currently not used 4439 * 
by the gpu. Users of this interface expect objects to eventually 4440 * become non-busy without any further actions, therefore emit any 4441 * necessary flushes here. 4442 */ 4443 ret = i915_gem_object_flush_active(obj); 4444 4445 args->busy = obj->active; 4446 if (obj->ring) { 4447 args->busy |= intel_ring_flag(obj->ring) << 16; 4448 } 4449 4450 drm_gem_object_unreference(&obj->base); 4451 unlock: 4452 mutex_unlock(&dev->struct_mutex); 4453 return ret; 4454 } 4455 4456 int 4457 i915_gem_throttle_ioctl(struct drm_device *dev, void *data, 4458 struct drm_file *file_priv) 4459 { 4460 return i915_gem_ring_throttle(dev, file_priv); 4461 } 4462 4463 int 4464 i915_gem_madvise_ioctl(struct drm_device *dev, void *data, 4465 struct drm_file *file_priv) 4466 { 4467 struct drm_i915_gem_madvise *args = data; 4468 struct drm_i915_gem_object *obj; 4469 int ret; 4470 4471 switch (args->madv) { 4472 case I915_MADV_DONTNEED: 4473 case I915_MADV_WILLNEED: 4474 break; 4475 default: 4476 return -EINVAL; 4477 } 4478 4479 ret = i915_mutex_lock_interruptible(dev); 4480 if (ret) 4481 return ret; 4482 4483 obj = to_intel_bo(drm_gem_object_lookup(dev, file_priv, args->handle)); 4484 if (&obj->base == NULL) { 4485 ret = -ENOENT; 4486 goto unlock; 4487 } 4488 4489 if (i915_gem_obj_is_pinned(obj)) { 4490 ret = -EINVAL; 4491 goto out; 4492 } 4493 4494 if (obj->madv != __I915_MADV_PURGED) 4495 obj->madv = args->madv; 4496 4497 /* if the object is no longer attached, discard its backing storage */ 4498 if (i915_gem_object_is_purgeable(obj) && obj->pages == NULL) 4499 i915_gem_object_truncate(obj); 4500 4501 args->retained = obj->madv != __I915_MADV_PURGED; 4502 4503 out: 4504 drm_gem_object_unreference(&obj->base); 4505 unlock: 4506 mutex_unlock(&dev->struct_mutex); 4507 return ret; 4508 } 4509 4510 void i915_gem_object_init(struct drm_i915_gem_object *obj, 4511 const struct drm_i915_gem_object_ops *ops) 4512 { 4513 INIT_LIST_HEAD(&obj->global_list); 4514 INIT_LIST_HEAD(&obj->ring_list); 4515 INIT_LIST_HEAD(&obj->obj_exec_link); 4516 INIT_LIST_HEAD(&obj->vma_list); 4517 4518 obj->ops = ops; 4519 4520 obj->fence_reg = I915_FENCE_REG_NONE; 4521 obj->madv = I915_MADV_WILLNEED; 4522 4523 i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size); 4524 } 4525 4526 static const struct drm_i915_gem_object_ops i915_gem_object_ops = { 4527 .get_pages = i915_gem_object_get_pages_gtt, 4528 .put_pages = i915_gem_object_put_pages_gtt, 4529 }; 4530 4531 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev, 4532 size_t size) 4533 { 4534 struct drm_i915_gem_object *obj; 4535 #if 0 4536 struct address_space *mapping; 4537 gfp_t mask; 4538 #endif 4539 4540 obj = i915_gem_object_alloc(dev); 4541 if (obj == NULL) 4542 return NULL; 4543 4544 if (drm_gem_object_init(dev, &obj->base, size) != 0) { 4545 i915_gem_object_free(obj); 4546 return NULL; 4547 } 4548 4549 #if 0 4550 mask = GFP_HIGHUSER | __GFP_RECLAIMABLE; 4551 if (IS_CRESTLINE(dev) || IS_BROADWATER(dev)) { 4552 /* 965gm cannot relocate objects above 4GiB. 
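		 * Clearing __GFP_HIGHMEM and adding __GFP_DMA32 below keeps
		 * the shmemfs backing pages within the low 4GiB, so every
		 * page stays addressable by the GPU on those chipsets.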
*/ 4553 mask &= ~__GFP_HIGHMEM; 4554 mask |= __GFP_DMA32; 4555 } 4556 4557 mapping = file_inode(obj->base.filp)->i_mapping; 4558 mapping_set_gfp_mask(mapping, mask); 4559 #endif 4560 4561 i915_gem_object_init(obj, &i915_gem_object_ops); 4562 4563 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 4564 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 4565 4566 if (HAS_LLC(dev)) { 4567 /* On some devices, we can have the GPU use the LLC (the CPU 4568 * cache) for about a 10% performance improvement 4569 * compared to uncached. Graphics requests other than 4570 * display scanout are coherent with the CPU in 4571 * accessing this cache. This means in this mode we 4572 * don't need to clflush on the CPU side, and on the 4573 * GPU side we only need to flush internal caches to 4574 * get data visible to the CPU. 4575 * 4576 * However, we maintain the display planes as UC, and so 4577 * need to rebind when first used as such. 4578 */ 4579 obj->cache_level = I915_CACHE_LLC; 4580 } else 4581 obj->cache_level = I915_CACHE_NONE; 4582 4583 trace_i915_gem_object_create(obj); 4584 4585 return obj; 4586 } 4587 4588 static bool discard_backing_storage(struct drm_i915_gem_object *obj) 4589 { 4590 /* If we are the last user of the backing storage (be it shmemfs 4591 * pages or stolen etc), we know that the pages are going to be 4592 * immediately released. In this case, we can then skip copying 4593 * back the contents from the GPU. 4594 */ 4595 4596 if (obj->madv != I915_MADV_WILLNEED) 4597 return false; 4598 4599 if (obj->base.vm_obj == NULL) 4600 return true; 4601 4602 /* At first glance, this looks racy, but then again so would be 4603 * userspace racing mmap against close. However, the first external 4604 * reference to the filp can only be obtained through the 4605 * i915_gem_mmap_ioctl() which safeguards us against the user 4606 * acquiring such a reference whilst we are in the middle of 4607 * freeing the object. 4608 */ 4609 #if 0 4610 return atomic_long_read(&obj->base.filp->f_count) == 1; 4611 #else 4612 return false; 4613 #endif 4614 } 4615 4616 void i915_gem_free_object(struct drm_gem_object *gem_obj) 4617 { 4618 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); 4619 struct drm_device *dev = obj->base.dev; 4620 struct drm_i915_private *dev_priv = dev->dev_private; 4621 struct i915_vma *vma, *next; 4622 4623 intel_runtime_pm_get(dev_priv); 4624 4625 trace_i915_gem_object_destroy(obj); 4626 4627 list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) { 4628 int ret; 4629 4630 vma->pin_count = 0; 4631 ret = i915_vma_unbind(vma); 4632 if (WARN_ON(ret == -ERESTARTSYS)) { 4633 bool was_interruptible; 4634 4635 was_interruptible = dev_priv->mm.interruptible; 4636 dev_priv->mm.interruptible = false; 4637 4638 WARN_ON(i915_vma_unbind(vma)); 4639 4640 dev_priv->mm.interruptible = was_interruptible; 4641 } 4642 } 4643 4644 i915_gem_object_detach_phys(obj); 4645 4646 /* Stolen objects don't hold a ref, but do hold pin count. Fix that up 4647 * before progressing. 
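	 * Dropping that pin via i915_gem_object_unpin_pages() keeps the
	 * pages_pin_count check further down from firing and lets
	 * i915_gem_object_put_pages() actually release the backing storage.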
*/ 4648 if (obj->stolen) 4649 i915_gem_object_unpin_pages(obj); 4650 4651 WARN_ON(obj->frontbuffer_bits); 4652 4653 if (WARN_ON(obj->pages_pin_count)) 4654 obj->pages_pin_count = 0; 4655 if (discard_backing_storage(obj)) 4656 obj->madv = I915_MADV_DONTNEED; 4657 i915_gem_object_put_pages(obj); 4658 i915_gem_object_free_mmap_offset(obj); 4659 4660 BUG_ON(obj->pages); 4661 4662 #if 0 4663 if (obj->base.import_attach) 4664 drm_prime_gem_destroy(&obj->base, NULL); 4665 #endif 4666 4667 if (obj->ops->release) 4668 obj->ops->release(obj); 4669 4670 drm_gem_object_release(&obj->base); 4671 i915_gem_info_remove_obj(dev_priv, obj->base.size); 4672 4673 kfree(obj->bit_17); 4674 i915_gem_object_free(obj); 4675 4676 intel_runtime_pm_put(dev_priv); 4677 } 4678 4679 struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj, 4680 struct i915_address_space *vm) 4681 { 4682 struct i915_vma *vma; 4683 list_for_each_entry(vma, &obj->vma_list, vma_link) 4684 if (vma->vm == vm) 4685 return vma; 4686 4687 return NULL; 4688 } 4689 4690 void i915_gem_vma_destroy(struct i915_vma *vma) 4691 { 4692 struct i915_address_space *vm = NULL; 4693 WARN_ON(vma->node.allocated); 4694 4695 /* Keep the vma as a placeholder in the execbuffer reservation lists */ 4696 if (!list_empty(&vma->exec_list)) 4697 return; 4698 4699 vm = vma->vm; 4700 4701 if (!i915_is_ggtt(vm)) 4702 i915_ppgtt_put(i915_vm_to_ppgtt(vm)); 4703 4704 list_del(&vma->vma_link); 4705 4706 kfree(vma); 4707 } 4708 4709 static void 4710 i915_gem_stop_ringbuffers(struct drm_device *dev) 4711 { 4712 struct drm_i915_private *dev_priv = dev->dev_private; 4713 struct intel_engine_cs *ring; 4714 int i; 4715 4716 for_each_ring(ring, dev_priv, i) 4717 dev_priv->gt.stop_ring(ring); 4718 } 4719 4720 int 4721 i915_gem_suspend(struct drm_device *dev) 4722 { 4723 struct drm_i915_private *dev_priv = dev->dev_private; 4724 int ret = 0; 4725 4726 mutex_lock(&dev->struct_mutex); 4727 if (dev_priv->ums.mm_suspended) 4728 goto err; 4729 4730 ret = i915_gpu_idle(dev); 4731 if (ret) 4732 goto err; 4733 4734 i915_gem_retire_requests(dev); 4735 4736 /* Under UMS, be paranoid and evict. */ 4737 if (!drm_core_check_feature(dev, DRIVER_MODESET)) 4738 i915_gem_evict_everything(dev); 4739 4740 i915_kernel_lost_context(dev); 4741 i915_gem_stop_ringbuffers(dev); 4742 4743 /* Hack! Don't let anybody do execbuf while we don't control the chip. 4744 * We need to replace this with a semaphore, or something. 4745 * And not confound ums.mm_suspended! 
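	 * (For KMS drivers the expression below evaluates to 0, so this
	 * only ever blocks execbuf on the legacy UMS path.)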
4746 */ 4747 dev_priv->ums.mm_suspended = !drm_core_check_feature(dev, 4748 DRIVER_MODESET); 4749 mutex_unlock(&dev->struct_mutex); 4750 4751 del_timer_sync(&dev_priv->gpu_error.hangcheck_timer); 4752 cancel_delayed_work_sync(&dev_priv->mm.retire_work); 4753 #if 0 4754 flush_delayed_work(&dev_priv->mm.idle_work); 4755 #endif 4756 4757 return 0; 4758 4759 err: 4760 mutex_unlock(&dev->struct_mutex); 4761 return ret; 4762 } 4763 4764 int i915_gem_l3_remap(struct intel_engine_cs *ring, int slice) 4765 { 4766 struct drm_device *dev = ring->dev; 4767 struct drm_i915_private *dev_priv = dev->dev_private; 4768 u32 reg_base = GEN7_L3LOG_BASE + (slice * 0x200); 4769 u32 *remap_info = dev_priv->l3_parity.remap_info[slice]; 4770 int i, ret; 4771 4772 if (!HAS_L3_DPF(dev) || !remap_info) 4773 return 0; 4774 4775 ret = intel_ring_begin(ring, GEN7_L3LOG_SIZE / 4 * 3); 4776 if (ret) 4777 return ret; 4778 4779 /* 4780 * Note: We do not worry about the concurrent register cacheline hang 4781 * here because no other code should access these registers other than 4782 * at initialization time. 4783 */ 4784 for (i = 0; i < GEN7_L3LOG_SIZE; i += 4) { 4785 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); 4786 intel_ring_emit(ring, reg_base + i); 4787 intel_ring_emit(ring, remap_info[i/4]); 4788 } 4789 4790 intel_ring_advance(ring); 4791 4792 return ret; 4793 } 4794 4795 void i915_gem_init_swizzling(struct drm_device *dev) 4796 { 4797 struct drm_i915_private *dev_priv = dev->dev_private; 4798 4799 if (INTEL_INFO(dev)->gen < 5 || 4800 dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE) 4801 return; 4802 4803 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) | 4804 DISP_TILE_SURFACE_SWIZZLING); 4805 4806 if (IS_GEN5(dev)) 4807 return; 4808 4809 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL); 4810 if (IS_GEN6(dev)) 4811 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB)); 4812 else if (IS_GEN7(dev)) 4813 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB)); 4814 else if (IS_GEN8(dev)) 4815 I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW)); 4816 else 4817 BUG(); 4818 } 4819 4820 static bool 4821 intel_enable_blt(struct drm_device *dev) 4822 { 4823 int revision; 4824 4825 if (!HAS_BLT(dev)) 4826 return false; 4827 4828 /* The blitter was dysfunctional on early prototypes */ 4829 revision = pci_read_config(dev->dev, PCIR_REVID, 1); 4830 if (IS_GEN6(dev) && revision < 8) { 4831 DRM_INFO("BLT not supported on this pre-production hardware;" 4832 " graphics performance will be degraded.\n"); 4833 return false; 4834 } 4835 4836 return true; 4837 } 4838 4839 static void init_unused_ring(struct drm_device *dev, u32 base) 4840 { 4841 struct drm_i915_private *dev_priv = dev->dev_private; 4842 4843 I915_WRITE(RING_CTL(base), 0); 4844 I915_WRITE(RING_HEAD(base), 0); 4845 I915_WRITE(RING_TAIL(base), 0); 4846 I915_WRITE(RING_START(base), 0); 4847 } 4848 4849 static void init_unused_rings(struct drm_device *dev) 4850 { 4851 if (IS_I830(dev)) { 4852 init_unused_ring(dev, PRB1_BASE); 4853 init_unused_ring(dev, SRB0_BASE); 4854 init_unused_ring(dev, SRB1_BASE); 4855 init_unused_ring(dev, SRB2_BASE); 4856 init_unused_ring(dev, SRB3_BASE); 4857 } else if (IS_GEN2(dev)) { 4858 init_unused_ring(dev, SRB0_BASE); 4859 init_unused_ring(dev, SRB1_BASE); 4860 } else if (IS_GEN3(dev)) { 4861 init_unused_ring(dev, PRB1_BASE); 4862 init_unused_ring(dev, PRB2_BASE); 4863 } 4864 } 4865 4866 int i915_gem_init_rings(struct drm_device *dev) 4867 { 4868 struct drm_i915_private *dev_priv = 
dev->dev_private; 4869 int ret; 4870 4871 /* 4872 * At least 830 can leave some of the unused rings 4873 * "active" (ie. head != tail) after resume which 4874 * will prevent c3 entry. Makes sure all unused rings 4875 * are totally idle. 4876 */ 4877 init_unused_rings(dev); 4878 4879 ret = intel_init_render_ring_buffer(dev); 4880 if (ret) 4881 return ret; 4882 4883 if (HAS_BSD(dev)) { 4884 ret = intel_init_bsd_ring_buffer(dev); 4885 if (ret) 4886 goto cleanup_render_ring; 4887 } 4888 4889 if (intel_enable_blt(dev)) { 4890 ret = intel_init_blt_ring_buffer(dev); 4891 if (ret) 4892 goto cleanup_bsd_ring; 4893 } 4894 4895 if (HAS_VEBOX(dev)) { 4896 ret = intel_init_vebox_ring_buffer(dev); 4897 if (ret) 4898 goto cleanup_blt_ring; 4899 } 4900 4901 if (HAS_BSD2(dev)) { 4902 ret = intel_init_bsd2_ring_buffer(dev); 4903 if (ret) 4904 goto cleanup_vebox_ring; 4905 } 4906 4907 ret = i915_gem_set_seqno(dev, ((u32)~0 - 0x1000)); 4908 if (ret) 4909 goto cleanup_bsd2_ring; 4910 4911 return 0; 4912 4913 cleanup_bsd2_ring: 4914 intel_cleanup_ring_buffer(&dev_priv->ring[VCS2]); 4915 cleanup_vebox_ring: 4916 intel_cleanup_ring_buffer(&dev_priv->ring[VECS]); 4917 cleanup_blt_ring: 4918 intel_cleanup_ring_buffer(&dev_priv->ring[BCS]); 4919 cleanup_bsd_ring: 4920 intel_cleanup_ring_buffer(&dev_priv->ring[VCS]); 4921 cleanup_render_ring: 4922 intel_cleanup_ring_buffer(&dev_priv->ring[RCS]); 4923 4924 return ret; 4925 } 4926 4927 int 4928 i915_gem_init_hw(struct drm_device *dev) 4929 { 4930 struct drm_i915_private *dev_priv = dev->dev_private; 4931 int ret, i; 4932 4933 #if 0 4934 if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt()) 4935 return -EIO; 4936 #endif 4937 4938 if (dev_priv->ellc_size) 4939 I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf)); 4940 4941 if (IS_HASWELL(dev)) 4942 I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev) ? 4943 LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED); 4944 4945 if (HAS_PCH_NOP(dev)) { 4946 if (IS_IVYBRIDGE(dev)) { 4947 u32 temp = I915_READ(GEN7_MSG_CTL); 4948 temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK); 4949 I915_WRITE(GEN7_MSG_CTL, temp); 4950 } else if (INTEL_INFO(dev)->gen >= 7) { 4951 u32 temp = I915_READ(HSW_NDE_RSTWRN_OPT); 4952 temp &= ~RESET_PCH_HANDSHAKE_ENABLE; 4953 I915_WRITE(HSW_NDE_RSTWRN_OPT, temp); 4954 } 4955 } 4956 4957 i915_gem_init_swizzling(dev); 4958 4959 ret = dev_priv->gt.init_rings(dev); 4960 if (ret) 4961 return ret; 4962 4963 for (i = 0; i < NUM_L3_SLICES(dev); i++) 4964 i915_gem_l3_remap(&dev_priv->ring[RCS], i); 4965 4966 /* 4967 * XXX: Contexts should only be initialized once. Doing a switch to the 4968 * default context switch however is something we'd like to do after 4969 * reset or thaw (the latter may not actually be necessary for HW, but 4970 * goes with our code better). Context switching requires rings (for 4971 * the do_switch), but before enabling PPGTT. So don't move this. 
4972 */ 4973 ret = i915_gem_context_enable(dev_priv); 4974 if (ret && ret != -EIO) { 4975 DRM_ERROR("Context enable failed %d\n", ret); 4976 i915_gem_cleanup_ringbuffer(dev); 4977 4978 return ret; 4979 } 4980 4981 ret = i915_ppgtt_init_hw(dev); 4982 if (ret && ret != -EIO) { 4983 DRM_ERROR("PPGTT enable failed %d\n", ret); 4984 i915_gem_cleanup_ringbuffer(dev); 4985 } 4986 4987 return ret; 4988 } 4989 4990 int i915_gem_init(struct drm_device *dev) 4991 { 4992 struct drm_i915_private *dev_priv = dev->dev_private; 4993 int ret; 4994 4995 i915.enable_execlists = intel_sanitize_enable_execlists(dev, 4996 i915.enable_execlists); 4997 4998 mutex_lock(&dev->struct_mutex); 4999 5000 if (IS_VALLEYVIEW(dev)) { 5001 /* VLVA0 (potential hack), BIOS isn't actually waking us */ 5002 I915_WRITE(VLV_GTLC_WAKE_CTRL, VLV_GTLC_ALLOWWAKEREQ); 5003 if (wait_for((I915_READ(VLV_GTLC_PW_STATUS) & 5004 VLV_GTLC_ALLOWWAKEACK), 10)) 5005 DRM_DEBUG_DRIVER("allow wake ack timed out\n"); 5006 } 5007 5008 if (!i915.enable_execlists) { 5009 dev_priv->gt.do_execbuf = i915_gem_ringbuffer_submission; 5010 dev_priv->gt.init_rings = i915_gem_init_rings; 5011 dev_priv->gt.cleanup_ring = intel_cleanup_ring_buffer; 5012 dev_priv->gt.stop_ring = intel_stop_ring_buffer; 5013 } else { 5014 dev_priv->gt.do_execbuf = intel_execlists_submission; 5015 dev_priv->gt.init_rings = intel_logical_rings_init; 5016 dev_priv->gt.cleanup_ring = intel_logical_ring_cleanup; 5017 dev_priv->gt.stop_ring = intel_logical_ring_stop; 5018 } 5019 5020 ret = i915_gem_init_userptr(dev); 5021 if (ret) { 5022 mutex_unlock(&dev->struct_mutex); 5023 return ret; 5024 } 5025 5026 i915_gem_init_global_gtt(dev); 5027 5028 ret = i915_gem_context_init(dev); 5029 if (ret) { 5030 mutex_unlock(&dev->struct_mutex); 5031 return ret; 5032 } 5033 5034 ret = i915_gem_init_hw(dev); 5035 if (ret == -EIO) { 5036 /* Allow ring initialisation to fail by marking the GPU as 5037 * wedged. But we only want to do this where the GPU is angry, 5038 * for all other failure, such as an allocation failure, bail. 5039 */ 5040 DRM_ERROR("Failed to initialize GPU, declaring it wedged\n"); 5041 atomic_set_mask(I915_WEDGED, &dev_priv->gpu_error.reset_counter); 5042 ret = 0; 5043 } 5044 mutex_unlock(&dev->struct_mutex); 5045 5046 /* Allow hardware batchbuffers unless told otherwise, but not for KMS. 
*/ 5047 if (!drm_core_check_feature(dev, DRIVER_MODESET)) 5048 dev_priv->dri1.allow_batchbuffer = 1; 5049 return ret; 5050 } 5051 5052 void 5053 i915_gem_cleanup_ringbuffer(struct drm_device *dev) 5054 { 5055 struct drm_i915_private *dev_priv = dev->dev_private; 5056 struct intel_engine_cs *ring; 5057 int i; 5058 5059 for_each_ring(ring, dev_priv, i) 5060 dev_priv->gt.cleanup_ring(ring); 5061 } 5062 5063 int 5064 i915_gem_entervt_ioctl(struct drm_device *dev, void *data, 5065 struct drm_file *file_priv) 5066 { 5067 struct drm_i915_private *dev_priv = dev->dev_private; 5068 int ret; 5069 5070 if (drm_core_check_feature(dev, DRIVER_MODESET)) 5071 return 0; 5072 5073 if (i915_reset_in_progress(&dev_priv->gpu_error)) { 5074 DRM_ERROR("Reenabling wedged hardware, good luck\n"); 5075 atomic_set(&dev_priv->gpu_error.reset_counter, 0); 5076 } 5077 5078 mutex_lock(&dev->struct_mutex); 5079 dev_priv->ums.mm_suspended = 0; 5080 5081 ret = i915_gem_init_hw(dev); 5082 if (ret != 0) { 5083 mutex_unlock(&dev->struct_mutex); 5084 return ret; 5085 } 5086 5087 BUG_ON(!list_empty(&dev_priv->gtt.base.active_list)); 5088 5089 ret = drm_irq_install(dev, dev->irq); 5090 if (ret) 5091 goto cleanup_ringbuffer; 5092 mutex_unlock(&dev->struct_mutex); 5093 5094 return 0; 5095 5096 cleanup_ringbuffer: 5097 i915_gem_cleanup_ringbuffer(dev); 5098 dev_priv->ums.mm_suspended = 1; 5099 mutex_unlock(&dev->struct_mutex); 5100 5101 return ret; 5102 } 5103 5104 int 5105 i915_gem_leavevt_ioctl(struct drm_device *dev, void *data, 5106 struct drm_file *file_priv) 5107 { 5108 if (drm_core_check_feature(dev, DRIVER_MODESET)) 5109 return 0; 5110 5111 mutex_lock(&dev->struct_mutex); 5112 drm_irq_uninstall(dev); 5113 mutex_unlock(&dev->struct_mutex); 5114 5115 return i915_gem_suspend(dev); 5116 } 5117 5118 void 5119 i915_gem_lastclose(struct drm_device *dev) 5120 { 5121 int ret; 5122 5123 if (drm_core_check_feature(dev, DRIVER_MODESET)) 5124 return; 5125 5126 ret = i915_gem_suspend(dev); 5127 if (ret) 5128 DRM_ERROR("failed to idle hardware: %d\n", ret); 5129 } 5130 5131 static void 5132 init_ring_lists(struct intel_engine_cs *ring) 5133 { 5134 INIT_LIST_HEAD(&ring->active_list); 5135 INIT_LIST_HEAD(&ring->request_list); 5136 } 5137 5138 void i915_init_vm(struct drm_i915_private *dev_priv, 5139 struct i915_address_space *vm) 5140 { 5141 if (!i915_is_ggtt(vm)) 5142 drm_mm_init(&vm->mm, vm->start, vm->total); 5143 vm->dev = dev_priv->dev; 5144 INIT_LIST_HEAD(&vm->active_list); 5145 INIT_LIST_HEAD(&vm->inactive_list); 5146 INIT_LIST_HEAD(&vm->global_link); 5147 list_add_tail(&vm->global_link, &dev_priv->vm_list); 5148 } 5149 5150 void 5151 i915_gem_load(struct drm_device *dev) 5152 { 5153 struct drm_i915_private *dev_priv = dev->dev_private; 5154 int i; 5155 5156 INIT_LIST_HEAD(&dev_priv->vm_list); 5157 i915_init_vm(dev_priv, &dev_priv->gtt.base); 5158 5159 INIT_LIST_HEAD(&dev_priv->context_list); 5160 INIT_LIST_HEAD(&dev_priv->mm.unbound_list); 5161 INIT_LIST_HEAD(&dev_priv->mm.bound_list); 5162 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 5163 for (i = 0; i < I915_NUM_RINGS; i++) 5164 init_ring_lists(&dev_priv->ring[i]); 5165 for (i = 0; i < I915_MAX_NUM_FENCES; i++) 5166 INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list); 5167 INIT_DELAYED_WORK(&dev_priv->mm.retire_work, 5168 i915_gem_retire_work_handler); 5169 INIT_DELAYED_WORK(&dev_priv->mm.idle_work, 5170 i915_gem_idle_work_handler); 5171 init_waitqueue_head(&dev_priv->gpu_error.reset_queue); 5172 5173 /* On GEN3 we really need to make sure the ARB C3 LP bit is set */ 5174 if 
(!drm_core_check_feature(dev, DRIVER_MODESET) && IS_GEN3(dev)) { 5175 I915_WRITE(MI_ARB_STATE, 5176 _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE)); 5177 } 5178 5179 dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL; 5180 5181 /* Old X drivers will take 0-2 for front, back, depth buffers */ 5182 if (!drm_core_check_feature(dev, DRIVER_MODESET)) 5183 dev_priv->fence_reg_start = 3; 5184 5185 if (INTEL_INFO(dev)->gen >= 7 && !IS_VALLEYVIEW(dev)) 5186 dev_priv->num_fence_regs = 32; 5187 else if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev)) 5188 dev_priv->num_fence_regs = 16; 5189 else 5190 dev_priv->num_fence_regs = 8; 5191 5192 /* Initialize fence registers to zero */ 5193 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 5194 i915_gem_restore_fences(dev); 5195 5196 i915_gem_detect_bit_6_swizzle(dev); 5197 init_waitqueue_head(&dev_priv->pending_flip_queue); 5198 5199 dev_priv->mm.interruptible = true; 5200 5201 #if 0 5202 dev_priv->mm.shrinker.scan_objects = i915_gem_shrinker_scan; 5203 dev_priv->mm.shrinker.count_objects = i915_gem_shrinker_count; 5204 dev_priv->mm.shrinker.seeks = DEFAULT_SEEKS; 5205 register_shrinker(&dev_priv->mm.shrinker); 5206 5207 dev_priv->mm.oom_notifier.notifier_call = i915_gem_shrinker_oom; 5208 register_oom_notifier(&dev_priv->mm.oom_notifier); 5209 #endif 5210 5211 lockinit(&dev_priv->fb_tracking.lock, "drmftl", 0, LK_CANRECURSE); 5212 } 5213 5214 void i915_gem_release(struct drm_device *dev, struct drm_file *file) 5215 { 5216 struct drm_i915_file_private *file_priv = file->driver_priv; 5217 5218 cancel_delayed_work_sync(&file_priv->mm.idle_work); 5219 5220 /* Clean up our request list when the client is going away, so that 5221 * later retire_requests won't dereference our soon-to-be-gone 5222 * file_priv. 
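	 * The requests themselves stay queued on their rings and retire
	 * normally; we only sever the per-client link and clear
	 * request->file_priv.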
5223 */ 5224 spin_lock(&file_priv->mm.lock); 5225 while (!list_empty(&file_priv->mm.request_list)) { 5226 struct drm_i915_gem_request *request; 5227 5228 request = list_first_entry(&file_priv->mm.request_list, 5229 struct drm_i915_gem_request, 5230 client_list); 5231 list_del(&request->client_list); 5232 request->file_priv = NULL; 5233 } 5234 spin_unlock(&file_priv->mm.lock); 5235 } 5236 5237 int 5238 i915_gem_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot, 5239 vm_ooffset_t foff, struct ucred *cred, u_short *color) 5240 { 5241 *color = 0; /* XXXKIB */ 5242 return (0); 5243 } 5244 5245 void 5246 i915_gem_pager_dtor(void *handle) 5247 { 5248 struct drm_gem_object *obj; 5249 struct drm_device *dev; 5250 5251 obj = handle; 5252 dev = obj->dev; 5253 5254 mutex_lock(&dev->struct_mutex); 5255 drm_gem_free_mmap_offset(obj); 5256 i915_gem_release_mmap(to_intel_bo(obj)); 5257 drm_gem_object_unreference(obj); 5258 mutex_unlock(&dev->struct_mutex); 5259 } 5260 5261 static void 5262 i915_gem_file_idle_work_handler(struct work_struct *work) 5263 { 5264 struct drm_i915_file_private *file_priv = 5265 container_of(work, typeof(*file_priv), mm.idle_work.work); 5266 5267 atomic_set(&file_priv->rps_wait_boost, false); 5268 } 5269 5270 int i915_gem_open(struct drm_device *dev, struct drm_file *file) 5271 { 5272 struct drm_i915_file_private *file_priv; 5273 int ret; 5274 5275 DRM_DEBUG_DRIVER("\n"); 5276 5277 file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL); 5278 if (!file_priv) 5279 return -ENOMEM; 5280 5281 file->driver_priv = file_priv; 5282 file_priv->dev_priv = dev->dev_private; 5283 file_priv->file = file; 5284 5285 spin_init(&file_priv->mm.lock, "i915_priv"); 5286 INIT_LIST_HEAD(&file_priv->mm.request_list); 5287 INIT_DELAYED_WORK(&file_priv->mm.idle_work, 5288 i915_gem_file_idle_work_handler); 5289 5290 ret = i915_gem_context_open(dev, file); 5291 if (ret) 5292 kfree(file_priv); 5293 5294 return ret; 5295 } 5296 5297 void i915_gem_track_fb(struct drm_i915_gem_object *old, 5298 struct drm_i915_gem_object *new, 5299 unsigned frontbuffer_bits) 5300 { 5301 if (old) { 5302 WARN_ON(!mutex_is_locked(&old->base.dev->struct_mutex)); 5303 WARN_ON(!(old->frontbuffer_bits & frontbuffer_bits)); 5304 old->frontbuffer_bits &= ~frontbuffer_bits; 5305 } 5306 5307 if (new) { 5308 WARN_ON(!mutex_is_locked(&new->base.dev->struct_mutex)); 5309 WARN_ON(new->frontbuffer_bits & frontbuffer_bits); 5310 new->frontbuffer_bits |= frontbuffer_bits; 5311 } 5312 } 5313 5314 #if 0 5315 static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task) 5316 { 5317 if (!mutex_is_locked(mutex)) 5318 return false; 5319 5320 #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_MUTEXES) 5321 return mutex->owner == task; 5322 #else 5323 /* Since UP may be pre-empted, we cannot assume that we own the lock */ 5324 return false; 5325 #endif 5326 } 5327 #endif 5328 5329 #if 0 5330 static bool i915_gem_shrinker_lock(struct drm_device *dev, bool *unlock) 5331 { 5332 if (!mutex_trylock(&dev->struct_mutex)) { 5333 if (!mutex_is_locked_by(&dev->struct_mutex, current)) 5334 return false; 5335 5336 if (to_i915(dev)->mm.shrinker_no_lock_stealing) 5337 return false; 5338 5339 *unlock = false; 5340 } else 5341 *unlock = true; 5342 5343 return true; 5344 } 5345 5346 static int num_vma_bound(struct drm_i915_gem_object *obj) 5347 { 5348 struct i915_vma *vma; 5349 int count = 0; 5350 5351 list_for_each_entry(vma, &obj->vma_list, vma_link) 5352 if (drm_mm_node_allocated(&vma->node)) 5353 count++; 5354 5355 return count; 5356 } 5357 5358 
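/*
 * The count below is an estimate of reclaimable pages: unbound objects
 * whose pages are not pinned at all, plus bound-but-unpinned objects whose
 * only page pins come from their VMA bindings.
 */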
static unsigned long 5359 i915_gem_inactive_count(struct shrinker *shrinker, struct shrink_control *sc) 5360 { 5361 struct drm_i915_private *dev_priv = 5362 container_of(shrinker, 5363 struct drm_i915_private, 5364 mm.inactive_shrinker); 5365 struct drm_device *dev = dev_priv->dev; 5366 struct drm_i915_gem_object *obj; 5367 unsigned long count; 5368 bool unlock; 5369 5370 if (!i915_gem_shrinker_lock(dev, &unlock)) 5371 return 0; 5372 5373 count = 0; 5374 list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list) 5375 if (obj->pages_pin_count == 0) 5376 count += obj->base.size >> PAGE_SHIFT; 5377 5378 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { 5379 if (!i915_gem_obj_is_pinned(obj) && 5380 obj->pages_pin_count == num_vma_bound(obj)) 5381 count += obj->base.size >> PAGE_SHIFT; 5382 } 5383 5384 if (unlock) 5385 mutex_unlock(&dev->struct_mutex); 5386 5387 return count; 5388 } 5389 #endif 5390 5391 /* All the new VM stuff */ 5392 unsigned long i915_gem_obj_offset(struct drm_i915_gem_object *o, 5393 struct i915_address_space *vm) 5394 { 5395 struct drm_i915_private *dev_priv = o->base.dev->dev_private; 5396 struct i915_vma *vma; 5397 5398 WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base); 5399 5400 list_for_each_entry(vma, &o->vma_list, vma_link) { 5401 if (vma->vm == vm) 5402 return vma->node.start; 5403 5404 } 5405 WARN(1, "%s vma for this object not found.\n", 5406 i915_is_ggtt(vm) ? "global" : "ppgtt"); 5407 return -1; 5408 } 5409 5410 bool i915_gem_obj_bound(struct drm_i915_gem_object *o, 5411 struct i915_address_space *vm) 5412 { 5413 struct i915_vma *vma; 5414 5415 list_for_each_entry(vma, &o->vma_list, vma_link) 5416 if (vma->vm == vm && drm_mm_node_allocated(&vma->node)) 5417 return true; 5418 5419 return false; 5420 } 5421 5422 bool i915_gem_obj_bound_any(struct drm_i915_gem_object *o) 5423 { 5424 struct i915_vma *vma; 5425 5426 list_for_each_entry(vma, &o->vma_list, vma_link) 5427 if (drm_mm_node_allocated(&vma->node)) 5428 return true; 5429 5430 return false; 5431 } 5432 5433 unsigned long i915_gem_obj_size(struct drm_i915_gem_object *o, 5434 struct i915_address_space *vm) 5435 { 5436 struct drm_i915_private *dev_priv = o->base.dev->dev_private; 5437 struct i915_vma *vma; 5438 5439 WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base); 5440 5441 BUG_ON(list_empty(&o->vma_list)); 5442 5443 list_for_each_entry(vma, &o->vma_list, vma_link) 5444 if (vma->vm == vm) 5445 return vma->node.size; 5446 5447 return 0; 5448 } 5449 5450 #if 0 5451 static unsigned long 5452 i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) 5453 { 5454 struct drm_i915_private *dev_priv = 5455 container_of(shrinker, struct drm_i915_private, mm.shrinker); 5456 struct drm_device *dev = dev_priv->dev; 5457 unsigned long freed; 5458 bool unlock; 5459 5460 if (!i915_gem_shrinker_lock(dev, &unlock)) 5461 return SHRINK_STOP; 5462 5463 freed = i915_gem_shrink(dev_priv, 5464 sc->nr_to_scan, 5465 I915_SHRINK_BOUND | 5466 I915_SHRINK_UNBOUND | 5467 I915_SHRINK_PURGEABLE); 5468 if (freed < sc->nr_to_scan) 5469 freed += i915_gem_shrink(dev_priv, 5470 sc->nr_to_scan - freed, 5471 I915_SHRINK_BOUND | 5472 I915_SHRINK_UNBOUND); 5473 if (unlock) 5474 mutex_unlock(&dev->struct_mutex); 5475 5476 return freed; 5477 } 5478 #endif 5479 5480 struct i915_vma *i915_gem_obj_to_ggtt(struct drm_i915_gem_object *obj) 5481 { 5482 struct i915_vma *vma; 5483 5484 vma = list_first_entry(&obj->vma_list, typeof(*vma), vma_link); 5485 if (vma->vm != i915_obj_to_ggtt(obj)) 5486 return NULL; 5487 5488 
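	/* Looking only at the first entry relies on the convention that the
	 * GGTT VMA, when it exists, is kept at the head of obj->vma_list,
	 * with any PPGTT VMAs following it.
	 */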
return vma; 5489 } 5490
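
#if 0
/*
 * Illustrative sketch only, not driver code: how the helpers above combine
 * for a simple allocate / pin-into-GGTT / release cycle. The 4096-byte size
 * and alignment are made-up example values, and dev->struct_mutex is assumed
 * to be held by the caller.
 */
static void i915_gem_example_object_cycle(struct drm_device *dev)
{
	struct drm_i915_gem_object *obj;

	obj = i915_gem_alloc_object(dev, 4096);
	if (obj == NULL)
		return;

	/* Bind into the mappable global GTT, page aligned. */
	if (i915_gem_obj_ggtt_pin(obj, 4096, PIN_MAPPABLE) == 0) {
		/* ... access the object through the GTT aperture ... */
		i915_gem_object_ggtt_unpin(obj);
	}

	drm_gem_object_unreference(&obj->base);
}
#endif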