/*
 * Copyright © 2008 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 * Copyright (c) 2011 The FreeBSD Foundation
 * All rights reserved.
 *
 * This software was developed by Konstantin Belousov under sponsorship from
 * the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

#include <machine/md_var.h>

#include <drm/drmP.h>
#include <drm/drm_vma_manager.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "i915_trace.h"
#include "intel_drv.h"
#include <linux/shmem_fs.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/pci.h>

static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj,
						   bool force);
static __must_check int
i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
			       bool readonly);
static void
i915_gem_object_retire(struct drm_i915_gem_object *obj);

static void i915_gem_write_fence(struct drm_device *dev, int reg,
				 struct drm_i915_gem_object *obj);
static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
					 struct drm_i915_fence_reg *fence,
					 bool enable);

static unsigned long i915_gem_purge(struct drm_i915_private *dev_priv, long target);
static unsigned long i915_gem_shrink_all(struct drm_i915_private *dev_priv);

static bool cpu_cache_is_coherent(struct drm_device *dev,
				  enum i915_cache_level level)
{
	return HAS_LLC(dev) || level != I915_CACHE_NONE;
}

static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
	if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
		return true;

	return obj->pin_display;
}
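/*
 * Coherency summary for the two helpers above: platforms with a shared
 * last-level cache (HAS_LLC) are coherent for every cache level, so no
 * clflush is needed there.  On non-LLC parts only uncached
 * (I915_CACHE_NONE) objects are coherent.  Writes additionally need a
 * flush for pin_display objects, since display scanout does not snoop
 * the CPU cache.
 */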
static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
{
	if (obj->tiling_mode)
		i915_gem_release_mmap(obj);

	/* As we do not have an associated fence register, we will force
	 * a tiling change if we ever need to acquire one.
	 */
	obj->fence_dirty = false;
	obj->fence_reg = I915_FENCE_REG_NONE;
}

/* some bookkeeping */
static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
				  size_t size)
{
	spin_lock(&dev_priv->mm.object_stat_lock);
	dev_priv->mm.object_count++;
	dev_priv->mm.object_memory += size;
	spin_unlock(&dev_priv->mm.object_stat_lock);
}

static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
				     size_t size)
{
	spin_lock(&dev_priv->mm.object_stat_lock);
	dev_priv->mm.object_count--;
	dev_priv->mm.object_memory -= size;
	spin_unlock(&dev_priv->mm.object_stat_lock);
}

static int
i915_gem_wait_for_error(struct i915_gpu_error *error)
{
	int ret;

#define EXIT_COND (!i915_reset_in_progress(error) || \
		   i915_terminally_wedged(error))
	if (EXIT_COND)
		return 0;

	/*
	 * Only wait 10 seconds for the gpu reset to complete to avoid hanging
	 * userspace. If it takes that long something really bad is going on and
	 * we should simply try to bail out and fail as gracefully as possible.
	 */
	ret = wait_event_interruptible_timeout(error->reset_queue,
					       EXIT_COND,
					       10*HZ);
	if (ret == 0) {
		DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
		return -EIO;
	} else if (ret < 0) {
		return ret;
	}
#undef EXIT_COND

	return 0;
}

int i915_mutex_lock_interruptible(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret;

	ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
	if (ret)
		return ret;

	ret = mutex_lock_interruptible(&dev->struct_mutex);
	if (ret)
		return ret;

	WARN_ON(i915_verify_lists(dev));
	return 0;
}

static inline bool
i915_gem_object_is_inactive(struct drm_i915_gem_object *obj)
{
	return i915_gem_obj_bound_any(obj) && !obj->active;
}

int
i915_gem_init_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_init *args = data;

	if (drm_core_check_feature(dev, DRIVER_MODESET))
		return -ENODEV;

	if (args->gtt_start >= args->gtt_end ||
	    (args->gtt_end | args->gtt_start) & (PAGE_SIZE - 1))
		return -EINVAL;

	/* GEM with user mode setting was never supported on ilk and later. */
	if (INTEL_INFO(dev)->gen >= 5)
		return -ENODEV;

	mutex_lock(&dev->struct_mutex);
	kprintf("INITGLOBALGTT GTT_START %016jx\n", (uintmax_t)args->gtt_start);
	i915_gem_setup_global_gtt(dev, args->gtt_start, args->gtt_end,
				  args->gtt_end);
	dev_priv->gtt.mappable_end = args->gtt_end;
	mutex_unlock(&dev->struct_mutex);

	return 0;
}

int
i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
			    struct drm_file *file)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_get_aperture *args = data;
	struct drm_i915_gem_object *obj;
	size_t pinned;

	pinned = 0;
	mutex_lock(&dev->struct_mutex);
	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list)
		if (i915_gem_obj_is_pinned(obj))
			pinned += i915_gem_obj_ggtt_size(obj);
	mutex_unlock(&dev->struct_mutex);

	args->aper_size = dev_priv->gtt.base.total;
	args->aper_available_size = args->aper_size - pinned;

	return 0;
}
static void i915_gem_object_detach_phys(struct drm_i915_gem_object *obj)
{
	drm_dma_handle_t *phys = obj->phys_handle;

	if (!phys)
		return;

	if (obj->madv == I915_MADV_WILLNEED) {
		struct vm_object *mapping = obj->base.vm_obj;
		char *vaddr = phys->vaddr;
		int i;

		for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
			struct vm_page *page = shmem_read_mapping_page(mapping, i);
			if (!IS_ERR(page)) {
				char *dst = kmap_atomic(page);
				memcpy(dst, vaddr, PAGE_SIZE);
				drm_clflush_virt_range(dst, PAGE_SIZE);
				kunmap_atomic(dst);

				set_page_dirty(page);
				mark_page_accessed(page);
#if 0
				page_cache_release(page);
#endif
			}
			vaddr += PAGE_SIZE;
		}
		i915_gem_chipset_flush(obj->base.dev);
	}

#ifdef CONFIG_X86
	set_memory_wb((unsigned long)phys->vaddr, phys->size / PAGE_SIZE);
#endif
	drm_pci_free(obj->base.dev, phys);
	obj->phys_handle = NULL;
}

int
i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
			    int align)
{
	drm_dma_handle_t *phys;
	struct vm_object *mapping;
	char *vaddr;
	int i;

	if (obj->phys_handle) {
		if ((unsigned long)obj->phys_handle->vaddr & (align - 1))
			return -EBUSY;

		return 0;
	}

	if (obj->madv != I915_MADV_WILLNEED)
		return -EFAULT;

#if 0
	if (obj->base.filp == NULL)
		return -EINVAL;
#endif

	/* create a new object */
	phys = drm_pci_alloc(obj->base.dev, obj->base.size, align);
	if (!phys)
		return -ENOMEM;

	vaddr = phys->vaddr;
#ifdef CONFIG_X86
	set_memory_wc((unsigned long)vaddr, phys->size / PAGE_SIZE);
#endif
	mapping = obj->base.vm_obj;
	for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
		struct vm_page *page;
		char *src;

		page = shmem_read_mapping_page(mapping, i);
		if (IS_ERR(page)) {
#ifdef CONFIG_X86
			set_memory_wb((unsigned long)phys->vaddr, phys->size / PAGE_SIZE);
#endif
			drm_pci_free(obj->base.dev, phys);
			return PTR_ERR(page);
		}

		src = kmap_atomic(page);
		memcpy(vaddr, src, PAGE_SIZE);
		kunmap_atomic(src);

		mark_page_accessed(page);
#if 0
		page_cache_release(page);
#endif

		vaddr += PAGE_SIZE;
	}

	obj->phys_handle = phys;
	return 0;
}

static int
i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pwrite *args,
		     struct drm_file *file_priv)
{
	struct drm_device *dev = obj->base.dev;
	void *vaddr = (char *)obj->phys_handle->vaddr + args->offset;
	char __user *user_data = to_user_ptr(args->data_ptr);

	if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) {
		unsigned long unwritten;

		/* The physical object once assigned is fixed for the lifetime
		 * of the obj, so we can safely drop the lock and continue
		 * to access vaddr.
		 */
		mutex_unlock(&dev->struct_mutex);
		unwritten = copy_from_user(vaddr, user_data, args->size);
		mutex_lock(&dev->struct_mutex);
		if (unwritten)
			return -EFAULT;
	}

	i915_gem_chipset_flush(dev);
	return 0;
}

void *i915_gem_object_alloc(struct drm_device *dev)
{
	return kmalloc(sizeof(struct drm_i915_gem_object),
	    M_DRM, M_WAITOK | M_ZERO);
}

void i915_gem_object_free(struct drm_i915_gem_object *obj)
{
	kfree(obj);
}

static int
i915_gem_create(struct drm_file *file,
		struct drm_device *dev,
		uint64_t size,
		uint32_t *handle_p)
{
	struct drm_i915_gem_object *obj;
	int ret;
	u32 handle;

	size = roundup(size, PAGE_SIZE);
	if (size == 0)
		return -EINVAL;

	/* Allocate the new object */
	obj = i915_gem_alloc_object(dev, size);
	if (obj == NULL)
		return -ENOMEM;

	ret = drm_gem_handle_create(file, &obj->base, &handle);
	/* drop reference from allocate - handle holds it now */
	drm_gem_object_unreference_unlocked(&obj->base);
	if (ret)
		return ret;

	*handle_p = handle;
	return 0;
}

int
i915_gem_dumb_create(struct drm_file *file,
		     struct drm_device *dev,
		     struct drm_mode_create_dumb *args)
{
	/* have to work out size/pitch and return them */
	args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64);
	args->size = args->pitch * args->height;
	return i915_gem_create(file, dev,
			       args->size, &args->handle);
}
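/*
 * Worked example for the dumb-buffer math above (illustrative values,
 * not taken from real usage): width=1920, height=1080, bpp=32 gives
 * pitch = ALIGN(1920 * 4, 64) = 7680 bytes and
 * size = 7680 * 1080 = 8294400 bytes, which i915_gem_create() then
 * rounds up to a whole number of pages.
 */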
/**
 * Creates a new mm object and returns a handle to it.
 */
int
i915_gem_create_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_gem_create *args = data;

	return i915_gem_create(file, dev,
			       args->size, &args->handle);
}

static inline int
__copy_to_user_swizzled(char __user *cpu_vaddr,
			const char *gpu_vaddr, int gpu_offset,
			int length)
{
	int ret, cpu_offset = 0;

	while (length > 0) {
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
		int this_length = min(cacheline_end - gpu_offset, length);
		int swizzled_gpu_offset = gpu_offset ^ 64;

		ret = __copy_to_user(cpu_vaddr + cpu_offset,
				     gpu_vaddr + swizzled_gpu_offset,
				     this_length);
		if (ret)
			return ret + length;

		cpu_offset += this_length;
		gpu_offset += this_length;
		length -= this_length;
	}

	return 0;
}

static inline int
__copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
			  const char __user *cpu_vaddr,
			  int length)
{
	int ret, cpu_offset = 0;

	while (length > 0) {
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
		int this_length = min(cacheline_end - gpu_offset, length);
		int swizzled_gpu_offset = gpu_offset ^ 64;

		ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
				       cpu_vaddr + cpu_offset,
				       this_length);
		if (ret)
			return ret + length;

		cpu_offset += this_length;
		gpu_offset += this_length;
		length -= this_length;
	}

	return 0;
}
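/*
 * Note on the two swizzled copy helpers above: when bit 17 of a page's
 * physical address feeds the memory channel swizzle, the stored image of
 * that page has the two 64-byte halves of every 128-byte span exchanged.
 * XORing the offset with 64 undoes the exchange, and chunking the copy at
 * 64-byte cacheline boundaries keeps every chunk inside one half, so each
 * chunk is contiguous at its swizzled location.
 */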
/*
 * Pins the specified object's pages and synchronizes the object with
 * GPU accesses. Sets needs_clflush to non-zero if the caller should
 * flush the object from the CPU cache.
 */
int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
				    int *needs_clflush)
{
	int ret;

	*needs_clflush = 0;

#if 0
	if (!obj->base.filp)
		return -EINVAL;
#endif

	if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) {
		/* If we're not in the cpu read domain, set ourself into the gtt
		 * read domain and manually flush cachelines (if required). This
		 * optimizes for the case when the gpu will dirty the data
		 * anyway again before the next pread happens. */
		*needs_clflush = !cpu_cache_is_coherent(obj->base.dev,
							obj->cache_level);
		ret = i915_gem_object_wait_rendering(obj, true);
		if (ret)
			return ret;

		i915_gem_object_retire(obj);
	}

	ret = i915_gem_object_get_pages(obj);
	if (ret)
		return ret;

	i915_gem_object_pin_pages(obj);

	return ret;
}

/* Per-page copy function for the shmem pread fastpath.
 * Flushes invalid cachelines before reading the target if
 * needs_clflush is set. */
static int
shmem_pread_fast(struct vm_page *page, int shmem_page_offset, int page_length,
		 char __user *user_data,
		 bool page_do_bit17_swizzling, bool needs_clflush)
{
	char *vaddr;
	int ret;

	if (unlikely(page_do_bit17_swizzling))
		return -EINVAL;

	vaddr = kmap_atomic(page);
	if (needs_clflush)
		drm_clflush_virt_range(vaddr + shmem_page_offset,
				       page_length);
	ret = __copy_to_user_inatomic(user_data,
				      vaddr + shmem_page_offset,
				      page_length);
	kunmap_atomic(vaddr);

	return ret ? -EFAULT : 0;
}

static void
shmem_clflush_swizzled_range(char *addr, unsigned long length,
			     bool swizzled)
{
	if (unlikely(swizzled)) {
		unsigned long start = (unsigned long) addr;
		unsigned long end = (unsigned long) addr + length;

		/* For swizzling simply ensure that we always flush both
		 * channels. Lame, but simple and it works. Swizzled
		 * pwrite/pread is far from a hotpath - current userspace
		 * doesn't use it at all. */
		start = round_down(start, 128);
		end = round_up(end, 128);

		drm_clflush_virt_range((void *)start, end - start);
	} else {
		drm_clflush_virt_range(addr, length);
	}
}

/* Only difference to the fast-path function is that this can handle bit17
 * and uses non-atomic copy and kmap functions. */
static int
shmem_pread_slow(struct vm_page *page, int shmem_page_offset, int page_length,
		 char __user *user_data,
		 bool page_do_bit17_swizzling, bool needs_clflush)
{
	char *vaddr;
	int ret;

	vaddr = kmap(page);
	if (needs_clflush)
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
					     page_length,
					     page_do_bit17_swizzling);

	if (page_do_bit17_swizzling)
		ret = __copy_to_user_swizzled(user_data,
					      vaddr, shmem_page_offset,
					      page_length);
	else
		ret = __copy_to_user(user_data,
				     vaddr + shmem_page_offset,
				     page_length);
	kunmap(page);

	return ret ? -EFAULT : 0;
}
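/*
 * The pread loop below (and the pwrite loop later) follows a two-tier
 * protocol: the fast path runs under struct_mutex with an atomic kmap and
 * must not fault, so any failure just means "fall back".  The slow path
 * then drops struct_mutex, prefaults the user buffer, performs a sleeping
 * kmap/copy, and retakes the lock before the next iteration.
 */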
static int
i915_gem_shmem_pread(struct drm_device *dev,
		     struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pread *args,
		     struct drm_file *file)
{
	char __user *user_data;
	ssize_t remain;
	loff_t offset;
	int shmem_page_offset, page_length, ret = 0;
	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
	int prefaulted = 0;
	int needs_clflush = 0;
	int i;

	user_data = to_user_ptr(args->data_ptr);
	remain = args->size;

	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);

	ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
	if (ret)
		return ret;

	offset = args->offset;

	for (i = 0; i < (obj->base.size >> PAGE_SHIFT); i++) {
		struct vm_page *page = obj->pages[i];

		if (remain <= 0)
			break;

		/* Operation in this page
		 *
		 * shmem_page_offset = offset within page in shmem file
		 * page_length = bytes to copy for this page
		 */
		shmem_page_offset = offset_in_page(offset);
		page_length = remain;
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - shmem_page_offset;

		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
			(page_to_phys(page) & (1 << 17)) != 0;

		ret = shmem_pread_fast(page, shmem_page_offset, page_length,
				       user_data, page_do_bit17_swizzling,
				       needs_clflush);
		if (ret == 0)
			goto next_page;

		mutex_unlock(&dev->struct_mutex);

		if (likely(!i915.prefault_disable) && !prefaulted) {
			ret = fault_in_multipages_writeable(user_data, remain);
			/* Userspace is tricking us, but we've already clobbered
			 * its pages with the prefault and promised to write the
			 * data up to the first fault. Hence ignore any errors
			 * and just continue. */
			(void)ret;
			prefaulted = 1;
		}

		ret = shmem_pread_slow(page, shmem_page_offset, page_length,
				       user_data, page_do_bit17_swizzling,
				       needs_clflush);

		mutex_lock(&dev->struct_mutex);

		if (ret)
			goto out;

next_page:
		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

out:
	i915_gem_object_unpin_pages(obj);

	return ret;
}

/**
 * Reads data from the object referenced by handle.
 *
 * On error, the contents of *data are undefined.
 */
int
i915_gem_pread_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file)
{
	struct drm_i915_gem_pread *args = data;
	struct drm_i915_gem_object *obj;
	int ret = 0;

	if (args->size == 0)
		return 0;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Bounds check source. */
	if (args->offset > obj->base.size ||
	    args->size > obj->base.size - args->offset) {
		ret = -EINVAL;
		goto out;
	}

	trace_i915_gem_object_pread(obj, args->offset, args->size);

	ret = i915_gem_shmem_pread(dev, obj, args, file);

out:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/* This is the fast write path which cannot handle
 * page faults in the source data
 */

static inline int
fast_user_write(struct io_mapping *mapping,
		loff_t page_base, int page_offset,
		char __user *user_data,
		int length)
{
	void __iomem *vaddr_atomic;
	void *vaddr;
	unsigned long unwritten;

	vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
	/* We can use the cpu mem copy function because this is X86. */
	vaddr = (char __force*)vaddr_atomic + page_offset;
	unwritten = __copy_from_user_inatomic_nocache(vaddr,
						      user_data, length);
	io_mapping_unmap_atomic(vaddr_atomic);
	return unwritten;
}
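/*
 * fast_user_write() copies through an atomic write-combining mapping of
 * the aperture, so it may not sleep; the non-temporal (_nocache) copy
 * also avoids filling the CPU cache with data that will only ever be
 * read back by the GPU.
 */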
/**
 * This is the fast pwrite path, where we copy the data directly from the
 * user into the GTT, uncached.
 */
static int
i915_gem_gtt_pwrite_fast(struct drm_device *dev,
			 struct drm_i915_gem_object *obj,
			 struct drm_i915_gem_pwrite *args,
			 struct drm_file *file)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	ssize_t remain;
	loff_t offset, page_base;
	char __user *user_data;
	int page_offset, page_length, ret;

	ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK);
	if (ret)
		goto out;

	ret = i915_gem_object_set_to_gtt_domain(obj, true);
	if (ret)
		goto out_unpin;

	ret = i915_gem_object_put_fence(obj);
	if (ret)
		goto out_unpin;

	user_data = to_user_ptr(args->data_ptr);
	remain = args->size;

	offset = i915_gem_obj_ggtt_offset(obj) + args->offset;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		page_base = offset & ~PAGE_MASK;
		page_offset = offset_in_page(offset);
		page_length = remain;
		if ((page_offset + remain) > PAGE_SIZE)
			page_length = PAGE_SIZE - page_offset;

		/* If we get a fault while copying data, then (presumably) our
		 * source page isn't available.  Return the error and we'll
		 * retry in the slow path.
		 */
		if (fast_user_write(dev_priv->gtt.mappable, page_base,
				    page_offset, user_data, page_length)) {
			ret = -EFAULT;
			goto out_unpin;
		}

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

out_unpin:
	i915_gem_object_ggtt_unpin(obj);
out:
	return ret;
}

/* Per-page copy function for the shmem pwrite fastpath.
 * Flushes invalid cachelines before writing to the target if
 * needs_clflush_before is set and flushes out any written cachelines after
 * writing if needs_clflush is set. */
static int
shmem_pwrite_fast(struct vm_page *page, int shmem_page_offset, int page_length,
		  char __user *user_data,
		  bool page_do_bit17_swizzling,
		  bool needs_clflush_before,
		  bool needs_clflush_after)
{
	char *vaddr;
	int ret;

	if (unlikely(page_do_bit17_swizzling))
		return -EINVAL;

	vaddr = kmap_atomic(page);
	if (needs_clflush_before)
		drm_clflush_virt_range(vaddr + shmem_page_offset,
				       page_length);
	ret = __copy_from_user_inatomic(vaddr + shmem_page_offset,
					user_data, page_length);
	if (needs_clflush_after)
		drm_clflush_virt_range(vaddr + shmem_page_offset,
				       page_length);
	kunmap_atomic(vaddr);

	return ret ? -EFAULT : 0;
}

/* Only difference to the fast-path function is that this can handle bit17
 * and uses non-atomic copy and kmap functions. */
static int
shmem_pwrite_slow(struct vm_page *page, int shmem_page_offset, int page_length,
		  char __user *user_data,
		  bool page_do_bit17_swizzling,
		  bool needs_clflush_before,
		  bool needs_clflush_after)
{
	char *vaddr;
	int ret;

	vaddr = kmap(page);
	if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
					     page_length,
					     page_do_bit17_swizzling);
	if (page_do_bit17_swizzling)
		ret = __copy_from_user_swizzled(vaddr, shmem_page_offset,
						user_data,
						page_length);
	else
		ret = __copy_from_user(vaddr + shmem_page_offset,
				       user_data,
				       page_length);
	if (needs_clflush_after)
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
					     page_length,
					     page_do_bit17_swizzling);
	kunmap(page);

	return ret ? -EFAULT : 0;
}
static int
i915_gem_shmem_pwrite(struct drm_device *dev,
		      struct drm_i915_gem_object *obj,
		      struct drm_i915_gem_pwrite *args,
		      struct drm_file *file)
{
	ssize_t remain;
	loff_t offset;
	char __user *user_data;
	int shmem_page_offset, page_length, ret = 0;
	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
	int hit_slowpath = 0;
	int needs_clflush_after = 0;
	int needs_clflush_before = 0;
	int i;

	user_data = to_user_ptr(args->data_ptr);
	remain = args->size;

	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);

	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
		/* If we're not in the cpu write domain, set ourself into the gtt
		 * write domain and manually flush cachelines (if required). This
		 * optimizes for the case when the gpu will use the data
		 * right away and we therefore have to clflush anyway. */
		needs_clflush_after = cpu_write_needs_clflush(obj);
		ret = i915_gem_object_wait_rendering(obj, false);
		if (ret)
			return ret;

		i915_gem_object_retire(obj);
	}
	/* Same trick applies to invalidate partially written cachelines read
	 * before writing. */
	if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0)
		needs_clflush_before =
			!cpu_cache_is_coherent(dev, obj->cache_level);

	ret = i915_gem_object_get_pages(obj);
	if (ret)
		return ret;

	i915_gem_object_pin_pages(obj);

	offset = args->offset;
	obj->dirty = 1;

	VM_OBJECT_LOCK(obj->base.vm_obj);
	vm_object_pip_add(obj->base.vm_obj, 1);
	for (i = 0; i < (obj->base.size >> PAGE_SHIFT); i++) {
		struct vm_page *page = obj->pages[i];
		int partial_cacheline_write;

		if (i < offset >> PAGE_SHIFT)
			continue;

		if (remain <= 0)
			break;

		/* Operation in this page
		 *
		 * shmem_page_offset = offset within page in shmem file
		 * page_length = bytes to copy for this page
		 */
		shmem_page_offset = offset_in_page(offset);

		page_length = remain;
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - shmem_page_offset;

		/* If we don't overwrite a cacheline completely we need to be
		 * careful to have up-to-date data by first clflushing. Don't
		 * overcomplicate things and flush the entire patch. */
		partial_cacheline_write = needs_clflush_before &&
			((shmem_page_offset | page_length)
				& (cpu_clflush_line_size - 1));

		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
			(page_to_phys(page) & (1 << 17)) != 0;

		ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
					user_data, page_do_bit17_swizzling,
					partial_cacheline_write,
					needs_clflush_after);
		if (ret == 0)
			goto next_page;

		hit_slowpath = 1;
		mutex_unlock(&dev->struct_mutex);
		ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
					user_data, page_do_bit17_swizzling,
					partial_cacheline_write,
					needs_clflush_after);

		mutex_lock(&dev->struct_mutex);

		if (ret)
			goto out;

next_page:
		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}
	vm_object_pip_wakeup(obj->base.vm_obj);
	VM_OBJECT_UNLOCK(obj->base.vm_obj);

out:
	i915_gem_object_unpin_pages(obj);

	if (hit_slowpath) {
		/*
		 * Fixup: Flush cpu caches in case we didn't flush the dirty
		 * cachelines in-line while writing and the object moved
		 * out of the cpu write domain while we've dropped the lock.
		 */
		if (!needs_clflush_after &&
		    obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
			if (i915_gem_clflush_object(obj, obj->pin_display))
				i915_gem_chipset_flush(dev);
		}
	}

	if (needs_clflush_after)
		i915_gem_chipset_flush(dev);

	return ret;
}
/**
 * Writes data to the object referenced by handle.
 *
 * On error, the contents of the buffer that were to be modified are undefined.
 */
int
i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_gem_pwrite *args = data;
	struct drm_i915_gem_object *obj;
	int ret;

	if (args->size == 0)
		return 0;

	if (likely(!i915.prefault_disable)) {
		ret = fault_in_multipages_readable(to_user_ptr(args->data_ptr),
						   args->size);
		if (ret)
			return -EFAULT;
	}

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Bounds check destination. */
	if (args->offset > obj->base.size ||
	    args->size > obj->base.size - args->offset) {
		ret = -EINVAL;
		goto out;
	}

	trace_i915_gem_object_pwrite(obj, args->offset, args->size);

	ret = -EFAULT;
	/* We can only do the GTT pwrite on untiled buffers, as otherwise
	 * it would end up going through the fenced access, and we'll get
	 * different detiling behavior between reading and writing.
	 * pread/pwrite currently are reading and writing from the CPU
	 * perspective, requiring manual detiling by the client.
	 */
	if (obj->phys_handle) {
		ret = i915_gem_phys_pwrite(obj, args, file);
		goto out;
	}

	if (obj->tiling_mode == I915_TILING_NONE &&
	    obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
	    cpu_write_needs_clflush(obj)) {
		ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
		/* Note that the gtt paths might fail with non-page-backed user
		 * pointers (e.g. gtt mappings when moving data between
		 * textures). Fallback to the shmem path in that case. */
	}

	if (ret == -EFAULT || ret == -ENOSPC)
		ret = i915_gem_shmem_pwrite(dev, obj, args, file);

out:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

int
i915_gem_check_wedge(struct i915_gpu_error *error,
		     bool interruptible)
{
	if (i915_reset_in_progress(error)) {
		/* Non-interruptible callers can't handle -EAGAIN, hence return
		 * -EIO unconditionally for these. */
		if (!interruptible)
			return -EIO;

		/* Recovery complete, but the reset failed ... */
		if (i915_terminally_wedged(error))
			return -EIO;

		return -EAGAIN;
	}

	return 0;
}
/*
 * Compare seqno against outstanding lazy request. Emit a request if they are
 * equal.
 */
int
i915_gem_check_olr(struct intel_engine_cs *ring, u32 seqno)
{
	int ret;

	BUG_ON(!mutex_is_locked(&ring->dev->struct_mutex));

	ret = 0;
	if (seqno == ring->outstanding_lazy_seqno)
		ret = i915_add_request(ring, NULL);

	return ret;
}
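/*
 * Why check_olr matters (a note on the scheme above): requests are
 * emitted lazily, so the seqno a caller is about to wait on may belong
 * to a batch that has been queued but not yet followed by a request in
 * the ring.  Waiting on such a seqno without first emitting the request
 * via i915_add_request() could stall forever, as the seqno would never
 * be written back.
 */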
#if 0
static void fake_irq(unsigned long data)
{
	wake_up_process((struct task_struct *)data);
}

static bool missed_irq(struct drm_i915_private *dev_priv,
		       struct intel_engine_cs *ring)
{
	return test_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings);
}

static bool can_wait_boost(struct drm_i915_file_private *file_priv)
{
	if (file_priv == NULL)
		return true;

	return !atomic_xchg(&file_priv->rps_wait_boost, true);
}
#endif

/**
 * __wait_seqno - wait until execution of seqno has finished
 * @ring: the ring expected to report seqno
 * @seqno: duh!
 * @reset_counter: reset sequence associated with the given seqno
 * @interruptible: do an interruptible wait (normally yes)
 * @timeout: in - how long to wait (NULL forever); out - how much time remaining
 *
 * Note: It is of utmost importance that the passed in seqno and reset_counter
 * values have been read by the caller in an smp safe manner. Where read-side
 * locks are involved, it is sufficient to read the reset_counter before
 * unlocking the lock that protects the seqno. For lockless tricks, the
 * reset_counter _must_ be read before, and an appropriate smp_rmb must be
 * inserted.
 *
 * Returns 0 if the seqno was found within the allotted time. Else returns the
 * errno with remaining time filled in timeout argument.
 */
static int __wait_seqno(struct intel_engine_cs *ring, u32 seqno,
			unsigned reset_counter,
			bool interruptible,
			struct timespec *timeout,
			struct drm_i915_file_private *file_priv)
{
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
	struct timespec before, now, wait_time={1,0};
	unsigned long timeout_jiffies;
	long end;
	bool wait_forever = true;
	int ret;

	WARN(!intel_irqs_enabled(dev_priv), "IRQs disabled");

	if (i915_seqno_passed(ring->get_seqno(ring, true), seqno))
		return 0;

	if (timeout != NULL) {
		wait_time = *timeout;
		wait_forever = false;
	}

	timeout_jiffies = timespec_to_jiffies_timeout(&wait_time);

	if (WARN_ON(!ring->irq_get(ring)))
		return -ENODEV;

	/* Record current time in case interrupted by signal, or wedged */
	trace_i915_gem_request_wait_begin(ring, seqno);
	getrawmonotonic(&before);

#define EXIT_COND \
	(i915_seqno_passed(ring->get_seqno(ring, false), seqno) || \
	 i915_reset_in_progress(&dev_priv->gpu_error) || \
	 reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter))
	do {
		if (interruptible)
			end = wait_event_interruptible_timeout(ring->irq_queue,
							       EXIT_COND,
							       timeout_jiffies);
		else
			end = wait_event_timeout(ring->irq_queue, EXIT_COND,
						 timeout_jiffies);

		/* We need to check whether any gpu reset happened in between
		 * the caller grabbing the seqno and now ... */
		if (reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter))
			end = -EAGAIN;

		/* ... but upgrade the -EAGAIN to an -EIO if the gpu is truly
		 * gone. */
		ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
		if (ret)
			end = ret;
	} while (end == 0 && wait_forever);

	getrawmonotonic(&now);

	ring->irq_put(ring);
	trace_i915_gem_request_wait_end(ring, seqno);
#undef EXIT_COND

	if (timeout) {
		struct timespec sleep_time = timespec_sub(now, before);
		*timeout = timespec_sub(*timeout, sleep_time);
		if (!timespec_valid(timeout)) /* i.e. negative time remains */
			set_normalized_timespec(timeout, 0, 0);
	}

	switch (end) {
	case -EIO:
	case -EAGAIN: /* Wedged */
	case -ERESTARTSYS: /* Signal */
		return (int)end;
	case 0: /* Timeout */
		return -ETIMEDOUT;	/* -ETIME on Linux */
	default: /* Completed */
		WARN_ON(end < 0); /* We're not aware of other errors */
		return 0;
	}
}

/**
 * Waits for a sequence number to be signaled, and cleans up the
 * request and object lists appropriately for that event.
 */
int
i915_wait_seqno(struct intel_engine_cs *ring, uint32_t seqno)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	bool interruptible = dev_priv->mm.interruptible;
	int ret;

	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
	BUG_ON(seqno == 0);

	ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
	if (ret)
		return ret;

	ret = i915_gem_check_olr(ring, seqno);
	if (ret)
		return ret;

	return __wait_seqno(ring, seqno,
			    atomic_read(&dev_priv->gpu_error.reset_counter),
			    interruptible, NULL, NULL);
}

static int
i915_gem_object_wait_rendering__tail(struct drm_i915_gem_object *obj,
				     struct intel_engine_cs *ring)
{
	if (!obj->active)
		return 0;

	/* Manually manage the write flush as we may have not yet
	 * retired the buffer.
	 *
	 * Note that the last_write_seqno is always the earlier of
	 * the two (read/write) seqno, so if we have successfully waited,
	 * we know we have passed the last write.
	 */
	obj->last_write_seqno = 0;

	return 0;
}

/**
 * Ensures that all rendering to the object has completed and the object is
 * safe to unbind from the GTT or access from the CPU.
 */
static __must_check int
i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
			       bool readonly)
{
	struct intel_engine_cs *ring = obj->ring;
	u32 seqno;
	int ret;

	seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
	if (seqno == 0)
		return 0;

	ret = i915_wait_seqno(ring, seqno);
	if (ret)
		return ret;

	return i915_gem_object_wait_rendering__tail(obj, ring);
}
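/*
 * The nonblocking variant below samples the reset counter while
 * struct_mutex is still held, drops the lock for the actual wait and
 * lets __wait_seqno() compare the counter again, so a GPU reset that
 * sneaks in while unlocked surfaces as -EAGAIN instead of being missed.
 */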
/* A nonblocking variant of the above wait. This is a highly dangerous routine
 * as the object state may change during this call.
 */
static __must_check int
i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
					    struct drm_i915_file_private *file_priv,
					    bool readonly)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *ring = obj->ring;
	unsigned reset_counter;
	u32 seqno;
	int ret;

	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
	BUG_ON(!dev_priv->mm.interruptible);

	seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
	if (seqno == 0)
		return 0;

	ret = i915_gem_check_wedge(&dev_priv->gpu_error, true);
	if (ret)
		return ret;

	ret = i915_gem_check_olr(ring, seqno);
	if (ret)
		return ret;

	reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
	mutex_unlock(&dev->struct_mutex);
	ret = __wait_seqno(ring, seqno, reset_counter, true, NULL, file_priv);
	mutex_lock(&dev->struct_mutex);
	if (ret)
		return ret;

	return i915_gem_object_wait_rendering__tail(obj, ring);
}

/**
 * Called when user space prepares to use an object with the CPU, either
 * through the mmap ioctl's mapping or a GTT mapping.
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file)
{
	struct drm_i915_gem_set_domain *args = data;
	struct drm_i915_gem_object *obj;
	uint32_t read_domains = args->read_domains;
	uint32_t write_domain = args->write_domain;
	int ret;

	/* Only handle setting domains to types used by the CPU. */
	if (write_domain & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	if (read_domains & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	/* Having something in the write domain implies it's in the read
	 * domain, and only that read domain.  Enforce that in the request.
	 */
	if (write_domain != 0 && read_domains != write_domain)
		return -EINVAL;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Try to flush the object off the GPU without holding the lock.
	 * We will repeat the flush holding the lock in the normal manner
	 * to catch cases where we are gazumped.
	 */
	ret = i915_gem_object_wait_rendering__nonblocking(obj,
							  file->driver_priv,
							  !write_domain);
	if (ret)
		goto unref;

	if (read_domains & I915_GEM_DOMAIN_GTT) {
		ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);

		/* Silently promote "you're not bound, there was nothing to do"
		 * to success, since the client was just asking us to
		 * make sure everything was done.
		 */
		if (ret == -EINVAL)
			ret = 0;
	} else {
		ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
	}

unref:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/**
 * Called when user space has done writes to this buffer
 */
int
i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file)
{
	struct drm_i915_gem_sw_finish *args = data;
	struct drm_i915_gem_object *obj;
	int ret = 0;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Pinned buffers may be scanout, so flush the cache */
	if (obj->pin_display)
		i915_gem_object_flush_cpu_write_domain(obj, true);

	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}
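/*
 * A minimal userspace sketch of the two ioctls above (assumed usage, for
 * illustration only; "fd" and "handle" are placeholders): set the CPU
 * domain before accessing a mapping, and signal sw_finish when done.
 */
#if 0
	struct drm_i915_gem_set_domain sd = {
		.handle = handle,
		.read_domains = I915_GEM_DOMAIN_CPU,
		.write_domain = I915_GEM_DOMAIN_CPU,
	};
	ioctl(fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &sd);

	/* ... CPU reads/writes through the mapping ... */

	struct drm_i915_gem_sw_finish sf = { .handle = handle };
	ioctl(fd, DRM_IOCTL_I915_GEM_SW_FINISH, &sf);
#endif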
/**
 * Maps the contents of an object, returning the address it is mapped
 * into.
 *
 * While the mapping holds a reference on the contents of the object, it doesn't
 * imply a ref on the object itself.
 */
int
i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_gem_mmap *args = data;
	struct drm_gem_object *obj;
	unsigned long addr;
	struct proc *p = curproc;
	vm_map_t map = &p->p_vmspace->vm_map;
	vm_size_t size;
	int error = 0, rv;

	obj = drm_gem_object_lookup(dev, file, args->handle);
	if (obj == NULL)
		return -ENOENT;

	if (args->size == 0)
		goto out;

	size = round_page(args->size);
	if (map->size + size > p->p_rlimit[RLIMIT_VMEM].rlim_cur) {
		error = -ENOMEM;
		goto out;
	}

	/*
	 * Call hint to ensure that NULL is not returned as a valid address
	 * and to reduce vm_map traversals. XXX causes instability, use a
	 * fixed low address as the start point instead to avoid the NULL
	 * return issue.
	 */
	addr = PAGE_SIZE;

	/*
	 * Use 256KB alignment.  It is unclear why this matters for a
	 * virtual address but it appears to fix a number of application/X
	 * crashes and kms console switching is much faster.
	 */
	vm_object_hold(obj->vm_obj);
	vm_object_reference_locked(obj->vm_obj);
	vm_object_drop(obj->vm_obj);

	rv = vm_map_find(map, obj->vm_obj, NULL,
			 args->offset, &addr, args->size,
			 256 * 1024, /* align */
			 TRUE, /* fitit */
			 VM_MAPTYPE_NORMAL, /* maptype */
			 VM_PROT_READ | VM_PROT_WRITE, /* prot */
			 VM_PROT_READ | VM_PROT_WRITE, /* max */
			 MAP_SHARED /* cow */);
	if (rv != KERN_SUCCESS) {
		vm_object_deallocate(obj->vm_obj);
		error = -vm_mmap_to_errno(rv);
	} else {
		args->addr_ptr = (uint64_t)addr;
	}
out:
	drm_gem_object_unreference(obj);
	return (error);
}
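/*
 * Assumed userspace counterpart of the mmap ioctl above (illustrative
 * only; "fd" and "handle" are placeholders):
 */
#if 0
	struct drm_i915_gem_mmap mm = {
		.handle = handle,
		.offset = 0,
		.size = size,
	};
	if (ioctl(fd, DRM_IOCTL_I915_GEM_MMAP, &mm) == 0)
		ptr = (void *)(uintptr_t)mm.addr_ptr;
#endif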
/**
 * i915_gem_fault - fault a page into the GTT
 *
 * vm_obj is locked on entry and expected to be locked on return.
 *
 * The vm_pager has placemarked the object with an anonymous memory page
 * which we must replace atomically to avoid races against concurrent faults
 * on the same page.  XXX we currently are unable to do this atomically.
 *
 * If we are to return an error we should not touch the anonymous page,
 * the caller will deallocate it.
 *
 * XXX Most GEM calls appear to be interruptible, but we can't hard loop
 * in that case.  Release all resources and wait 1 tick before retrying.
 * This is a huge problem which needs to be fixed by getting rid of most
 * of the interruptibility.  The linux code does not retry but does appear
 * to have some sort of mechanism (VM_FAULT_NOPAGE ?) for the higher level
 * to be able to retry.
 *
 * --
 *
 * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped
 * from userspace.  The fault handler takes care of binding the object to
 * the GTT (if needed), allocating and programming a fence register (again,
 * only if needed based on whether the old reg is still valid or the object
 * is tiled) and inserting a new PTE into the faulting process.
 *
 * Note that the faulting process may involve evicting existing objects
 * from the GTT and/or fence registers to make room.  So performance may
 * suffer if the GTT working set is large or there are few fence registers
 * left.
 *
 * vm_obj is locked on entry and expected to be locked on return.  The VM
 * pager has placed an anonymous memory page at (obj,offset) which we have
 * to replace.
 */
int i915_gem_fault(vm_object_t vm_obj, vm_ooffset_t offset, int prot, vm_page_t *mres)
{
	struct drm_i915_gem_object *obj = to_intel_bo(vm_obj->handle);
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned long page_offset;
	vm_page_t m, oldm = NULL;
	int ret = 0;
	int didpip = 0;
	bool write = !!(prot & VM_PROT_WRITE);

	intel_runtime_pm_get(dev_priv);

	/* We don't use vmf->pgoff since that has the fake offset */
	page_offset = (unsigned long)offset;

retry:
	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		goto out;

	trace_i915_gem_object_fault(obj, page_offset, true, write);

	/* Try to flush the object off the GPU first without holding the lock.
	 * Upon reacquiring the lock, we will perform our sanity checks and then
	 * repeat the flush holding the lock in the normal manner to catch cases
	 * where we are gazumped.
	 */
	ret = i915_gem_object_wait_rendering__nonblocking(obj, NULL, !write);
	if (ret)
		goto unlock;

	/* Access to snoopable pages through the GTT is incoherent. */
	if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) {
		ret = -EFAULT;
		goto unlock;
	}

	/*
	 * START FREEBSD MAGIC
	 *
	 * Add a pip count to avoid destruction and certain other
	 * complex operations (such as collapses?) while unlocked.
	 */
	if (didpip == 0) {
		vm_object_pip_add(vm_obj, 1);
		didpip = 1;
	}

	/*
	 * XXX We must currently remove the placeholder page now to avoid
	 * a deadlock against a concurrent i915_gem_release_mmap().
	 * Otherwise concurrent operation will block on the busy page
	 * while holding locks which we need to obtain.
	 */
	if (*mres != NULL) {
		oldm = *mres;
		vm_page_remove(oldm);
		*mres = NULL;
	} else {
		oldm = NULL;
	}

	VM_OBJECT_UNLOCK(vm_obj);
	ret = 0;
	m = NULL;

	/*
	 * Since the object lock was dropped, another thread might have
	 * faulted on the same GTT address and instantiated the mapping.
	 * Recheck.
	 */
	VM_OBJECT_LOCK(vm_obj);
	m = vm_page_lookup(vm_obj, OFF_TO_IDX(offset));
	if (m != NULL) {
		/*
		 * Try to busy the page, retry on failure (non-zero ret).
		 */
		if (vm_page_busy_try(m, false)) {
			kprintf("i915_gem_fault: PG_BUSY\n");
			VM_OBJECT_UNLOCK(vm_obj);
			mutex_unlock(&dev->struct_mutex);
			int dummy;
			tsleep(&dummy, 0, "delay", 1); /* XXX */
			VM_OBJECT_LOCK(vm_obj);
			goto retry;
		}
		goto have_page;
	}
	/*
	 * END FREEBSD MAGIC
	 */

	/*
	 * Object must be unlocked here to avoid deadlock during
	 * other GEM calls.  All goto targets expect the object to
	 * be locked.
	 */
	VM_OBJECT_UNLOCK(vm_obj);

	/* Now bind it into the GTT if needed */
	ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE);
	if (ret) {
		VM_OBJECT_LOCK(vm_obj);
		goto unlock;
	}

	ret = i915_gem_object_set_to_gtt_domain(obj, write);
	if (ret) {
		VM_OBJECT_LOCK(vm_obj);
		goto unpin;
	}

	ret = i915_gem_object_get_fence(obj);
	if (ret) {
		VM_OBJECT_LOCK(vm_obj);
		goto unpin;
	}

	obj->fault_mappable = true;

	/*
	 * Relock object for insertion, leave locked for return.
	 */
	VM_OBJECT_LOCK(vm_obj);
	m = vm_phys_fictitious_to_vm_page(dev_priv->gtt.mappable_base +
					  i915_gem_obj_ggtt_offset(obj) +
					  offset);
	if (m == NULL) {
		ret = -EFAULT;
		goto unpin;
	}
	KASSERT((m->flags & PG_FICTITIOUS) != 0, ("not fictitious %p", m));
	KASSERT(m->wire_count == 1, ("wire_count not 1 %p", m));

	/*
	 * Try to busy the page.  Fails on non-zero return.
	 */
	if (vm_page_busy_try(m, false)) {
		VM_OBJECT_UNLOCK(vm_obj);
		kprintf("i915_gem_fault: PG_BUSY(2)\n");
		i915_gem_object_ggtt_unpin(obj);
		mutex_unlock(&dev->struct_mutex);
		int dummy;
		tsleep(&dummy, 0, "delay", 1); /* XXX */
		VM_OBJECT_LOCK(vm_obj);
		goto retry;
	}
	m->valid = VM_PAGE_BITS_ALL;

	/*
	 * Finally, remap it using the new GTT offset.
	 *
	 * (object expected to be in a locked state)
	 */
	vm_page_insert(m, vm_obj, OFF_TO_IDX(offset));
have_page:
	*mres = m;

	i915_gem_object_ggtt_unpin(obj);
	mutex_unlock(&dev->struct_mutex);
	if (oldm != NULL)
		vm_page_free(oldm);
	if (didpip)
		vm_object_pip_wakeup(vm_obj);
	return (VM_PAGER_OK);

	/*
	 * ALTERNATIVE ERROR RETURN.
	 *
	 * OBJECT EXPECTED TO BE LOCKED.
	 */
unpin:
	i915_gem_object_ggtt_unpin(obj);
unlock:
	mutex_unlock(&dev->struct_mutex);
out:
	switch (ret) {
	case -EIO:
		/*
		 * We eat errors when the gpu is terminally wedged to avoid
		 * userspace unduly crashing (gl has no provisions for mmaps to
		 * fail). But any other -EIO isn't ours (e.g. swap in failure)
		 * and so needs to be reported.
		 */
		if (!i915_terminally_wedged(&dev_priv->gpu_error)) {
//			ret = VM_FAULT_SIGBUS;
			break;
		}
		/* fall through */
	case -EAGAIN:
		/*
		 * EAGAIN means the gpu is hung and we'll wait for the error
		 * handler to reset everything when re-faulting in
		 * i915_mutex_lock_interruptible.
		 */
		/* fall through */
	case -ERESTARTSYS:
	case -EINTR:
		VM_OBJECT_UNLOCK(vm_obj);
		int dummy;
		tsleep(&dummy, 0, "delay", 1); /* XXX */
		VM_OBJECT_LOCK(vm_obj);
		goto retry;
	default:
		WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret);
		ret = VM_PAGER_ERROR;
		break;
	}

	intel_runtime_pm_put(dev_priv);

	/*
	 * Error return.  We already NULL'd out *mres so we should be able
	 * to free (oldm) here even though we are returning an error and the
	 * caller usually handles the freeing.
	 */
	if (oldm != NULL)
		vm_page_free(oldm);
	if (didpip)
		vm_object_pip_wakeup(vm_obj);

	return ret;
}
/**
 * i915_gem_release_mmap - remove physical page mappings
 * @obj: obj in question
 *
 * Preserve the reservation of the mmapping with the DRM core code, but
 * relinquish ownership of the pages back to the system.
 *
 * It is vital that we remove the page mapping if we have mapped a tiled
 * object through the GTT and then lose the fence register due to
 * resource pressure.  Similarly if the object has been moved out of the
 * aperture, then pages mapped into userspace must be revoked.  Removing the
 * mapping will then trigger a page fault on the next user access, allowing
 * fixup by i915_gem_fault().
 */
void
i915_gem_release_mmap(struct drm_i915_gem_object *obj)
{
	vm_object_t devobj;
	vm_page_t m;
	int i, page_count;

	if (!obj->fault_mappable)
		return;

	devobj = cdev_pager_lookup(obj);
	if (devobj != NULL) {
		page_count = OFF_TO_IDX(obj->base.size);

		VM_OBJECT_LOCK(devobj);
		for (i = 0; i < page_count; i++) {
			m = vm_page_lookup_busy_wait(devobj, i, TRUE, "915unm");
			if (m == NULL)
				continue;
			cdev_pager_free_page(devobj, m);
		}
		VM_OBJECT_UNLOCK(devobj);
		vm_object_deallocate(devobj);
	}

	obj->fault_mappable = false;
}

void
i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv)
{
	struct drm_i915_gem_object *obj;

	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list)
		i915_gem_release_mmap(obj);
}

uint32_t
i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
{
	uint32_t gtt_size;

	if (INTEL_INFO(dev)->gen >= 4 ||
	    tiling_mode == I915_TILING_NONE)
		return size;

	/* Previous chips need a power-of-two fence region when tiling */
	if (INTEL_INFO(dev)->gen == 3)
		gtt_size = 1024*1024;
	else
		gtt_size = 512*1024;

	while (gtt_size < size)
		gtt_size <<= 1;

	return gtt_size;
}

/**
 * i915_gem_get_gtt_alignment - return required GTT alignment for an object
 * @obj: object to check
 *
 * Return the required GTT alignment for an object, taking into account
 * potential fence register mapping.
 */
uint32_t
i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size,
			   int tiling_mode, bool fenced)
{
	/*
	 * Minimum alignment is 4k (GTT page size), but might be greater
	 * if a fence register is needed for the object.
	 */
	if (INTEL_INFO(dev)->gen >= 4 || (!fenced && IS_G33(dev)) ||
	    tiling_mode == I915_TILING_NONE)
		return 4096;

	/*
	 * Previous chips need to be aligned to the size of the smallest
	 * fence register that can contain the object.
	 */
	return i915_gem_get_gtt_size(dev, size, tiling_mode);
}
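/*
 * Worked example for the fence sizing above (illustrative): a 1.5MiB
 * tiled object on gen3 starts from the 1MiB minimum fence region and
 * doubles once, giving gtt_size = 2MiB; on pre-gen4 hardware
 * i915_gem_get_gtt_alignment() then returns that same 2MiB, since the
 * object has to sit in a naturally aligned power-of-two region for the
 * fence register to cover it.
 */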
static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
	int ret;

#if 0
	if (drm_vma_node_has_offset(&obj->base.vma_node))
		return 0;
#endif

	dev_priv->mm.shrinker_no_lock_stealing = true;

	ret = drm_gem_create_mmap_offset(&obj->base);
	if (ret != -ENOSPC)
		goto out;

	/* Badly fragmented mmap space? The only way we can recover
	 * space is by destroying unwanted objects. We can't randomly release
	 * mmap_offsets as userspace expects them to be persistent for the
	 * lifetime of the objects. The closest we can do is to release the
	 * offsets on purgeable objects by truncating it and marking it purged,
	 * which prevents userspace from ever using that object again.
	 */
	i915_gem_purge(dev_priv, obj->base.size >> PAGE_SHIFT);
	ret = drm_gem_create_mmap_offset(&obj->base);
	if (ret != -ENOSPC)
		goto out;

	i915_gem_shrink_all(dev_priv);
	ret = drm_gem_create_mmap_offset(&obj->base);
out:
	dev_priv->mm.shrinker_no_lock_stealing = false;

	return ret;
}

static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
{
	drm_gem_free_mmap_offset(&obj->base);
}

int
i915_gem_mmap_gtt(struct drm_file *file,
		  struct drm_device *dev,
		  uint32_t handle,
		  uint64_t *offset)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj;
	int ret;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	if (obj->base.size > dev_priv->gtt.mappable_end) {
		ret = -E2BIG;
		goto out;
	}

	if (obj->madv != I915_MADV_WILLNEED) {
		DRM_DEBUG("Attempting to mmap a purgeable buffer\n");
		ret = -EFAULT;
		goto out;
	}

	ret = i915_gem_object_create_mmap_offset(obj);
	if (ret)
		goto out;

	*offset = DRM_GEM_MAPPING_OFF(obj->base.map_list.key) |
	    DRM_GEM_MAPPING_KEY;

out:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}
/**
 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
 * @dev: DRM device
 * @data: GTT mapping ioctl data
 * @file: GEM object info
 *
 * Simply returns the fake offset to userspace so it can mmap it.
 * The mmap call will end up in drm_gem_mmap(), which will set things
 * up so we can get faults in the handler above.
 *
 * The fault handler will take care of binding the object into the GTT
 * (since it may have been evicted to make room for something), allocating
 * a fence register, and mapping the appropriate aperture address into
 * userspace.
 */
int
i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
			struct drm_file *file)
{
	struct drm_i915_gem_mmap_gtt *args = data;

	return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
}

static inline int
i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj)
{
	return obj->madv == I915_MADV_DONTNEED;
}

/* Immediately discard the backing storage */
static void
i915_gem_object_truncate(struct drm_i915_gem_object *obj)
{
	vm_object_t vm_obj;

	vm_obj = obj->base.vm_obj;
	VM_OBJECT_LOCK(vm_obj);
	vm_object_page_remove(vm_obj, 0, 0, false);
	VM_OBJECT_UNLOCK(vm_obj);

	obj->madv = __I915_MADV_PURGED;
}

/* Try to discard unwanted pages */
static void
i915_gem_object_invalidate(struct drm_i915_gem_object *obj)
{
#if 0
	struct address_space *mapping;
#endif

	switch (obj->madv) {
	case I915_MADV_DONTNEED:
		i915_gem_object_truncate(obj);
		/* fall through */
	case __I915_MADV_PURGED:
		return;
	}

#if 0
	if (obj->base.filp == NULL)
		return;

	mapping = file_inode(obj->base.filp)->i_mapping;
	invalidate_mapping_pages(mapping, 0, (loff_t)-1);
#endif
}

static void
i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
{
	int page_count = obj->base.size / PAGE_SIZE;
	int i, ret;

	if (!obj->pages)
		return;

	BUG_ON(obj->madv == __I915_MADV_PURGED);

	ret = i915_gem_object_set_to_cpu_domain(obj, true);
	if (ret) {
		/* In the event of a disaster, abandon all caches and
		 * hope for the best.
		 */
		WARN_ON(ret != -EIO);
		i915_gem_clflush_object(obj, true);
		obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
	}

	if (i915_gem_object_needs_bit17_swizzle(obj))
		i915_gem_object_save_bit_17_swizzle(obj);

	if (obj->madv == I915_MADV_DONTNEED)
		obj->dirty = 0;

	for (i = 0; i < page_count; i++) {
		struct vm_page *page = obj->pages[i];

		if (obj->dirty)
			set_page_dirty(page);

		if (obj->madv == I915_MADV_WILLNEED)
			mark_page_accessed(page);

		vm_page_busy_wait(obj->pages[i], FALSE, "i915gem");
		vm_page_unwire(obj->pages[i], 1);
		vm_page_wakeup(obj->pages[i]);
	}
	obj->dirty = 0;

	kfree(obj->pages);
	obj->pages = NULL;
}
int
i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
{
	const struct drm_i915_gem_object_ops *ops = obj->ops;

	if (obj->pages == NULL)
		return 0;

	if (obj->pages_pin_count)
		return -EBUSY;

	BUG_ON(i915_gem_obj_bound_any(obj));

	/* ->put_pages might need to allocate memory for the bit17 swizzle
	 * array, hence protect them from being reaped by removing them from gtt
	 * lists early.
	 */
	list_del(&obj->global_list);

	ops->put_pages(obj);
	obj->pages = NULL;

	i915_gem_object_invalidate(obj);

	return 0;
}

static unsigned long
__i915_gem_shrink(struct drm_i915_private *dev_priv, long target,
		  bool purgeable_only)
{
	struct list_head still_in_list;
	struct drm_i915_gem_object *obj;
	unsigned long count = 0;

	/*
	 * As we may completely rewrite the (un)bound list whilst unbinding
	 * (due to retiring requests) we have to strictly process only
	 * one element of the list at a time, and recheck the list
	 * on every iteration.
	 *
	 * In particular, we must hold a reference whilst removing the
	 * object as we may end up waiting for and/or retiring the objects.
	 * This might release the final reference (held by the active list)
	 * and result in the object being freed from under us. This is
	 * similar to the precautions the eviction code must take whilst
	 * removing objects.
	 *
	 * Also note that although these lists do not hold a reference to
	 * the object we can safely grab one here: The final object
	 * unreferencing and the bound_list are both protected by the
	 * dev->struct_mutex and so we won't ever be able to observe an
	 * object on the bound_list with a reference count of 0.
	 */
	INIT_LIST_HEAD(&still_in_list);
	while (count < target && !list_empty(&dev_priv->mm.unbound_list)) {
		obj = list_first_entry(&dev_priv->mm.unbound_list,
				       typeof(*obj), global_list);
		list_move_tail(&obj->global_list, &still_in_list);

		if (!i915_gem_object_is_purgeable(obj) && purgeable_only)
			continue;

		drm_gem_object_reference(&obj->base);

		if (i915_gem_object_put_pages(obj) == 0)
			count += obj->base.size >> PAGE_SHIFT;

		drm_gem_object_unreference(&obj->base);
	}
	list_splice(&still_in_list, &dev_priv->mm.unbound_list);

	INIT_LIST_HEAD(&still_in_list);
	while (count < target && !list_empty(&dev_priv->mm.bound_list)) {
		struct i915_vma *vma, *v;

		obj = list_first_entry(&dev_priv->mm.bound_list,
				       typeof(*obj), global_list);
		list_move_tail(&obj->global_list, &still_in_list);

		if (!i915_gem_object_is_purgeable(obj) && purgeable_only)
			continue;

		drm_gem_object_reference(&obj->base);

		list_for_each_entry_safe(vma, v, &obj->vma_list, vma_link)
			if (i915_vma_unbind(vma))
				break;

		if (i915_gem_object_put_pages(obj) == 0)
			count += obj->base.size >> PAGE_SHIFT;

		drm_gem_object_unreference(&obj->base);
	}
	list_splice(&still_in_list, &dev_priv->mm.bound_list);

	return count;
}

static unsigned long
i915_gem_purge(struct drm_i915_private *dev_priv, long target)
{
	return __i915_gem_shrink(dev_priv, target, true);
}

static unsigned long
i915_gem_shrink_all(struct drm_i915_private *dev_priv)
{
	i915_gem_evict_everything(dev_priv->dev);
	return __i915_gem_shrink(dev_priv, LONG_MAX, false);
}
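/*
 * Illustrative sketch (generic code under assumed types, not part of the
 * driver): the still_in_list idiom above is a general way to walk a list
 * that other code may reshuffle while we sleep. Each visited element is
 * parked on a private list so it is seen exactly once, then the survivors
 * are spliced back.
 */
#if 0
	struct list_head still_in_list;	/* hypothetical local list */

	INIT_LIST_HEAD(&still_in_list);
	while (!list_empty(&src)) {
		struct item *it = list_first_entry(&src, struct item, link);
		list_move_tail(&it->link, &still_in_list);
		/* ... may drop and retake the lock here ... */
	}
	list_splice(&still_in_list, &src);
#endif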
static int
i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
	int page_count, i, j;
	vm_object_t vm_obj;
	struct vm_page *page;

	/* Assert that the object is not currently in any GPU domain. As it
	 * wasn't in the GTT, there shouldn't be any way it could have been in
	 * a GPU cache.
	 */
	BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
	BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);

	page_count = obj->base.size / PAGE_SIZE;
	obj->pages = kmalloc(page_count * sizeof(vm_page_t), M_DRM,
			     M_WAITOK);

	/* Get the list of pages out of our struct file. They'll be pinned
	 * at this point until we release them.
	 *
	 * Fail silently without starting the shrinker.
	 */
	vm_obj = obj->base.vm_obj;
	VM_OBJECT_LOCK(vm_obj);
	for (i = 0; i < page_count; i++) {
		page = shmem_read_mapping_page(vm_obj, i);
		if (IS_ERR(page)) {
			i915_gem_purge(dev_priv, page_count);
			page = shmem_read_mapping_page(vm_obj, i);
		}
		if (IS_ERR(page)) {
			/* We've tried hard to allocate the memory by reaping
			 * our own buffer, now let the real VM do its job and
			 * go down in flames if truly OOM.
			 */
			i915_gem_shrink_all(dev_priv);
			page = shmem_read_mapping_page(vm_obj, i);
			if (IS_ERR(page))
				goto err_pages;
		}
#ifdef CONFIG_SWIOTLB
		/* Inert leftover from the Linux scatter/gather path: st and
		 * sg are not defined in this port and CONFIG_SWIOTLB is
		 * never set here. */
		if (swiotlb_nr_tbl()) {
			st->nents++;
			sg_set_page(sg, page, PAGE_SIZE, 0);
			sg = sg_next(sg);
			continue;
		}
#endif
		obj->pages[i] = page;
	}
#ifdef CONFIG_SWIOTLB
	if (!swiotlb_nr_tbl())
#endif
	VM_OBJECT_UNLOCK(vm_obj);

	if (i915_gem_object_needs_bit17_swizzle(obj))
		i915_gem_object_do_bit_17_swizzle(obj);

	return 0;

err_pages:
	for (j = 0; j < i; j++) {
		page = obj->pages[j];
		vm_page_busy_wait(page, FALSE, "i915gem");
		vm_page_unwire(page, 0);
		vm_page_wakeup(page);
	}
	VM_OBJECT_UNLOCK(vm_obj);
	kfree(obj->pages);
	obj->pages = NULL;
	return (-EIO);
}

/* Ensure that the associated pages are gathered from the backing storage
 * and pinned into our object. i915_gem_object_get_pages() may be called
 * multiple times before they are released by a single call to
 * i915_gem_object_put_pages() - once the pages are no longer referenced
 * either as a result of memory pressure (reaping pages under the shrinker)
 * or as the object is itself released.
 */
int
i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
	const struct drm_i915_gem_object_ops *ops = obj->ops;
	int ret;

	if (obj->pages)
		return 0;

	if (obj->madv != I915_MADV_WILLNEED) {
		DRM_DEBUG("Attempting to obtain a purgeable object\n");
		return -EFAULT;
	}

	BUG_ON(obj->pages_pin_count);

	ret = ops->get_pages(obj);
	if (ret)
		return ret;

	list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list);
	return 0;
}
static void
i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
			       struct intel_engine_cs *ring)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	u32 seqno = intel_ring_get_seqno(ring);

	BUG_ON(ring == NULL);
	if (obj->ring != ring && obj->last_write_seqno) {
		/* Keep the seqno relative to the current ring */
		obj->last_write_seqno = seqno;
	}
	obj->ring = ring;

	/* Add a reference if we're newly entering the active list. */
	if (!obj->active) {
		drm_gem_object_reference(&obj->base);
		obj->active = 1;
	}

	list_move_tail(&obj->ring_list, &ring->active_list);

	obj->last_read_seqno = seqno;

	if (obj->fenced_gpu_access) {
		obj->last_fenced_seqno = seqno;

		/* Bump MRU to take account of the delayed flush */
		if (obj->fence_reg != I915_FENCE_REG_NONE) {
			struct drm_i915_fence_reg *reg;

			reg = &dev_priv->fence_regs[obj->fence_reg];
			list_move_tail(&reg->lru_list,
				       &dev_priv->mm.fence_list);
		}
	}
}

void i915_vma_move_to_active(struct i915_vma *vma,
			     struct intel_engine_cs *ring)
{
	list_move_tail(&vma->mm_list, &vma->vm->active_list);
	return i915_gem_object_move_to_active(vma->obj, ring);
}

static void
i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
	struct i915_address_space *vm;
	struct i915_vma *vma;

	BUG_ON(obj->base.write_domain & ~I915_GEM_GPU_DOMAINS);
	BUG_ON(!obj->active);

	list_for_each_entry(vm, &dev_priv->vm_list, global_link) {
		vma = i915_gem_obj_to_vma(obj, vm);
		if (vma && !list_empty(&vma->mm_list))
			list_move_tail(&vma->mm_list, &vm->inactive_list);
	}

	intel_fb_obj_flush(obj, true);

	list_del_init(&obj->ring_list);
	obj->ring = NULL;

	obj->last_read_seqno = 0;
	obj->last_write_seqno = 0;
	obj->base.write_domain = 0;

	obj->last_fenced_seqno = 0;
	obj->fenced_gpu_access = false;

	obj->active = 0;
	drm_gem_object_unreference(&obj->base);

	WARN_ON(i915_verify_lists(dev));
}

static void
i915_gem_object_retire(struct drm_i915_gem_object *obj)
{
	struct intel_engine_cs *ring = obj->ring;

	if (ring == NULL)
		return;

	if (i915_seqno_passed(ring->get_seqno(ring, true),
			      obj->last_read_seqno))
		i915_gem_object_move_to_inactive(obj);
}

static int
i915_gem_init_seqno(struct drm_device *dev, u32 seqno)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *ring;
	int ret, i, j;

	/* Carefully retire all requests without writing to the rings */
	for_each_ring(ring, dev_priv, i) {
		ret = intel_ring_idle(ring);
		if (ret)
			return ret;
	}
	i915_gem_retire_requests(dev);

	/* Finally reset hw state */
	for_each_ring(ring, dev_priv, i) {
		intel_ring_init_seqno(ring, seqno);

		for (j = 0; j < ARRAY_SIZE(ring->semaphore.sync_seqno); j++)
			ring->semaphore.sync_seqno[j] = 0;
	}

	return 0;
}

int i915_gem_set_seqno(struct drm_device *dev, u32 seqno)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret;

	if (seqno == 0)
		return -EINVAL;

	/* HWS page needs to be set less than what we
	 * will inject to ring
	 */
	ret = i915_gem_init_seqno(dev, seqno - 1);
	if (ret)
		return ret;

	/* Carefully set the last_seqno value so that wrap
	 * detection still works
	 */
	dev_priv->next_seqno = seqno;
	dev_priv->last_seqno = seqno - 1;
	if (dev_priv->last_seqno == 0)
		dev_priv->last_seqno--;

	return 0;
}
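/*
 * Illustrative sketch (definition assumed): seqno comparisons in this file
 * go through i915_seqno_passed(), which is conventionally a signed
 * comparison of the u32 difference, so ordering survives wraparound. A
 * worked example under that assumption:
 */
#if 0
	/* assumed definition: i915_seqno_passed(s1, s2) ==
	 *                     ((int32_t)(s1 - s2) >= 0) */
	u32 a = 0xfffffff0, b = 0x10;

	/* b was issued after a even though b < a numerically:
	 * (int32_t)(b - a) == 0x20 >= 0, so b has "passed" a. */
	WARN_ON(!i915_seqno_passed(b, a));
	WARN_ON(i915_seqno_passed(a, b));
#endif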
int
i915_gem_get_seqno(struct drm_device *dev, u32 *seqno)
{
	struct drm_i915_private *dev_priv = dev->dev_private;

	/* reserve 0 for non-seqno */
	if (dev_priv->next_seqno == 0) {
		int ret = i915_gem_init_seqno(dev, 0);
		if (ret)
			return ret;

		dev_priv->next_seqno = 1;
	}

	*seqno = dev_priv->last_seqno = dev_priv->next_seqno++;
	return 0;
}

int __i915_add_request(struct intel_engine_cs *ring,
		       struct drm_file *file,
		       struct drm_i915_gem_object *obj,
		       u32 *out_seqno)
{
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
	struct drm_i915_gem_request *request;
	u32 request_ring_position, request_start;
	int ret;

	request_start = intel_ring_get_tail(ring->buffer);
	/*
	 * Emit any outstanding flushes - execbuf can fail to emit the flush
	 * after having emitted the batchbuffer command. Hence we need to fix
	 * things up similar to emitting the lazy request. The difference here
	 * is that the flush _must_ happen before the next request, no matter
	 * what.
	 */
	ret = intel_ring_flush_all_caches(ring);
	if (ret)
		return ret;

	request = ring->preallocated_lazy_request;
	if (WARN_ON(request == NULL))
		return -ENOMEM;

	/* Record the position of the start of the request so that
	 * should we detect the updated seqno part-way through the
	 * GPU processing the request, we never over-estimate the
	 * position of the head.
	 */
	request_ring_position = intel_ring_get_tail(ring->buffer);

	ret = ring->add_request(ring);
	if (ret)
		return ret;

	request->seqno = intel_ring_get_seqno(ring);
	request->ring = ring;
	request->head = request_start;
	request->tail = request_ring_position;

	/* Whilst this request exists, batch_obj will be on the
	 * active_list, and so will hold the active reference. Only when this
	 * request is retired will the batch_obj be moved onto the
	 * inactive_list and lose its active reference. Hence we do not need
	 * to explicitly hold another reference here.
	 */
	request->batch_obj = obj;

	/* Hold a reference to the current context so that we can inspect
	 * it later in case a hangcheck error event fires.
	 */
	request->ctx = ring->last_context;
	if (request->ctx)
		i915_gem_context_reference(request->ctx);

	request->emitted_jiffies = jiffies;
	list_add_tail(&request->list, &ring->request_list);
	request->file_priv = NULL;

	if (file) {
		struct drm_i915_file_private *file_priv = file->driver_priv;

		spin_lock(&file_priv->mm.lock);
		request->file_priv = file_priv;
		list_add_tail(&request->client_list,
			      &file_priv->mm.request_list);
		spin_unlock(&file_priv->mm.lock);
	}

	trace_i915_gem_request_add(ring, request->seqno);
	ring->outstanding_lazy_seqno = 0;
	ring->preallocated_lazy_request = NULL;

	if (!dev_priv->ums.mm_suspended) {
		i915_queue_hangcheck(ring->dev);

		cancel_delayed_work_sync(&dev_priv->mm.idle_work);
		queue_delayed_work(dev_priv->wq,
				   &dev_priv->mm.retire_work,
				   round_jiffies_up_relative(HZ));
		intel_mark_busy(dev_priv->dev);
	}

	if (out_seqno)
		*out_seqno = request->seqno;
	return 0;
}

static inline void
i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
{
	struct drm_i915_file_private *file_priv = request->file_priv;

	if (!file_priv)
		return;

	spin_lock(&file_priv->mm.lock);
	list_del(&request->client_list);
	request->file_priv = NULL;
	spin_unlock(&file_priv->mm.lock);
}

static bool i915_context_is_banned(struct drm_i915_private *dev_priv,
				   const struct intel_context *ctx)
{
	unsigned long elapsed;

	elapsed = get_seconds() - ctx->hang_stats.guilty_ts;

	if (ctx->hang_stats.banned)
		return true;

	if (elapsed <= DRM_I915_CTX_BAN_PERIOD) {
		if (!i915_gem_context_is_default(ctx)) {
			DRM_DEBUG("context hanging too fast, banning!\n");
			return true;
		} else if (i915_stop_ring_allow_ban(dev_priv)) {
			if (i915_stop_ring_allow_warn(dev_priv))
				DRM_ERROR("gpu hanging too fast, banning!\n");
			return true;
		}
	}

	return false;
}

static void i915_set_reset_status(struct drm_i915_private *dev_priv,
				  struct intel_context *ctx,
				  const bool guilty)
{
	struct i915_ctx_hang_stats *hs;

	if (WARN_ON(!ctx))
		return;

	hs = &ctx->hang_stats;

	if (guilty) {
		hs->banned = i915_context_is_banned(dev_priv, ctx);
		hs->batch_active++;
		hs->guilty_ts = get_seconds();
	} else {
		hs->batch_pending++;
	}
}
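/*
 * Worked example of the ban policy above (timeline assumed for
 * illustration): a non-default context found guilty at t=100s records
 * guilty_ts=100 via i915_set_reset_status(). If it is found guilty again
 * at t=102s, elapsed=2s falls within DRM_I915_CTX_BAN_PERIOD, so
 * i915_context_is_banned() returns true and hang_stats.banned then sticks
 * for every later check.
 */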
static void i915_gem_free_request(struct drm_i915_gem_request *request)
{
	list_del(&request->list);
	i915_gem_request_remove_from_client(request);

	if (request->ctx)
		i915_gem_context_unreference(request->ctx);

	kfree(request);
}

struct drm_i915_gem_request *
i915_gem_find_active_request(struct intel_engine_cs *ring)
{
	struct drm_i915_gem_request *request;
	u32 completed_seqno;

	completed_seqno = ring->get_seqno(ring, false);

	list_for_each_entry(request, &ring->request_list, list) {
		if (i915_seqno_passed(completed_seqno, request->seqno))
			continue;

		return request;
	}

	return NULL;
}

static void i915_gem_reset_ring_status(struct drm_i915_private *dev_priv,
				       struct intel_engine_cs *ring)
{
	struct drm_i915_gem_request *request;
	bool ring_hung;

	request = i915_gem_find_active_request(ring);

	if (request == NULL)
		return;

	ring_hung = ring->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG;

	i915_set_reset_status(dev_priv, request->ctx, ring_hung);

	list_for_each_entry_continue(request, &ring->request_list, list)
		i915_set_reset_status(dev_priv, request->ctx, false);
}

static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
					struct intel_engine_cs *ring)
{
	while (!list_empty(&ring->active_list)) {
		struct drm_i915_gem_object *obj;

		obj = list_first_entry(&ring->active_list,
				       struct drm_i915_gem_object,
				       ring_list);

		i915_gem_object_move_to_inactive(obj);
	}

	/*
	 * We must free the requests after all the corresponding objects have
	 * been moved off active lists. Which is the same order as the normal
	 * retire_requests function does. This is important if objects hold
	 * implicit references on things like e.g. ppgtt address spaces through
	 * the request.
	 */
	while (!list_empty(&ring->request_list)) {
		struct drm_i915_gem_request *request;

		request = list_first_entry(&ring->request_list,
					   struct drm_i915_gem_request,
					   list);

		i915_gem_free_request(request);
	}

	/* These may not have been flushed before the reset, do so now */
	kfree(ring->preallocated_lazy_request);
	ring->preallocated_lazy_request = NULL;
	ring->outstanding_lazy_seqno = 0;
}

void i915_gem_restore_fences(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int i;

	for (i = 0; i < dev_priv->num_fence_regs; i++) {
		struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];

		/*
		 * Commit delayed tiling changes if we have an object still
		 * attached to the fence, otherwise just clear the fence.
		 */
		if (reg->obj) {
			i915_gem_object_update_fence(reg->obj, reg,
						     reg->obj->tiling_mode);
		} else {
			i915_gem_write_fence(dev, i, NULL);
		}
	}
}

void i915_gem_reset(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *ring;
	int i;

	/*
	 * Before we free the objects from the requests, we need to inspect
	 * them for finding the guilty party. As the requests only borrow
	 * their reference to the objects, the inspection must be done first.
	 */
	for_each_ring(ring, dev_priv, i)
		i915_gem_reset_ring_status(dev_priv, ring);

	for_each_ring(ring, dev_priv, i)
		i915_gem_reset_ring_cleanup(dev_priv, ring);

	i915_gem_context_reset(dev);

	i915_gem_restore_fences(dev);
}
2752 */ 2753 while (!list_empty(&ring->active_list)) { 2754 struct drm_i915_gem_object *obj; 2755 2756 obj = list_first_entry(&ring->active_list, 2757 struct drm_i915_gem_object, 2758 ring_list); 2759 2760 if (!i915_seqno_passed(seqno, obj->last_read_seqno)) 2761 break; 2762 2763 i915_gem_object_move_to_inactive(obj); 2764 } 2765 2766 2767 while (!list_empty(&ring->request_list)) { 2768 struct drm_i915_gem_request *request; 2769 2770 request = list_first_entry(&ring->request_list, 2771 struct drm_i915_gem_request, 2772 list); 2773 2774 if (!i915_seqno_passed(seqno, request->seqno)) 2775 break; 2776 2777 trace_i915_gem_request_retire(ring, request->seqno); 2778 /* We know the GPU must have read the request to have 2779 * sent us the seqno + interrupt, so use the position 2780 * of tail of the request to update the last known position 2781 * of the GPU head. 2782 */ 2783 ring->buffer->last_retired_head = request->tail; 2784 2785 i915_gem_free_request(request); 2786 } 2787 2788 if (unlikely(ring->trace_irq_seqno && 2789 i915_seqno_passed(seqno, ring->trace_irq_seqno))) { 2790 ring->irq_put(ring); 2791 ring->trace_irq_seqno = 0; 2792 } 2793 2794 WARN_ON(i915_verify_lists(ring->dev)); 2795 } 2796 2797 bool 2798 i915_gem_retire_requests(struct drm_device *dev) 2799 { 2800 struct drm_i915_private *dev_priv = dev->dev_private; 2801 struct intel_engine_cs *ring; 2802 bool idle = true; 2803 int i; 2804 2805 for_each_ring(ring, dev_priv, i) { 2806 i915_gem_retire_requests_ring(ring); 2807 idle &= list_empty(&ring->request_list); 2808 } 2809 2810 if (idle) 2811 mod_delayed_work(dev_priv->wq, 2812 &dev_priv->mm.idle_work, 2813 msecs_to_jiffies(100)); 2814 2815 return idle; 2816 } 2817 2818 static void 2819 i915_gem_retire_work_handler(struct work_struct *work) 2820 { 2821 struct drm_i915_private *dev_priv = 2822 container_of(work, typeof(*dev_priv), mm.retire_work.work); 2823 struct drm_device *dev = dev_priv->dev; 2824 bool idle; 2825 2826 /* Come back later if the device is busy... */ 2827 idle = false; 2828 if (mutex_trylock(&dev->struct_mutex)) { 2829 idle = i915_gem_retire_requests(dev); 2830 mutex_unlock(&dev->struct_mutex); 2831 } 2832 if (!idle) 2833 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 2834 round_jiffies_up_relative(HZ)); 2835 } 2836 2837 static void 2838 i915_gem_idle_work_handler(struct work_struct *work) 2839 { 2840 struct drm_i915_private *dev_priv = 2841 container_of(work, typeof(*dev_priv), mm.idle_work.work); 2842 2843 intel_mark_idle(dev_priv->dev); 2844 } 2845 2846 /** 2847 * Ensures that an object will eventually get non-busy by flushing any required 2848 * write domains, emitting any outstanding lazy request and retiring and 2849 * completed requests. 2850 */ 2851 static int 2852 i915_gem_object_flush_active(struct drm_i915_gem_object *obj) 2853 { 2854 int ret; 2855 2856 if (obj->active) { 2857 ret = i915_gem_check_olr(obj->ring, obj->last_read_seqno); 2858 if (ret) 2859 return ret; 2860 2861 i915_gem_retire_requests_ring(obj->ring); 2862 } 2863 2864 return 0; 2865 } 2866 2867 /** 2868 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT 2869 * @DRM_IOCTL_ARGS: standard ioctl arguments 2870 * 2871 * Returns 0 if successful, else an error is returned with the remaining time in 2872 * the timeout parameter. 
/**
 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
 * @DRM_IOCTL_ARGS: standard ioctl arguments
 *
 * Returns 0 if successful, else an error is returned with the remaining time
 * in the timeout parameter.
 *  -ETIME: object is still busy after timeout
 *  -ERESTARTSYS: signal interrupted the wait
 *  -ENOENT: object doesn't exist
 * Also possible, but rare:
 *  -EAGAIN: GPU wedged
 *  -ENOMEM: damn
 *  -ENODEV: Internal IRQ fail
 *  -E?: The add request failed
 *
 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
 * non-zero timeout parameter the wait ioctl will wait for the given number of
 * nanoseconds on an object becoming unbusy. Since the wait itself does so
 * without holding struct_mutex the object may become re-busied before this
 * function completes. A similar but shorter race condition exists in the
 * busy ioctl.
 */
int
i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_wait *args = data;
	struct drm_i915_gem_object *obj;
	struct intel_engine_cs *ring = NULL;
	struct timespec timeout_stack, *timeout = NULL;
	unsigned reset_counter;
	u32 seqno = 0;
	int ret = 0;

	if (args->timeout_ns >= 0) {
		timeout_stack = ns_to_timespec(args->timeout_ns);
		timeout = &timeout_stack;
	}

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->bo_handle));
	if (&obj->base == NULL) {
		mutex_unlock(&dev->struct_mutex);
		return -ENOENT;
	}

	/* Need to make sure the object gets inactive eventually. */
	ret = i915_gem_object_flush_active(obj);
	if (ret)
		goto out;

	if (obj->active) {
		seqno = obj->last_read_seqno;
		ring = obj->ring;
	}

	if (seqno == 0)
		goto out;

	/* Do this after OLR check to make sure we make forward progress polling
	 * on this IOCTL with a 0 timeout (like busy ioctl)
	 */
	if (!args->timeout_ns) {
		ret = -ETIMEDOUT;
		goto out;
	}

	drm_gem_object_unreference(&obj->base);
	reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
	mutex_unlock(&dev->struct_mutex);

	ret = __wait_seqno(ring, seqno, reset_counter, true, timeout, file->driver_priv);
	if (timeout)
		args->timeout_ns = timespec_to_ns(timeout);
	return ret;

out:
	drm_gem_object_unreference(&obj->base);
	mutex_unlock(&dev->struct_mutex);
	return ret;
}
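/*
 * Illustrative userspace-side sketch (assumed libdrm-style code, not part
 * of this file; fd and bo_handle are hypothetical): waiting up to 1ms for a
 * buffer to go idle. A zero timeout_ns would merely probe busyness, as the
 * comment above describes.
 */
#if 0
	struct drm_i915_gem_wait wait = {
		.bo_handle = bo_handle,
		.timeout_ns = 1000000,	/* 1ms; 0 just probes busyness */
	};
	int err = drmIoctl(fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
	/* A failure with errno ETIME/ETIMEDOUT means still busy; on
	 * success wait.timeout_ns holds the time remaining. */
#endif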
/**
 * i915_gem_object_sync - sync an object to a ring.
 *
 * @obj: object which may be in use on another ring.
 * @to: ring we wish to use the object on. May be NULL.
 *
 * This code is meant to abstract object synchronization with the GPU.
 * Calling with NULL implies synchronizing the object with the CPU
 * rather than a particular GPU ring.
 *
 * Returns 0 if successful, else propagates up the lower layer error.
 */
int
i915_gem_object_sync(struct drm_i915_gem_object *obj,
		     struct intel_engine_cs *to)
{
	struct intel_engine_cs *from = obj->ring;
	u32 seqno;
	int ret, idx;

	if (from == NULL || to == from)
		return 0;

	if (to == NULL || !i915_semaphore_is_enabled(obj->base.dev))
		return i915_gem_object_wait_rendering(obj, false);

	idx = intel_ring_sync_index(from, to);

	seqno = obj->last_read_seqno;
	/* Optimization: Avoid semaphore sync when we are sure we already
	 * waited for an object with higher seqno */
	if (seqno <= from->semaphore.sync_seqno[idx])
		return 0;

	ret = i915_gem_check_olr(obj->ring, seqno);
	if (ret)
		return ret;

	trace_i915_gem_ring_sync_to(from, to, seqno);
	ret = to->semaphore.sync_to(to, from, seqno);
	if (!ret)
		/* We use last_read_seqno because sync_to()
		 * might have just caused seqno wrap under
		 * the radar.
		 */
		from->semaphore.sync_seqno[idx] = obj->last_read_seqno;

	return ret;
}

static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
{
	u32 old_write_domain, old_read_domains;

	/* Force a pagefault for domain tracking on next user access */
	i915_gem_release_mmap(obj);

	if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
		return;

	/* Wait for any direct GTT access to complete */
	mb();

	old_read_domains = obj->base.read_domains;
	old_write_domain = obj->base.write_domain;

	obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT;
	obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT;

	trace_i915_gem_object_change_domain(obj,
					    old_read_domains,
					    old_write_domain);
}

int i915_vma_unbind(struct i915_vma *vma)
{
	struct drm_i915_gem_object *obj = vma->obj;
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
	int ret;

	if (list_empty(&vma->vma_link))
		return 0;

	if (!drm_mm_node_allocated(&vma->node)) {
		i915_gem_vma_destroy(vma);
		return 0;
	}

	if (vma->pin_count)
		return -EBUSY;

	BUG_ON(obj->pages == NULL);

	ret = i915_gem_object_finish_gpu(obj);
	if (ret)
		return ret;
	/* Continue on if we fail due to EIO, the GPU is hung so we
	 * should be safe and we need to cleanup or else we might
	 * cause memory corruption through use-after-free.
	 */

	if (i915_is_ggtt(vma->vm)) {
		i915_gem_object_finish_gtt(obj);

		/* release the fence reg _after_ flushing */
		ret = i915_gem_object_put_fence(obj);
		if (ret)
			return ret;
	}

	trace_i915_vma_unbind(vma);

	vma->unbind_vma(vma);

	list_del_init(&vma->mm_list);
	/* Avoid an unnecessary call to unbind on rebind. */
	if (i915_is_ggtt(vma->vm))
		obj->map_and_fenceable = true;

	drm_mm_remove_node(&vma->node);
	i915_gem_vma_destroy(vma);

	/* Since the unbound list is global, only move to that list if
	 * no more VMAs exist. */
	if (list_empty(&obj->vma_list)) {
		i915_gem_gtt_finish_object(obj);
		list_move_tail(&obj->global_list, &dev_priv->mm.unbound_list);
	}

	/* And finally now the object is completely decoupled from this vma,
	 * we can drop its hold on the backing storage and allow it to be
	 * reaped by the shrinker.
	 */
	i915_gem_object_unpin_pages(obj);

	return 0;
}

int i915_gpu_idle(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *ring;
	int ret, i;

	/* Flush everything onto the inactive list. */
	for_each_ring(ring, dev_priv, i) {
		ret = i915_switch_context(ring, ring->default_context);
		if (ret)
			return ret;

		ret = intel_ring_idle(ring);
		if (ret)
			return ret;
	}

	return 0;
}

static void i965_write_fence_reg(struct drm_device *dev, int reg,
				 struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int fence_reg;
	int fence_pitch_shift;

	if (INTEL_INFO(dev)->gen >= 6) {
		fence_reg = FENCE_REG_SANDYBRIDGE_0;
		fence_pitch_shift = SANDYBRIDGE_FENCE_PITCH_SHIFT;
	} else {
		fence_reg = FENCE_REG_965_0;
		fence_pitch_shift = I965_FENCE_PITCH_SHIFT;
	}

	fence_reg += reg * 8;

	/* To w/a incoherency with non-atomic 64-bit register updates,
	 * we split the 64-bit update into two 32-bit writes. In order
	 * for a partial fence not to be evaluated between writes, we
	 * precede the update with write to turn off the fence register,
	 * and only enable the fence as the last step.
	 *
	 * For extra levels of paranoia, we make sure each step lands
	 * before applying the next step.
	 */
	I915_WRITE(fence_reg, 0);
	POSTING_READ(fence_reg);

	if (obj) {
		u32 size = i915_gem_obj_ggtt_size(obj);
		uint64_t val;

		val = (uint64_t)((i915_gem_obj_ggtt_offset(obj) + size - 4096) &
				 0xfffff000) << 32;
		val |= i915_gem_obj_ggtt_offset(obj) & 0xfffff000;
		val |= (uint64_t)((obj->stride / 128) - 1) << fence_pitch_shift;
		if (obj->tiling_mode == I915_TILING_Y)
			val |= 1 << I965_FENCE_TILING_Y_SHIFT;
		val |= I965_FENCE_REG_VALID;

		I915_WRITE(fence_reg + 4, val >> 32);
		POSTING_READ(fence_reg + 4);

		I915_WRITE(fence_reg + 0, val);
		POSTING_READ(fence_reg);
	} else {
		I915_WRITE(fence_reg + 4, 0);
		POSTING_READ(fence_reg + 4);
	}
}
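/*
 * Worked example of the i965 fence encoding above (values assumed for
 * illustration): an X-tiled object at GGTT offset 0x01000000, size 1MB,
 * stride 4096. The upper dword holds the last page of the region
 * ((0x01000000 + 0x100000 - 4096) & 0xfffff000 == 0x010ff000), the lower
 * dword holds the start address, plus the pitch field (4096/128 - 1 == 31)
 * shifted into place and the valid bit.
 */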
static void i915_write_fence_reg(struct drm_device *dev, int reg,
				 struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	u32 val;

	if (obj) {
		u32 size = i915_gem_obj_ggtt_size(obj);
		int pitch_val;
		int tile_width;

		WARN((i915_gem_obj_ggtt_offset(obj) & ~I915_FENCE_START_MASK) ||
		     (size & -size) != size ||
		     (i915_gem_obj_ggtt_offset(obj) & (size - 1)),
		     "object 0x%08lx [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n",
		     i915_gem_obj_ggtt_offset(obj), obj->map_and_fenceable, size);

		if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))
			tile_width = 128;
		else
			tile_width = 512;

		/* Note: pitch better be a power of two tile widths */
		pitch_val = obj->stride / tile_width;
		pitch_val = ffs(pitch_val) - 1;

		val = i915_gem_obj_ggtt_offset(obj);
		if (obj->tiling_mode == I915_TILING_Y)
			val |= 1 << I830_FENCE_TILING_Y_SHIFT;
		val |= I915_FENCE_SIZE_BITS(size);
		val |= pitch_val << I830_FENCE_PITCH_SHIFT;
		val |= I830_FENCE_REG_VALID;
	} else
		val = 0;

	if (reg < 8)
		reg = FENCE_REG_830_0 + reg * 4;
	else
		reg = FENCE_REG_945_8 + (reg - 8) * 4;

	I915_WRITE(reg, val);
	POSTING_READ(reg);
}

static void i830_write_fence_reg(struct drm_device *dev, int reg,
				 struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	uint32_t val;

	if (obj) {
		u32 size = i915_gem_obj_ggtt_size(obj);
		uint32_t pitch_val;

		WARN((i915_gem_obj_ggtt_offset(obj) & ~I830_FENCE_START_MASK) ||
		     (size & -size) != size ||
		     (i915_gem_obj_ggtt_offset(obj) & (size - 1)),
		     "object 0x%08lx not 512K or pot-size 0x%08x aligned\n",
		     i915_gem_obj_ggtt_offset(obj), size);

		pitch_val = obj->stride / 128;
		pitch_val = ffs(pitch_val) - 1;

		val = i915_gem_obj_ggtt_offset(obj);
		if (obj->tiling_mode == I915_TILING_Y)
			val |= 1 << I830_FENCE_TILING_Y_SHIFT;
		val |= I830_FENCE_SIZE_BITS(size);
		val |= pitch_val << I830_FENCE_PITCH_SHIFT;
		val |= I830_FENCE_REG_VALID;
	} else
		val = 0;

	I915_WRITE(FENCE_REG_830_0 + reg * 4, val);
	POSTING_READ(FENCE_REG_830_0 + reg * 4);
}

static inline bool i915_gem_object_needs_mb(struct drm_i915_gem_object *obj)
{
	return obj && obj->base.read_domains & I915_GEM_DOMAIN_GTT;
}

static void i915_gem_write_fence(struct drm_device *dev, int reg,
				 struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *dev_priv = dev->dev_private;

	/* Ensure that all CPU reads are completed before installing a fence
	 * and all writes before removing the fence.
	 */
	if (i915_gem_object_needs_mb(dev_priv->fence_regs[reg].obj))
		mb();

	WARN(obj && (!obj->stride || !obj->tiling_mode),
	     "bogus fence setup with stride: 0x%x, tiling mode: %i\n",
	     obj->stride, obj->tiling_mode);

	switch (INTEL_INFO(dev)->gen) {
	case 8:
	case 7:
	case 6:
	case 5:
	case 4: i965_write_fence_reg(dev, reg, obj); break;
	case 3: i915_write_fence_reg(dev, reg, obj); break;
	case 2: i830_write_fence_reg(dev, reg, obj); break;
	default: BUG();
	}

	/* And similarly be paranoid that no direct access to this region
	 * is reordered to before the fence is installed.
	 */
	if (i915_gem_object_needs_mb(obj))
		mb();
}

static inline int fence_number(struct drm_i915_private *dev_priv,
			       struct drm_i915_fence_reg *fence)
{
	return fence - dev_priv->fence_regs;
}

static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
					 struct drm_i915_fence_reg *fence,
					 bool enable)
{
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
	int reg = fence_number(dev_priv, fence);

	i915_gem_write_fence(obj->base.dev, reg, enable ? obj : NULL);

	if (enable) {
		obj->fence_reg = reg;
		fence->obj = obj;
		list_move_tail(&fence->lru_list, &dev_priv->mm.fence_list);
	} else {
		obj->fence_reg = I915_FENCE_REG_NONE;
		fence->obj = NULL;
		list_del_init(&fence->lru_list);
	}
	obj->fence_dirty = false;
}

static int
i915_gem_object_wait_fence(struct drm_i915_gem_object *obj)
{
	if (obj->last_fenced_seqno) {
		int ret = i915_wait_seqno(obj->ring, obj->last_fenced_seqno);
		if (ret)
			return ret;

		obj->last_fenced_seqno = 0;
	}

	obj->fenced_gpu_access = false;
	return 0;
}

int
i915_gem_object_put_fence(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
	struct drm_i915_fence_reg *fence;
	int ret;

	ret = i915_gem_object_wait_fence(obj);
	if (ret)
		return ret;

	if (obj->fence_reg == I915_FENCE_REG_NONE)
		return 0;

	fence = &dev_priv->fence_regs[obj->fence_reg];

	if (WARN_ON(fence->pin_count))
		return -EBUSY;

	i915_gem_object_fence_lost(obj);
	i915_gem_object_update_fence(obj, fence, false);

	return 0;
}

static struct drm_i915_fence_reg *
i915_find_fence_reg(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_fence_reg *reg, *avail;
	int i;

	/* First try to find a free reg */
	avail = NULL;
	for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) {
		reg = &dev_priv->fence_regs[i];
		if (!reg->obj)
			return reg;

		if (!reg->pin_count)
			avail = reg;
	}

	if (avail == NULL)
		goto deadlock;

	/* None available, try to steal one or wait for a user to finish */
	list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) {
		if (reg->pin_count)
			continue;

		return reg;
	}

deadlock:
	/* Wait for completion of pending flips which consume fences */
	if (intel_has_pending_fb_unpin(dev))
		return ERR_PTR(-EAGAIN);

	return ERR_PTR(-EDEADLK);
}

/**
 * i915_gem_object_get_fence - set up fencing for an object
 * @obj: object to map through a fence reg
 *
 * When mapping objects through the GTT, userspace wants to be able to write
 * to them without having to worry about swizzling if the object is tiled.
 * This function walks the fence regs looking for a free one for @obj,
 * stealing one if it can't find any.
 *
 * It then sets up the reg based on the object's properties: address, pitch
 * and tiling format.
 *
 * For an untiled surface, this removes any existing fence.
 */
int
i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	bool enable = obj->tiling_mode != I915_TILING_NONE;
	struct drm_i915_fence_reg *reg;
	int ret;

	/* Have we updated the tiling parameters upon the object and so
	 * will need to serialise the write to the associated fence register?
	 */
	if (obj->fence_dirty) {
		ret = i915_gem_object_wait_fence(obj);
		if (ret)
			return ret;
	}

	/* Just update our place in the LRU if our fence is getting reused. */
	if (obj->fence_reg != I915_FENCE_REG_NONE) {
		reg = &dev_priv->fence_regs[obj->fence_reg];
		if (!obj->fence_dirty) {
			list_move_tail(&reg->lru_list,
				       &dev_priv->mm.fence_list);
			return 0;
		}
	} else if (enable) {
		reg = i915_find_fence_reg(dev);
		if (IS_ERR(reg))
			return PTR_ERR(reg);

		if (reg->obj) {
			struct drm_i915_gem_object *old = reg->obj;

			ret = i915_gem_object_wait_fence(old);
			if (ret)
				return ret;

			i915_gem_object_fence_lost(old);
		}
	} else
		return 0;

	i915_gem_object_update_fence(obj, reg, enable);

	return 0;
}

static bool i915_gem_valid_gtt_space(struct drm_device *dev,
				     struct drm_mm_node *gtt_space,
				     unsigned long cache_level)
{
	struct drm_mm_node *other;

	/* On non-LLC machines we have to be careful when putting differing
	 * types of snoopable memory together to avoid the prefetcher
	 * crossing memory domains and dying.
	 */
	if (HAS_LLC(dev))
		return true;

	if (!drm_mm_node_allocated(gtt_space))
		return true;

	if (list_empty(&gtt_space->node_list))
		return true;

	other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list);
	if (other->allocated && !other->hole_follows && other->color != cache_level)
		return false;

	other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list);
	if (other->allocated && !gtt_space->hole_follows && other->color != cache_level)
		return false;

	return true;
}

static void i915_gem_verify_gtt(struct drm_device *dev)
{
#if WATCH_GTT
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj;
	int err = 0;

	list_for_each_entry(obj, &dev_priv->mm.gtt_list, global_list) {
		if (obj->gtt_space == NULL) {
			printk(KERN_ERR "object found on GTT list with no space reserved\n");
			err++;
			continue;
		}

		if (obj->cache_level != obj->gtt_space->color) {
			printk(KERN_ERR "object reserved space [%08lx, %08lx] with wrong color, cache_level=%x, color=%lx\n",
			       i915_gem_obj_ggtt_offset(obj),
			       i915_gem_obj_ggtt_offset(obj) + i915_gem_obj_ggtt_size(obj),
			       obj->cache_level,
			       obj->gtt_space->color);
			err++;
			continue;
		}

		if (!i915_gem_valid_gtt_space(dev,
					      obj->gtt_space,
					      obj->cache_level)) {
			printk(KERN_ERR "invalid GTT space found at [%08lx, %08lx] - color=%x\n",
			       i915_gem_obj_ggtt_offset(obj),
			       i915_gem_obj_ggtt_offset(obj) + i915_gem_obj_ggtt_size(obj),
			       obj->cache_level);
			err++;
			continue;
		}
	}

	WARN_ON(err);
#endif
}
/**
 * Finds free space in the GTT aperture and binds the object there.
 */
static struct i915_vma *
i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
			   struct i915_address_space *vm,
			   unsigned alignment,
			   uint64_t flags)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	u32 size, fence_size, fence_alignment, unfenced_alignment;
	unsigned long start =
		flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
	unsigned long end =
		flags & PIN_MAPPABLE ? dev_priv->gtt.mappable_end : vm->total;
	struct i915_vma *vma;
	int ret;

	fence_size = i915_gem_get_gtt_size(dev,
					   obj->base.size,
					   obj->tiling_mode);
	fence_alignment = i915_gem_get_gtt_alignment(dev,
						     obj->base.size,
						     obj->tiling_mode, true);
	unfenced_alignment =
		i915_gem_get_gtt_alignment(dev,
					   obj->base.size,
					   obj->tiling_mode, false);

	if (alignment == 0)
		alignment = flags & PIN_MAPPABLE ? fence_alignment :
						   unfenced_alignment;
	if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) {
		DRM_DEBUG("Invalid object alignment requested %u\n", alignment);
		return ERR_PTR(-EINVAL);
	}

	size = flags & PIN_MAPPABLE ? fence_size : obj->base.size;

	/* If the object is bigger than the entire aperture, reject it early
	 * before evicting everything in a vain attempt to find space.
	 */
	if (obj->base.size > end) {
		DRM_DEBUG("Attempting to bind an object larger than the aperture: object=%zd > %s aperture=%lu\n",
			  obj->base.size,
			  flags & PIN_MAPPABLE ? "mappable" : "total",
			  end);
		return ERR_PTR(-E2BIG);
	}

	ret = i915_gem_object_get_pages(obj);
	if (ret)
		return ERR_PTR(ret);

	i915_gem_object_pin_pages(obj);

	vma = i915_gem_obj_lookup_or_create_vma(obj, vm);
	if (IS_ERR(vma))
		goto err_unpin;

search_free:
	ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
						  size, alignment,
						  obj->cache_level,
						  start, end,
						  DRM_MM_SEARCH_DEFAULT,
						  DRM_MM_CREATE_DEFAULT);
	if (ret) {
		ret = i915_gem_evict_something(dev, vm, size, alignment,
					       obj->cache_level,
					       start, end,
					       flags);
		if (ret == 0)
			goto search_free;

		goto err_free_vma;
	}
	if (WARN_ON(!i915_gem_valid_gtt_space(dev, &vma->node,
					      obj->cache_level))) {
		ret = -EINVAL;
		goto err_remove_node;
	}

	ret = i915_gem_gtt_prepare_object(obj);
	if (ret)
		goto err_remove_node;

	list_move_tail(&obj->global_list, &dev_priv->mm.bound_list);
	list_add_tail(&vma->mm_list, &vm->inactive_list);

	if (i915_is_ggtt(vm)) {
		bool mappable, fenceable;

		fenceable = (vma->node.size == fence_size &&
			     (vma->node.start & (fence_alignment - 1)) == 0);

		mappable = (vma->node.start + obj->base.size <=
			    dev_priv->gtt.mappable_end);

		obj->map_and_fenceable = mappable && fenceable;
	}

	WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable);

	trace_i915_vma_bind(vma, flags);
	vma->bind_vma(vma, obj->cache_level,
		      flags & (PIN_MAPPABLE | PIN_GLOBAL) ? GLOBAL_BIND : 0);

	i915_gem_verify_gtt(dev);
	return vma;

err_remove_node:
	drm_mm_remove_node(&vma->node);
err_free_vma:
	i915_gem_vma_destroy(vma);
	vma = ERR_PTR(ret);
err_unpin:
	i915_gem_object_unpin_pages(obj);
	return vma;
}
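/*
 * Illustrative sketch (values assumed): binding a 64KB X-tiled object on
 * gen3 with PIN_MAPPABLE. fence_size rounds up to the 1MB gen3 minimum, so
 * the node above is sized and aligned to 1MB inside the mappable aperture,
 * and map_and_fenceable is only set if both constraints are met. Callers
 * typically reach this path through i915_gem_obj_ggtt_pin(), e.g.:
 */
#if 0
	ret = i915_gem_obj_ggtt_pin(obj, 0 /* default alignment */,
				    PIN_MAPPABLE);
	/* On success the object is bound at a CPU-mappable, fenceable
	 * GGTT offset and pinned against eviction. */
#endif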
bool
i915_gem_clflush_object(struct drm_i915_gem_object *obj,
			bool force)
{
	/* If we don't have a page list set up, then we're not pinned
	 * to GPU, and we can ignore the cache flush because it'll happen
	 * again at bind time.
	 */
	if (obj->pages == NULL)
		return false;

	/*
	 * Stolen memory is always coherent with the GPU as it is explicitly
	 * marked as wc by the system, or the system is cache-coherent.
	 */
	if (obj->stolen)
		return false;

	/* If the GPU is snooping the contents of the CPU cache,
	 * we do not need to manually clear the CPU cache lines. However,
	 * the caches are only snooped when the render cache is
	 * flushed/invalidated. As we always have to emit invalidations
	 * and flushes when moving into and out of the RENDER domain, correct
	 * snooping behaviour occurs naturally as the result of our domain
	 * tracking.
	 */
	if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
		return false;

	trace_i915_gem_object_clflush(obj);
	drm_clflush_pages(obj->pages, obj->base.size / PAGE_SIZE);

	return true;
}

/** Flushes the GTT write domain for the object if it's dirty. */
static void
i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
{
	uint32_t old_write_domain;

	if (obj->base.write_domain != I915_GEM_DOMAIN_GTT)
		return;

	/* No actual flushing is required for the GTT write domain. Writes
	 * to it immediately go to main memory as far as we know, so there's
	 * no chipset flush. It also doesn't land in render cache.
	 *
	 * However, we do have to enforce the order so that all writes through
	 * the GTT land before any writes to the device, such as updates to
	 * the GATT itself.
	 */
	wmb();

	old_write_domain = obj->base.write_domain;
	obj->base.write_domain = 0;

	intel_fb_obj_flush(obj, false);

	trace_i915_gem_object_change_domain(obj,
					    obj->base.read_domains,
					    old_write_domain);
}

/** Flushes the CPU write domain for the object if it's dirty. */
static void
i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj,
				       bool force)
{
	uint32_t old_write_domain;

	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
		return;

	if (i915_gem_clflush_object(obj, force))
		i915_gem_chipset_flush(obj->base.dev);

	old_write_domain = obj->base.write_domain;
	obj->base.write_domain = 0;

	trace_i915_gem_object_change_domain(obj,
					    obj->base.read_domains,
					    old_write_domain);
}

/**
 * Moves a single object to the GTT read, and possibly write domain.
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
{
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
	uint32_t old_write_domain, old_read_domains;
	int ret;

	/* Not valid to be called on unbound objects. */
	if (!i915_gem_obj_bound_any(obj))
		return -EINVAL;

	if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
		return 0;

	ret = i915_gem_object_wait_rendering(obj, !write);
	if (ret)
		return ret;

	i915_gem_object_retire(obj);
	i915_gem_object_flush_cpu_write_domain(obj, false);

	/* Serialise direct access to this object with the barriers for
	 * coherent writes from the GPU, by effectively invalidating the
	 * GTT domain upon first access.
	 */
	if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
		mb();

	old_write_domain = obj->base.write_domain;
	old_read_domains = obj->base.read_domains;

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
	obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
	if (write) {
		obj->base.read_domains = I915_GEM_DOMAIN_GTT;
		obj->base.write_domain = I915_GEM_DOMAIN_GTT;
		obj->dirty = 1;
	}

	if (write)
		intel_fb_obj_invalidate(obj, NULL);

	trace_i915_gem_object_change_domain(obj,
					    old_read_domains,
					    old_write_domain);

	/* And bump the LRU for this access */
	if (i915_gem_object_is_inactive(obj)) {
		struct i915_vma *vma = i915_gem_obj_to_ggtt(obj);
		if (vma)
			list_move_tail(&vma->mm_list,
				       &dev_priv->gtt.base.inactive_list);
	}

	return 0;
}

int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
				    enum i915_cache_level cache_level)
{
	struct drm_device *dev = obj->base.dev;
	struct i915_vma *vma, *next;
	int ret;

	if (obj->cache_level == cache_level)
		return 0;

	if (i915_gem_obj_is_pinned(obj)) {
		DRM_DEBUG("can not change the cache level of pinned objects\n");
		return -EBUSY;
	}

	list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) {
		if (!i915_gem_valid_gtt_space(dev, &vma->node, cache_level)) {
			ret = i915_vma_unbind(vma);
			if (ret)
				return ret;
		}
	}

	if (i915_gem_obj_bound_any(obj)) {
		ret = i915_gem_object_finish_gpu(obj);
		if (ret)
			return ret;

		i915_gem_object_finish_gtt(obj);

		/* Before SandyBridge, you could not use tiling or fence
		 * registers with snooped memory, so relinquish any fences
		 * currently pointing to our region in the aperture.
		 */
		if (INTEL_INFO(dev)->gen < 6) {
			ret = i915_gem_object_put_fence(obj);
			if (ret)
				return ret;
		}

		list_for_each_entry(vma, &obj->vma_list, vma_link)
			if (drm_mm_node_allocated(&vma->node))
				vma->bind_vma(vma, cache_level,
					      obj->has_global_gtt_mapping ? GLOBAL_BIND : 0);
	}

	list_for_each_entry(vma, &obj->vma_list, vma_link)
		vma->node.color = cache_level;
	obj->cache_level = cache_level;

	if (cpu_write_needs_clflush(obj)) {
		u32 old_read_domains, old_write_domain;

		/* If we're coming from LLC cached, then we haven't
		 * actually been tracking whether the data is in the
		 * CPU cache or not, since we only allow one bit set
		 * in obj->write_domain and have been skipping the clflushes.
		 * Just set it to the CPU cache for now.
		 */
		i915_gem_object_retire(obj);
		WARN_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU);

		old_read_domains = obj->base.read_domains;
		old_write_domain = obj->base.write_domain;

		obj->base.read_domains = I915_GEM_DOMAIN_CPU;
		obj->base.write_domain = I915_GEM_DOMAIN_CPU;

		trace_i915_gem_object_change_domain(obj,
						    old_read_domains,
						    old_write_domain);
	}

	i915_gem_verify_gtt(dev);
	return 0;
}

int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
{
	struct drm_i915_gem_caching *args = data;
	struct drm_i915_gem_object *obj;
	int ret;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	switch (obj->cache_level) {
	case I915_CACHE_LLC:
	case I915_CACHE_L3_LLC:
		args->caching = I915_CACHING_CACHED;
		break;

	case I915_CACHE_WT:
		args->caching = I915_CACHING_DISPLAY;
		break;

	default:
		args->caching = I915_CACHING_NONE;
		break;
	}

	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
{
	struct drm_i915_gem_caching *args = data;
	struct drm_i915_gem_object *obj;
	enum i915_cache_level level;
	int ret;

	switch (args->caching) {
	case I915_CACHING_NONE:
		level = I915_CACHE_NONE;
		break;
	case I915_CACHING_CACHED:
		level = I915_CACHE_LLC;
		break;
	case I915_CACHING_DISPLAY:
		level = HAS_WT(dev) ? I915_CACHE_WT : I915_CACHE_NONE;
		break;
	default:
		return -EINVAL;
	}

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	ret = i915_gem_object_set_cache_level(obj, level);

	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}
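/*
 * Illustrative userspace-side sketch (assumed libdrm-style code, not part
 * of this file; fd and bo_handle are hypothetical): requesting snooped
 * (CPU-cached) access for a buffer via the set-caching ioctl handled above.
 */
#if 0
	struct drm_i915_gem_caching caching = {
		.handle = bo_handle,
		.caching = I915_CACHING_CACHED,
	};
	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING, &caching))
		/* fails with EBUSY if the object is pinned, see above */;
#endif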
*/ 3973 int 3974 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, 3975 u32 alignment, 3976 struct intel_engine_cs *pipelined) 3977 { 3978 u32 old_read_domains, old_write_domain; 3979 bool was_pin_display; 3980 int ret; 3981 3982 if (pipelined != obj->ring) { 3983 ret = i915_gem_object_sync(obj, pipelined); 3984 if (ret) 3985 return ret; 3986 } 3987 3988 /* Mark the pin_display early so that we account for the 3989 * display coherency whilst setting up the cache domains. 3990 */ 3991 was_pin_display = obj->pin_display; 3992 obj->pin_display = true; 3993 3994 /* The display engine is not coherent with the LLC cache on gen6. As 3995 * a result, we make sure that the pinning that is about to occur is 3996 * done with uncached PTEs. This is the lowest common denominator for 3997 * all chipsets. 3998 * 3999 * However for gen6+, we could do better by using the GFDT bit instead 4000 * of uncaching, which would allow us to flush all the LLC-cached data 4001 * with that bit in the PTE to main memory with just one PIPE_CONTROL. 4002 */ 4003 ret = i915_gem_object_set_cache_level(obj, 4004 HAS_WT(obj->base.dev) ? I915_CACHE_WT : I915_CACHE_NONE); 4005 if (ret) 4006 goto err_unpin_display; 4007 4008 /* As the user may map the buffer once pinned in the display plane 4009 * (e.g. libkms for the bootup splash), we have to ensure that we 4010 * always use map_and_fenceable for all scanout buffers. 4011 */ 4012 ret = i915_gem_obj_ggtt_pin(obj, alignment, PIN_MAPPABLE); 4013 if (ret) 4014 goto err_unpin_display; 4015 4016 i915_gem_object_flush_cpu_write_domain(obj, true); 4017 4018 old_write_domain = obj->base.write_domain; 4019 old_read_domains = obj->base.read_domains; 4020 4021 /* It should now be out of any other write domains, and we can update 4022 * the domain values for our changes. 4023 */ 4024 obj->base.write_domain = 0; 4025 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 4026 4027 trace_i915_gem_object_change_domain(obj, 4028 old_read_domains, 4029 old_write_domain); 4030 4031 return 0; 4032 4033 err_unpin_display: 4034 WARN_ON(was_pin_display != is_pin_display(obj)); 4035 obj->pin_display = was_pin_display; 4036 return ret; 4037 } 4038 4039 void 4040 i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj) 4041 { 4042 i915_gem_object_ggtt_unpin(obj); 4043 obj->pin_display = is_pin_display(obj); 4044 } 4045 4046 int 4047 i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj) 4048 { 4049 int ret; 4050 4051 if ((obj->base.read_domains & I915_GEM_GPU_DOMAINS) == 0) 4052 return 0; 4053 4054 ret = i915_gem_object_wait_rendering(obj, false); 4055 if (ret) 4056 return ret; 4057 4058 /* Ensure that we invalidate the GPU's caches and TLBs. */ 4059 obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS; 4060 return 0; 4061 } 4062 4063 /** 4064 * Moves a single object to the CPU read, and possibly write domain. 4065 * 4066 * This function returns when the move is complete, including waiting on 4067 * flushes to occur.
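 *
 * A minimal usage sketch (struct_mutex held), e.g. before reading the
 * object's pages back with the CPU:
 *
 *	ret = i915_gem_object_set_to_cpu_domain(obj, false);
 *	if (ret)
 *		return ret;
 *	... CPU reads of the object's pages are now coherent ...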
4068 */ 4069 int 4070 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) 4071 { 4072 uint32_t old_write_domain, old_read_domains; 4073 int ret; 4074 4075 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) 4076 return 0; 4077 4078 ret = i915_gem_object_wait_rendering(obj, !write); 4079 if (ret) 4080 return ret; 4081 4082 i915_gem_object_retire(obj); 4083 i915_gem_object_flush_gtt_write_domain(obj); 4084 4085 old_write_domain = obj->base.write_domain; 4086 old_read_domains = obj->base.read_domains; 4087 4088 /* Flush the CPU cache if it's still invalid. */ 4089 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) { 4090 i915_gem_clflush_object(obj, false); 4091 4092 obj->base.read_domains |= I915_GEM_DOMAIN_CPU; 4093 } 4094 4095 /* It should now be out of any other write domains, and we can update 4096 * the domain values for our changes. 4097 */ 4098 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0); 4099 4100 /* If we're writing through the CPU, then the GPU read domains will 4101 * need to be invalidated at next use. 4102 */ 4103 if (write) { 4104 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 4105 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 4106 } 4107 4108 if (write) 4109 intel_fb_obj_invalidate(obj, NULL); 4110 4111 trace_i915_gem_object_change_domain(obj, 4112 old_read_domains, 4113 old_write_domain); 4114 4115 return 0; 4116 } 4117 4118 /* Throttle our rendering by waiting until the ring has completed our requests 4119 * emitted over 20 msec ago. 4120 * 4121 * Note that if we were to use the current jiffies each time around the loop, 4122 * we wouldn't escape the function with any frames outstanding if the time to 4123 * render a frame was over 20ms. 4124 * 4125 * This should get us reasonable parallelism between CPU and GPU but also 4126 * relatively low latency when blocking on a particular request to finish. 
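 *
 * Concretely, with HZ=100 the 20ms window below spans two jiffies:
 * the loop picks the most recent request older than that and waits
 * for it, leaving anything newer to keep queueing.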
4127 */ 4128 static int 4129 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) 4130 { 4131 struct drm_i915_private *dev_priv = dev->dev_private; 4132 struct drm_i915_file_private *file_priv = file->driver_priv; 4133 unsigned long recent_enough = jiffies - msecs_to_jiffies(20); 4134 struct drm_i915_gem_request *request; 4135 struct intel_engine_cs *ring = NULL; 4136 unsigned reset_counter; 4137 u32 seqno = 0; 4138 int ret; 4139 4140 ret = i915_gem_wait_for_error(&dev_priv->gpu_error); 4141 if (ret) 4142 return ret; 4143 4144 ret = i915_gem_check_wedge(&dev_priv->gpu_error, false); 4145 if (ret) 4146 return ret; 4147 4148 spin_lock(&file_priv->mm.lock); 4149 list_for_each_entry(request, &file_priv->mm.request_list, client_list) { 4150 if (time_after_eq(request->emitted_jiffies, recent_enough)) 4151 break; 4152 4153 ring = request->ring; 4154 seqno = request->seqno; 4155 } 4156 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter); 4157 spin_unlock(&file_priv->mm.lock); 4158 4159 if (seqno == 0) 4160 return 0; 4161 4162 ret = __wait_seqno(ring, seqno, reset_counter, true, NULL, NULL); 4163 if (ret == 0) 4164 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0); 4165 4166 return ret; 4167 } 4168 4169 static bool 4170 i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags) 4171 { 4172 struct drm_i915_gem_object *obj = vma->obj; 4173 4174 if (alignment && 4175 vma->node.start & (alignment - 1)) 4176 return true; 4177 4178 if (flags & PIN_MAPPABLE && !obj->map_and_fenceable) 4179 return true; 4180 4181 if (flags & PIN_OFFSET_BIAS && 4182 vma->node.start < (flags & PIN_OFFSET_MASK)) 4183 return true; 4184 4185 return false; 4186 } 4187 4188 int 4189 i915_gem_object_pin(struct drm_i915_gem_object *obj, 4190 struct i915_address_space *vm, 4191 uint32_t alignment, 4192 uint64_t flags) 4193 { 4194 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 4195 struct i915_vma *vma; 4196 int ret; 4197 4198 if (WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base)) 4199 return -ENODEV; 4200 4201 if (WARN_ON(flags & (PIN_GLOBAL | PIN_MAPPABLE) && !i915_is_ggtt(vm))) 4202 return -EINVAL; 4203 4204 vma = i915_gem_obj_to_vma(obj, vm); 4205 if (vma) { 4206 if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT)) 4207 return -EBUSY; 4208 4209 if (i915_vma_misplaced(vma, alignment, flags)) { 4210 WARN(vma->pin_count, 4211 "bo is already pinned with incorrect alignment:" 4212 " offset=%lx, req.alignment=%x, req.map_and_fenceable=%d," 4213 " obj->map_and_fenceable=%d\n", 4214 i915_gem_obj_offset(obj, vm), alignment, 4215 !!(flags & PIN_MAPPABLE), 4216 obj->map_and_fenceable); 4217 ret = i915_vma_unbind(vma); 4218 if (ret) 4219 return ret; 4220 4221 vma = NULL; 4222 } 4223 } 4224 4225 if (vma == NULL || !drm_mm_node_allocated(&vma->node)) { 4226 vma = i915_gem_object_bind_to_vm(obj, vm, alignment, flags); 4227 if (IS_ERR(vma)) 4228 return PTR_ERR(vma); 4229 } 4230 4231 if (flags & PIN_GLOBAL && !obj->has_global_gtt_mapping) 4232 vma->bind_vma(vma, obj->cache_level, GLOBAL_BIND); 4233 4234 vma->pin_count++; 4235 if (flags & PIN_MAPPABLE) 4236 obj->pin_mappable |= true; 4237 4238 return 0; 4239 } 4240 4241 void 4242 i915_gem_object_ggtt_unpin(struct drm_i915_gem_object *obj) 4243 { 4244 struct i915_vma *vma = i915_gem_obj_to_ggtt(obj); 4245 4246 BUG_ON(!vma); 4247 BUG_ON(vma->pin_count == 0); 4248 BUG_ON(!i915_gem_obj_ggtt_bound(obj)); 4249 4250 if (--vma->pin_count == 0) 4251 obj->pin_mappable = false; 4252 } 4253 4254 bool 4255 
i915_gem_object_pin_fence(struct drm_i915_gem_object *obj) 4256 { 4257 if (obj->fence_reg != I915_FENCE_REG_NONE) { 4258 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 4259 struct i915_vma *ggtt_vma = i915_gem_obj_to_ggtt(obj); 4260 4261 WARN_ON(!ggtt_vma || 4262 dev_priv->fence_regs[obj->fence_reg].pin_count > 4263 ggtt_vma->pin_count); 4264 dev_priv->fence_regs[obj->fence_reg].pin_count++; 4265 return true; 4266 } else 4267 return false; 4268 } 4269 4270 void 4271 i915_gem_object_unpin_fence(struct drm_i915_gem_object *obj) 4272 { 4273 if (obj->fence_reg != I915_FENCE_REG_NONE) { 4274 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 4275 WARN_ON(dev_priv->fence_regs[obj->fence_reg].pin_count <= 0); 4276 dev_priv->fence_regs[obj->fence_reg].pin_count--; 4277 } 4278 } 4279 4280 int 4281 i915_gem_pin_ioctl(struct drm_device *dev, void *data, 4282 struct drm_file *file) 4283 { 4284 struct drm_i915_gem_pin *args = data; 4285 struct drm_i915_gem_object *obj; 4286 int ret; 4287 4288 if (INTEL_INFO(dev)->gen >= 6) 4289 return -ENODEV; 4290 4291 ret = i915_mutex_lock_interruptible(dev); 4292 if (ret) 4293 return ret; 4294 4295 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 4296 if (&obj->base == NULL) { 4297 ret = -ENOENT; 4298 goto unlock; 4299 } 4300 4301 if (obj->madv != I915_MADV_WILLNEED) { 4302 DRM_DEBUG("Attempting to pin a purgeable buffer\n"); 4303 ret = -EFAULT; 4304 goto out; 4305 } 4306 4307 if (obj->pin_filp != NULL && obj->pin_filp != file) { 4308 DRM_DEBUG("Already pinned in i915_gem_pin_ioctl(): %d\n", 4309 args->handle); 4310 ret = -EINVAL; 4311 goto out; 4312 } 4313 4314 if (obj->user_pin_count == ULONG_MAX) { 4315 ret = -EBUSY; 4316 goto out; 4317 } 4318 4319 if (obj->user_pin_count == 0) { 4320 ret = i915_gem_obj_ggtt_pin(obj, args->alignment, PIN_MAPPABLE); 4321 if (ret) 4322 goto out; 4323 } 4324 4325 obj->user_pin_count++; 4326 obj->pin_filp = file; 4327 4328 args->offset = i915_gem_obj_ggtt_offset(obj); 4329 out: 4330 drm_gem_object_unreference(&obj->base); 4331 unlock: 4332 mutex_unlock(&dev->struct_mutex); 4333 return ret; 4334 } 4335 4336 int 4337 i915_gem_unpin_ioctl(struct drm_device *dev, void *data, 4338 struct drm_file *file) 4339 { 4340 struct drm_i915_gem_pin *args = data; 4341 struct drm_i915_gem_object *obj; 4342 int ret; 4343 4344 ret = i915_mutex_lock_interruptible(dev); 4345 if (ret) 4346 return ret; 4347 4348 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 4349 if (&obj->base == NULL) { 4350 ret = -ENOENT; 4351 goto unlock; 4352 } 4353 4354 if (obj->pin_filp != file) { 4355 DRM_DEBUG("Not pinned by caller in i915_gem_unpin_ioctl(): %d\n", 4356 args->handle); 4357 ret = -EINVAL; 4358 goto out; 4359 } 4360 obj->user_pin_count--; 4361 if (obj->user_pin_count == 0) { 4362 obj->pin_filp = NULL; 4363 i915_gem_object_ggtt_unpin(obj); 4364 } 4365 4366 out: 4367 drm_gem_object_unreference(&obj->base); 4368 unlock: 4369 mutex_unlock(&dev->struct_mutex); 4370 return ret; 4371 } 4372 4373 int 4374 i915_gem_busy_ioctl(struct drm_device *dev, void *data, 4375 struct drm_file *file) 4376 { 4377 struct drm_i915_gem_busy *args = data; 4378 struct drm_i915_gem_object *obj; 4379 int ret; 4380 4381 ret = i915_mutex_lock_interruptible(dev); 4382 if (ret) 4383 return ret; 4384 4385 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 4386 if (&obj->base == NULL) { 4387 ret = -ENOENT; 4388 goto unlock; 4389 } 4390 4391 /* Count all active objects as busy, even if they are currently not used 4392 *
by the gpu. Users of this interface expect objects to eventually 4393 * become non-busy without any further actions, therefore emit any 4394 * necessary flushes here. 4395 */ 4396 ret = i915_gem_object_flush_active(obj); 4397 4398 args->busy = obj->active; 4399 if (obj->ring) { 4400 args->busy |= intel_ring_flag(obj->ring) << 16; 4401 } 4402 4403 drm_gem_object_unreference(&obj->base); 4404 unlock: 4405 mutex_unlock(&dev->struct_mutex); 4406 return ret; 4407 } 4408 4409 int 4410 i915_gem_throttle_ioctl(struct drm_device *dev, void *data, 4411 struct drm_file *file_priv) 4412 { 4413 return i915_gem_ring_throttle(dev, file_priv); 4414 } 4415 4416 int 4417 i915_gem_madvise_ioctl(struct drm_device *dev, void *data, 4418 struct drm_file *file_priv) 4419 { 4420 struct drm_i915_gem_madvise *args = data; 4421 struct drm_i915_gem_object *obj; 4422 int ret; 4423 4424 switch (args->madv) { 4425 case I915_MADV_DONTNEED: 4426 case I915_MADV_WILLNEED: 4427 break; 4428 default: 4429 return -EINVAL; 4430 } 4431 4432 ret = i915_mutex_lock_interruptible(dev); 4433 if (ret) 4434 return ret; 4435 4436 obj = to_intel_bo(drm_gem_object_lookup(dev, file_priv, args->handle)); 4437 if (&obj->base == NULL) { 4438 ret = -ENOENT; 4439 goto unlock; 4440 } 4441 4442 if (i915_gem_obj_is_pinned(obj)) { 4443 ret = -EINVAL; 4444 goto out; 4445 } 4446 4447 if (obj->madv != __I915_MADV_PURGED) 4448 obj->madv = args->madv; 4449 4450 /* if the object is no longer attached, discard its backing storage */ 4451 if (i915_gem_object_is_purgeable(obj) && obj->pages == NULL) 4452 i915_gem_object_truncate(obj); 4453 4454 args->retained = obj->madv != __I915_MADV_PURGED; 4455 4456 out: 4457 drm_gem_object_unreference(&obj->base); 4458 unlock: 4459 mutex_unlock(&dev->struct_mutex); 4460 return ret; 4461 } 4462 4463 void i915_gem_object_init(struct drm_i915_gem_object *obj, 4464 const struct drm_i915_gem_object_ops *ops) 4465 { 4466 INIT_LIST_HEAD(&obj->global_list); 4467 INIT_LIST_HEAD(&obj->ring_list); 4468 INIT_LIST_HEAD(&obj->obj_exec_link); 4469 INIT_LIST_HEAD(&obj->vma_list); 4470 4471 obj->ops = ops; 4472 4473 obj->fence_reg = I915_FENCE_REG_NONE; 4474 obj->madv = I915_MADV_WILLNEED; 4475 /* Avoid an unnecessary call to unbind on the first bind. */ 4476 obj->map_and_fenceable = true; 4477 4478 i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size); 4479 } 4480 4481 static const struct drm_i915_gem_object_ops i915_gem_object_ops = { 4482 .get_pages = i915_gem_object_get_pages_gtt, 4483 .put_pages = i915_gem_object_put_pages_gtt, 4484 }; 4485 4486 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev, 4487 size_t size) 4488 { 4489 struct drm_i915_gem_object *obj; 4490 #if 0 4491 struct address_space *mapping; 4492 gfp_t mask; 4493 #endif 4494 4495 obj = i915_gem_object_alloc(dev); 4496 if (obj == NULL) 4497 return NULL; 4498 4499 if (drm_gem_object_init(dev, &obj->base, size) != 0) { 4500 i915_gem_object_free(obj); 4501 return NULL; 4502 } 4503 4504 #if 0 4505 mask = GFP_HIGHUSER | __GFP_RECLAIMABLE; 4506 if (IS_CRESTLINE(dev) || IS_BROADWATER(dev)) { 4507 /* 965gm cannot relocate objects above 4GiB. 
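		 * Keeping the backing store in the low 4GiB means clearing
		 * __GFP_HIGHMEM and forcing __GFP_DMA32 for its shmemfs
		 * mapping, as done below.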
*/ 4508 mask &= ~__GFP_HIGHMEM; 4509 mask |= __GFP_DMA32; 4510 } 4511 4512 mapping = file_inode(obj->base.filp)->i_mapping; 4513 mapping_set_gfp_mask(mapping, mask); 4514 #endif 4515 4516 i915_gem_object_init(obj, &i915_gem_object_ops); 4517 4518 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 4519 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 4520 4521 if (HAS_LLC(dev)) { 4522 /* On some devices, we can have the GPU use the LLC (the CPU 4523 * cache) for about a 10% performance improvement 4524 * compared to uncached. Graphics requests other than 4525 * display scanout are coherent with the CPU in 4526 * accessing this cache. This means in this mode we 4527 * don't need to clflush on the CPU side, and on the 4528 * GPU side we only need to flush internal caches to 4529 * get data visible to the CPU. 4530 * 4531 * However, we maintain the display planes as UC, and so 4532 * need to rebind when first used as such. 4533 */ 4534 obj->cache_level = I915_CACHE_LLC; 4535 } else 4536 obj->cache_level = I915_CACHE_NONE; 4537 4538 trace_i915_gem_object_create(obj); 4539 4540 return obj; 4541 } 4542 4543 static bool discard_backing_storage(struct drm_i915_gem_object *obj) 4544 { 4545 /* If we are the last user of the backing storage (be it shmemfs 4546 * pages or stolen etc), we know that the pages are going to be 4547 * immediately released. In this case, we can then skip copying 4548 * back the contents from the GPU. 4549 */ 4550 4551 if (obj->madv != I915_MADV_WILLNEED) 4552 return false; 4553 4554 if (obj->base.vm_obj == NULL) 4555 return true; 4556 4557 /* At first glance, this looks racy, but then again so would be 4558 * userspace racing mmap against close. However, the first external 4559 * reference to the filp can only be obtained through the 4560 * i915_gem_mmap_ioctl() which safeguards us against the user 4561 * acquiring such a reference whilst we are in the middle of 4562 * freeing the object. 4563 */ 4564 #if 0 4565 return atomic_long_read(&obj->base.filp->f_count) == 1; 4566 #else 4567 return false; 4568 #endif 4569 } 4570 4571 void i915_gem_free_object(struct drm_gem_object *gem_obj) 4572 { 4573 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); 4574 struct drm_device *dev = obj->base.dev; 4575 struct drm_i915_private *dev_priv = dev->dev_private; 4576 struct i915_vma *vma, *next; 4577 4578 intel_runtime_pm_get(dev_priv); 4579 4580 trace_i915_gem_object_destroy(obj); 4581 4582 list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) { 4583 int ret; 4584 4585 vma->pin_count = 0; 4586 ret = i915_vma_unbind(vma); 4587 if (WARN_ON(ret == -ERESTARTSYS)) { 4588 bool was_interruptible; 4589 4590 was_interruptible = dev_priv->mm.interruptible; 4591 dev_priv->mm.interruptible = false; 4592 4593 WARN_ON(i915_vma_unbind(vma)); 4594 4595 dev_priv->mm.interruptible = was_interruptible; 4596 } 4597 } 4598 4599 i915_gem_object_detach_phys(obj); 4600 4601 /* Stolen objects don't hold a ref, but do hold pin count. Fix that up 4602 * before progressing. 
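	 * Stolen memory is pinned at creation, so drop that pin now;
	 * otherwise the WARN_ON(obj->pages_pin_count) below would fire.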
*/ 4603 if (obj->stolen) 4604 i915_gem_object_unpin_pages(obj); 4605 4606 WARN_ON(obj->frontbuffer_bits); 4607 4608 if (WARN_ON(obj->pages_pin_count)) 4609 obj->pages_pin_count = 0; 4610 if (discard_backing_storage(obj)) 4611 obj->madv = I915_MADV_DONTNEED; 4612 i915_gem_object_put_pages(obj); 4613 i915_gem_object_free_mmap_offset(obj); 4614 4615 BUG_ON(obj->pages); 4616 4617 #if 0 4618 if (obj->base.import_attach) 4619 drm_prime_gem_destroy(&obj->base, NULL); 4620 #endif 4621 4622 if (obj->ops->release) 4623 obj->ops->release(obj); 4624 4625 drm_gem_object_release(&obj->base); 4626 i915_gem_info_remove_obj(dev_priv, obj->base.size); 4627 4628 kfree(obj->bit_17); 4629 i915_gem_object_free(obj); 4630 4631 intel_runtime_pm_put(dev_priv); 4632 } 4633 4634 struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj, 4635 struct i915_address_space *vm) 4636 { 4637 struct i915_vma *vma; 4638 list_for_each_entry(vma, &obj->vma_list, vma_link) 4639 if (vma->vm == vm) 4640 return vma; 4641 4642 return NULL; 4643 } 4644 4645 void i915_gem_vma_destroy(struct i915_vma *vma) 4646 { 4647 WARN_ON(vma->node.allocated); 4648 4649 /* Keep the vma as a placeholder in the execbuffer reservation lists */ 4650 if (!list_empty(&vma->exec_list)) 4651 return; 4652 4653 list_del(&vma->vma_link); 4654 4655 kfree(vma); 4656 } 4657 4658 static void 4659 i915_gem_stop_ringbuffers(struct drm_device *dev) 4660 { 4661 struct drm_i915_private *dev_priv = dev->dev_private; 4662 struct intel_engine_cs *ring; 4663 int i; 4664 4665 for_each_ring(ring, dev_priv, i) 4666 intel_stop_ring_buffer(ring); 4667 } 4668 4669 int 4670 i915_gem_suspend(struct drm_device *dev) 4671 { 4672 struct drm_i915_private *dev_priv = dev->dev_private; 4673 int ret = 0; 4674 4675 mutex_lock(&dev->struct_mutex); 4676 if (dev_priv->ums.mm_suspended) 4677 goto err; 4678 4679 ret = i915_gpu_idle(dev); 4680 if (ret) 4681 goto err; 4682 4683 i915_gem_retire_requests(dev); 4684 4685 /* Under UMS, be paranoid and evict. */ 4686 if (!drm_core_check_feature(dev, DRIVER_MODESET)) 4687 i915_gem_evict_everything(dev); 4688 4689 i915_kernel_lost_context(dev); 4690 i915_gem_stop_ringbuffers(dev); 4691 4692 /* Hack! Don't let anybody do execbuf while we don't control the chip. 4693 * We need to replace this with a semaphore, or something. 4694 * And not confound ums.mm_suspended! 4695 */ 4696 dev_priv->ums.mm_suspended = !drm_core_check_feature(dev, 4697 DRIVER_MODESET); 4698 mutex_unlock(&dev->struct_mutex); 4699 4700 del_timer_sync(&dev_priv->gpu_error.hangcheck_timer); 4701 cancel_delayed_work_sync(&dev_priv->mm.retire_work); 4702 #if 0 4703 flush_delayed_work(&dev_priv->mm.idle_work); 4704 #endif 4705 4706 return 0; 4707 4708 err: 4709 mutex_unlock(&dev->struct_mutex); 4710 return ret; 4711 } 4712 4713 int i915_gem_l3_remap(struct intel_engine_cs *ring, int slice) 4714 { 4715 struct drm_device *dev = ring->dev; 4716 struct drm_i915_private *dev_priv = dev->dev_private; 4717 u32 reg_base = GEN7_L3LOG_BASE + (slice * 0x200); 4718 u32 *remap_info = dev_priv->l3_parity.remap_info[slice]; 4719 int i, ret; 4720 4721 if (!HAS_L3_DPF(dev) || !remap_info) 4722 return 0; 4723 4724 ret = intel_ring_begin(ring, GEN7_L3LOG_SIZE / 4 * 3); 4725 if (ret) 4726 return ret; 4727 4728 /* 4729 * Note: We do not worry about the concurrent register cacheline hang 4730 * here because no other code should access these registers other than 4731 * at initialization time. 
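	 *
	 * Each loop iteration below emits one three-dword LRI packet:
	 * { MI_LOAD_REGISTER_IMM(1), reg_base + i, remap_info[i/4] },
	 * i.e. GEN7_L3LOG_SIZE / 4 writes in all, which is exactly the
	 * GEN7_L3LOG_SIZE / 4 * 3 dwords reserved by intel_ring_begin()
	 * above.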
4732 */ 4733 for (i = 0; i < GEN7_L3LOG_SIZE; i += 4) { 4734 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); 4735 intel_ring_emit(ring, reg_base + i); 4736 intel_ring_emit(ring, remap_info[i/4]); 4737 } 4738 4739 intel_ring_advance(ring); 4740 4741 return ret; 4742 } 4743 4744 void i915_gem_init_swizzling(struct drm_device *dev) 4745 { 4746 struct drm_i915_private *dev_priv = dev->dev_private; 4747 4748 if (INTEL_INFO(dev)->gen < 5 || 4749 dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE) 4750 return; 4751 4752 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) | 4753 DISP_TILE_SURFACE_SWIZZLING); 4754 4755 if (IS_GEN5(dev)) 4756 return; 4757 4758 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL); 4759 if (IS_GEN6(dev)) 4760 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB)); 4761 else if (IS_GEN7(dev)) 4762 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB)); 4763 else if (IS_GEN8(dev)) 4764 I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW)); 4765 else 4766 BUG(); 4767 } 4768 4769 static bool 4770 intel_enable_blt(struct drm_device *dev) 4771 { 4772 int revision; 4773 4774 if (!HAS_BLT(dev)) 4775 return false; 4776 4777 /* The blitter was dysfunctional on early prototypes */ 4778 revision = pci_read_config(dev->dev, PCIR_REVID, 1); 4779 if (IS_GEN6(dev) && revision < 8) { 4780 DRM_INFO("BLT not supported on this pre-production hardware;" 4781 " graphics performance will be degraded.\n"); 4782 return false; 4783 } 4784 4785 return true; 4786 } 4787 4788 static int i915_gem_init_rings(struct drm_device *dev) 4789 { 4790 struct drm_i915_private *dev_priv = dev->dev_private; 4791 int ret; 4792 4793 ret = intel_init_render_ring_buffer(dev); 4794 if (ret) 4795 return ret; 4796 4797 if (HAS_BSD(dev)) { 4798 ret = intel_init_bsd_ring_buffer(dev); 4799 if (ret) 4800 goto cleanup_render_ring; 4801 } 4802 4803 if (intel_enable_blt(dev)) { 4804 ret = intel_init_blt_ring_buffer(dev); 4805 if (ret) 4806 goto cleanup_bsd_ring; 4807 } 4808 4809 if (HAS_VEBOX(dev)) { 4810 ret = intel_init_vebox_ring_buffer(dev); 4811 if (ret) 4812 goto cleanup_blt_ring; 4813 } 4814 4815 if (HAS_BSD2(dev)) { 4816 ret = intel_init_bsd2_ring_buffer(dev); 4817 if (ret) 4818 goto cleanup_vebox_ring; 4819 } 4820 4821 ret = i915_gem_set_seqno(dev, ((u32)~0 - 0x1000)); 4822 if (ret) 4823 goto cleanup_bsd2_ring; 4824 4825 return 0; 4826 4827 cleanup_bsd2_ring: 4828 intel_cleanup_ring_buffer(&dev_priv->ring[VCS2]); 4829 cleanup_vebox_ring: 4830 intel_cleanup_ring_buffer(&dev_priv->ring[VECS]); 4831 cleanup_blt_ring: 4832 intel_cleanup_ring_buffer(&dev_priv->ring[BCS]); 4833 cleanup_bsd_ring: 4834 intel_cleanup_ring_buffer(&dev_priv->ring[VCS]); 4835 cleanup_render_ring: 4836 intel_cleanup_ring_buffer(&dev_priv->ring[RCS]); 4837 4838 return ret; 4839 } 4840 4841 int 4842 i915_gem_init_hw(struct drm_device *dev) 4843 { 4844 struct drm_i915_private *dev_priv = dev->dev_private; 4845 int ret, i; 4846 4847 #if 0 4848 if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt()) 4849 return -EIO; 4850 #endif 4851 4852 if (dev_priv->ellc_size) 4853 I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf)); 4854 4855 if (IS_HASWELL(dev)) 4856 I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev) ? 
4857 LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED); 4858 4859 if (HAS_PCH_NOP(dev)) { 4860 if (IS_IVYBRIDGE(dev)) { 4861 u32 temp = I915_READ(GEN7_MSG_CTL); 4862 temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK); 4863 I915_WRITE(GEN7_MSG_CTL, temp); 4864 } else if (INTEL_INFO(dev)->gen >= 7) { 4865 u32 temp = I915_READ(HSW_NDE_RSTWRN_OPT); 4866 temp &= ~RESET_PCH_HANDSHAKE_ENABLE; 4867 I915_WRITE(HSW_NDE_RSTWRN_OPT, temp); 4868 } 4869 } 4870 4871 i915_gem_init_swizzling(dev); 4872 4873 ret = i915_gem_init_rings(dev); 4874 if (ret) 4875 return ret; 4876 4877 for (i = 0; i < NUM_L3_SLICES(dev); i++) 4878 i915_gem_l3_remap(&dev_priv->ring[RCS], i); 4879 4880 /* 4881 * XXX: Contexts should only be initialized once. Doing a switch to the 4882 * default context switch however is something we'd like to do after 4883 * reset or thaw (the latter may not actually be necessary for HW, but 4884 * goes with our code better). Context switching requires rings (for 4885 * the do_switch), but before enabling PPGTT. So don't move this. 4886 */ 4887 ret = i915_gem_context_enable(dev_priv); 4888 if (ret && ret != -EIO) { 4889 DRM_ERROR("Context enable failed %d\n", ret); 4890 i915_gem_cleanup_ringbuffer(dev); 4891 } 4892 4893 return ret; 4894 } 4895 4896 int i915_gem_init(struct drm_device *dev) 4897 { 4898 struct drm_i915_private *dev_priv = dev->dev_private; 4899 int ret; 4900 4901 mutex_lock(&dev->struct_mutex); 4902 4903 if (IS_VALLEYVIEW(dev)) { 4904 /* VLVA0 (potential hack), BIOS isn't actually waking us */ 4905 I915_WRITE(VLV_GTLC_WAKE_CTRL, VLV_GTLC_ALLOWWAKEREQ); 4906 if (wait_for((I915_READ(VLV_GTLC_PW_STATUS) & 4907 VLV_GTLC_ALLOWWAKEACK), 10)) 4908 DRM_DEBUG_DRIVER("allow wake ack timed out\n"); 4909 } 4910 4911 i915_gem_init_userptr(dev); 4912 i915_gem_init_global_gtt(dev); 4913 4914 ret = i915_gem_context_init(dev); 4915 if (ret) { 4916 mutex_unlock(&dev->struct_mutex); 4917 return ret; 4918 } 4919 4920 ret = i915_gem_init_hw(dev); 4921 if (ret == -EIO) { 4922 /* Allow ring initialisation to fail by marking the GPU as 4923 * wedged. But we only want to do this where the GPU is angry, 4924 * for all other failure, such as an allocation failure, bail. 4925 */ 4926 DRM_ERROR("Failed to initialize GPU, declaring it wedged\n"); 4927 atomic_set_mask(I915_WEDGED, &dev_priv->gpu_error.reset_counter); 4928 ret = 0; 4929 } 4930 mutex_unlock(&dev->struct_mutex); 4931 4932 /* Allow hardware batchbuffers unless told otherwise, but not for KMS. 
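	 * (Under KMS the kernel itself drives the hardware, so DRI1
	 * batchbuffer submission stays disabled.)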
*/ 4933 if (!drm_core_check_feature(dev, DRIVER_MODESET)) 4934 dev_priv->dri1.allow_batchbuffer = 1; 4935 return ret; 4936 } 4937 4938 void 4939 i915_gem_cleanup_ringbuffer(struct drm_device *dev) 4940 { 4941 struct drm_i915_private *dev_priv = dev->dev_private; 4942 struct intel_engine_cs *ring; 4943 int i; 4944 4945 for_each_ring(ring, dev_priv, i) 4946 intel_cleanup_ring_buffer(ring); 4947 } 4948 4949 int 4950 i915_gem_entervt_ioctl(struct drm_device *dev, void *data, 4951 struct drm_file *file_priv) 4952 { 4953 struct drm_i915_private *dev_priv = dev->dev_private; 4954 int ret; 4955 4956 if (drm_core_check_feature(dev, DRIVER_MODESET)) 4957 return 0; 4958 4959 if (i915_reset_in_progress(&dev_priv->gpu_error)) { 4960 DRM_ERROR("Reenabling wedged hardware, good luck\n"); 4961 atomic_set(&dev_priv->gpu_error.reset_counter, 0); 4962 } 4963 4964 mutex_lock(&dev->struct_mutex); 4965 dev_priv->ums.mm_suspended = 0; 4966 4967 ret = i915_gem_init_hw(dev); 4968 if (ret != 0) { 4969 mutex_unlock(&dev->struct_mutex); 4970 return ret; 4971 } 4972 4973 BUG_ON(!list_empty(&dev_priv->gtt.base.active_list)); 4974 4975 ret = drm_irq_install(dev, dev->irq); 4976 if (ret) 4977 goto cleanup_ringbuffer; 4978 mutex_unlock(&dev->struct_mutex); 4979 4980 return 0; 4981 4982 cleanup_ringbuffer: 4983 i915_gem_cleanup_ringbuffer(dev); 4984 dev_priv->ums.mm_suspended = 1; 4985 mutex_unlock(&dev->struct_mutex); 4986 4987 return ret; 4988 } 4989 4990 int 4991 i915_gem_leavevt_ioctl(struct drm_device *dev, void *data, 4992 struct drm_file *file_priv) 4993 { 4994 if (drm_core_check_feature(dev, DRIVER_MODESET)) 4995 return 0; 4996 4997 mutex_lock(&dev->struct_mutex); 4998 drm_irq_uninstall(dev); 4999 mutex_unlock(&dev->struct_mutex); 5000 5001 return i915_gem_suspend(dev); 5002 } 5003 5004 void 5005 i915_gem_lastclose(struct drm_device *dev) 5006 { 5007 int ret; 5008 5009 if (drm_core_check_feature(dev, DRIVER_MODESET)) 5010 return; 5011 5012 ret = i915_gem_suspend(dev); 5013 if (ret) 5014 DRM_ERROR("failed to idle hardware: %d\n", ret); 5015 } 5016 5017 static void 5018 init_ring_lists(struct intel_engine_cs *ring) 5019 { 5020 INIT_LIST_HEAD(&ring->active_list); 5021 INIT_LIST_HEAD(&ring->request_list); 5022 } 5023 5024 void i915_init_vm(struct drm_i915_private *dev_priv, 5025 struct i915_address_space *vm) 5026 { 5027 if (!i915_is_ggtt(vm)) 5028 drm_mm_init(&vm->mm, vm->start, vm->total); 5029 vm->dev = dev_priv->dev; 5030 INIT_LIST_HEAD(&vm->active_list); 5031 INIT_LIST_HEAD(&vm->inactive_list); 5032 INIT_LIST_HEAD(&vm->global_link); 5033 list_add_tail(&vm->global_link, &dev_priv->vm_list); 5034 } 5035 5036 void 5037 i915_gem_load(struct drm_device *dev) 5038 { 5039 struct drm_i915_private *dev_priv = dev->dev_private; 5040 int i; 5041 5042 INIT_LIST_HEAD(&dev_priv->vm_list); 5043 i915_init_vm(dev_priv, &dev_priv->gtt.base); 5044 5045 INIT_LIST_HEAD(&dev_priv->context_list); 5046 INIT_LIST_HEAD(&dev_priv->mm.unbound_list); 5047 INIT_LIST_HEAD(&dev_priv->mm.bound_list); 5048 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 5049 for (i = 0; i < I915_NUM_RINGS; i++) 5050 init_ring_lists(&dev_priv->ring[i]); 5051 for (i = 0; i < I915_MAX_NUM_FENCES; i++) 5052 INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list); 5053 INIT_DELAYED_WORK(&dev_priv->mm.retire_work, 5054 i915_gem_retire_work_handler); 5055 INIT_DELAYED_WORK(&dev_priv->mm.idle_work, 5056 i915_gem_idle_work_handler); 5057 init_waitqueue_head(&dev_priv->gpu_error.reset_queue); 5058 5059 /* On GEN3 we really need to make sure the ARB C3 LP bit is set */ 5060 if 
(!drm_core_check_feature(dev, DRIVER_MODESET) && IS_GEN3(dev)) { 5061 I915_WRITE(MI_ARB_STATE, 5062 _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE)); 5063 } 5064 5065 dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL; 5066 5067 /* Old X drivers will take 0-2 for front, back, depth buffers */ 5068 if (!drm_core_check_feature(dev, DRIVER_MODESET)) 5069 dev_priv->fence_reg_start = 3; 5070 5071 if (INTEL_INFO(dev)->gen >= 7 && !IS_VALLEYVIEW(dev)) 5072 dev_priv->num_fence_regs = 32; 5073 else if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev)) 5074 dev_priv->num_fence_regs = 16; 5075 else 5076 dev_priv->num_fence_regs = 8; 5077 5078 /* Initialize fence registers to zero */ 5079 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 5080 i915_gem_restore_fences(dev); 5081 5082 i915_gem_detect_bit_6_swizzle(dev); 5083 init_waitqueue_head(&dev_priv->pending_flip_queue); 5084 5085 dev_priv->mm.interruptible = true; 5086 5087 #if 0 5088 dev_priv->mm.shrinker.scan_objects = i915_gem_shrinker_scan; 5089 dev_priv->mm.shrinker.count_objects = i915_gem_shrinker_count; 5090 dev_priv->mm.shrinker.seeks = DEFAULT_SEEKS; 5091 register_shrinker(&dev_priv->mm.shrinker); 5092 5093 dev_priv->mm.oom_notifier.notifier_call = i915_gem_shrinker_oom; 5094 register_oom_notifier(&dev_priv->mm.oom_notifier); 5095 #endif 5096 5097 lockinit(&dev_priv->fb_tracking.lock, "drmftl", 0, LK_CANRECURSE); 5098 } 5099 5100 void i915_gem_release(struct drm_device *dev, struct drm_file *file) 5101 { 5102 struct drm_i915_file_private *file_priv = file->driver_priv; 5103 5104 cancel_delayed_work_sync(&file_priv->mm.idle_work); 5105 5106 /* Clean up our request list when the client is going away, so that 5107 * later retire_requests won't dereference our soon-to-be-gone 5108 * file_priv. 
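	 * The requests themselves are left on the ring to complete
	 * normally; we only sever their link back to this client.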
5109 */ 5110 spin_lock(&file_priv->mm.lock); 5111 while (!list_empty(&file_priv->mm.request_list)) { 5112 struct drm_i915_gem_request *request; 5113 5114 request = list_first_entry(&file_priv->mm.request_list, 5115 struct drm_i915_gem_request, 5116 client_list); 5117 list_del(&request->client_list); 5118 request->file_priv = NULL; 5119 } 5120 spin_unlock(&file_priv->mm.lock); 5121 } 5122 5123 int 5124 i915_gem_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot, 5125 vm_ooffset_t foff, struct ucred *cred, u_short *color) 5126 { 5127 *color = 0; /* XXXKIB */ 5128 return (0); 5129 } 5130 5131 void 5132 i915_gem_pager_dtor(void *handle) 5133 { 5134 struct drm_gem_object *obj; 5135 struct drm_device *dev; 5136 5137 obj = handle; 5138 dev = obj->dev; 5139 5140 mutex_lock(&dev->struct_mutex); 5141 drm_gem_free_mmap_offset(obj); 5142 i915_gem_release_mmap(to_intel_bo(obj)); 5143 drm_gem_object_unreference(obj); 5144 mutex_unlock(&dev->struct_mutex); 5145 } 5146 5147 static void 5148 i915_gem_file_idle_work_handler(struct work_struct *work) 5149 { 5150 struct drm_i915_file_private *file_priv = 5151 container_of(work, typeof(*file_priv), mm.idle_work.work); 5152 5153 atomic_set(&file_priv->rps_wait_boost, false); 5154 } 5155 5156 int i915_gem_open(struct drm_device *dev, struct drm_file *file) 5157 { 5158 struct drm_i915_file_private *file_priv; 5159 int ret; 5160 5161 DRM_DEBUG_DRIVER("\n"); 5162 5163 file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL); 5164 if (!file_priv) 5165 return -ENOMEM; 5166 5167 file->driver_priv = file_priv; 5168 file_priv->dev_priv = dev->dev_private; 5169 file_priv->file = file; 5170 5171 spin_init(&file_priv->mm.lock, "i915_priv"); 5172 INIT_LIST_HEAD(&file_priv->mm.request_list); 5173 INIT_DELAYED_WORK(&file_priv->mm.idle_work, 5174 i915_gem_file_idle_work_handler); 5175 5176 ret = i915_gem_context_open(dev, file); 5177 if (ret) 5178 kfree(file_priv); 5179 5180 return ret; 5181 } 5182 5183 void i915_gem_track_fb(struct drm_i915_gem_object *old, 5184 struct drm_i915_gem_object *new, 5185 unsigned frontbuffer_bits) 5186 { 5187 if (old) { 5188 WARN_ON(!mutex_is_locked(&old->base.dev->struct_mutex)); 5189 WARN_ON(!(old->frontbuffer_bits & frontbuffer_bits)); 5190 old->frontbuffer_bits &= ~frontbuffer_bits; 5191 } 5192 5193 if (new) { 5194 WARN_ON(!mutex_is_locked(&new->base.dev->struct_mutex)); 5195 WARN_ON(new->frontbuffer_bits & frontbuffer_bits); 5196 new->frontbuffer_bits |= frontbuffer_bits; 5197 } 5198 } 5199 5200 #if 0 5201 static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task) 5202 { 5203 if (!mutex_is_locked(mutex)) 5204 return false; 5205 5206 #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_MUTEXES) 5207 return mutex->owner == task; 5208 #else 5209 /* Since UP may be pre-empted, we cannot assume that we own the lock */ 5210 return false; 5211 #endif 5212 } 5213 #endif 5214 5215 #if 0 5216 static bool i915_gem_shrinker_lock(struct drm_device *dev, bool *unlock) 5217 { 5218 if (!mutex_trylock(&dev->struct_mutex)) { 5219 if (!mutex_is_locked_by(&dev->struct_mutex, current)) 5220 return false; 5221 5222 if (to_i915(dev)->mm.shrinker_no_lock_stealing) 5223 return false; 5224 5225 *unlock = false; 5226 } else 5227 *unlock = true; 5228 5229 return true; 5230 } 5231 5232 static int num_vma_bound(struct drm_i915_gem_object *obj) 5233 { 5234 struct i915_vma *vma; 5235 int count = 0; 5236 5237 list_for_each_entry(vma, &obj->vma_list, vma_link) 5238 if (drm_mm_node_allocated(&vma->node)) 5239 count++; 5240 5241 return count; 5242 } 5243 5244 
static unsigned long 5245 i915_gem_inactive_count(struct shrinker *shrinker, struct shrink_control *sc) 5246 { 5247 struct drm_i915_private *dev_priv = 5248 container_of(shrinker, 5249 struct drm_i915_private, 5250 mm.inactive_shrinker); 5251 struct drm_device *dev = dev_priv->dev; 5252 struct drm_i915_gem_object *obj; 5253 unsigned long count; 5254 bool unlock; 5255 5256 if (!i915_gem_shrinker_lock(dev, &unlock)) 5257 return 0; 5258 5259 count = 0; 5260 list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list) 5261 if (obj->pages_pin_count == 0) 5262 count += obj->base.size >> PAGE_SHIFT; 5263 5264 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { 5265 if (!i915_gem_obj_is_pinned(obj) && 5266 obj->pages_pin_count == num_vma_bound(obj)) 5267 count += obj->base.size >> PAGE_SHIFT; 5268 } 5269 5270 if (unlock) 5271 mutex_unlock(&dev->struct_mutex); 5272 5273 return count; 5274 } 5275 #endif 5276 5277 /* All the new VM stuff */ 5278 unsigned long i915_gem_obj_offset(struct drm_i915_gem_object *o, 5279 struct i915_address_space *vm) 5280 { 5281 struct drm_i915_private *dev_priv = o->base.dev->dev_private; 5282 struct i915_vma *vma; 5283 5284 if (!dev_priv->mm.aliasing_ppgtt || 5285 vm == &dev_priv->mm.aliasing_ppgtt->base) 5286 vm = &dev_priv->gtt.base; 5287 5288 list_for_each_entry(vma, &o->vma_list, vma_link) { 5289 if (vma->vm == vm) 5290 return vma->node.start; 5291 5292 } 5293 WARN(1, "%s vma for this object not found.\n", 5294 i915_is_ggtt(vm) ? "global" : "ppgtt"); 5295 return -1; 5296 } 5297 5298 bool i915_gem_obj_bound(struct drm_i915_gem_object *o, 5299 struct i915_address_space *vm) 5300 { 5301 struct i915_vma *vma; 5302 5303 list_for_each_entry(vma, &o->vma_list, vma_link) 5304 if (vma->vm == vm && drm_mm_node_allocated(&vma->node)) 5305 return true; 5306 5307 return false; 5308 } 5309 5310 bool i915_gem_obj_bound_any(struct drm_i915_gem_object *o) 5311 { 5312 struct i915_vma *vma; 5313 5314 list_for_each_entry(vma, &o->vma_list, vma_link) 5315 if (drm_mm_node_allocated(&vma->node)) 5316 return true; 5317 5318 return false; 5319 } 5320 5321 unsigned long i915_gem_obj_size(struct drm_i915_gem_object *o, 5322 struct i915_address_space *vm) 5323 { 5324 struct drm_i915_private *dev_priv = o->base.dev->dev_private; 5325 struct i915_vma *vma; 5326 5327 if (!dev_priv->mm.aliasing_ppgtt || 5328 vm == &dev_priv->mm.aliasing_ppgtt->base) 5329 vm = &dev_priv->gtt.base; 5330 5331 BUG_ON(list_empty(&o->vma_list)); 5332 5333 list_for_each_entry(vma, &o->vma_list, vma_link) 5334 if (vma->vm == vm) 5335 return vma->node.size; 5336 5337 return 0; 5338 } 5339 5340 #if 0 5341 static unsigned long 5342 i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) 5343 { 5344 struct drm_i915_private *dev_priv = 5345 container_of(shrinker, struct drm_i915_private, mm.shrinker); 5346 struct drm_device *dev = dev_priv->dev; 5347 unsigned long freed; 5348 bool unlock; 5349 5350 if (!i915_gem_shrinker_lock(dev, &unlock)) 5351 return SHRINK_STOP; 5352 5353 freed = i915_gem_purge(dev_priv, sc->nr_to_scan); 5354 if (freed < sc->nr_to_scan) 5355 freed += __i915_gem_shrink(dev_priv, 5356 sc->nr_to_scan - freed, 5357 false); 5358 if (freed < sc->nr_to_scan) 5359 freed += i915_gem_shrink_all(dev_priv); 5360 5361 if (unlock) 5362 mutex_unlock(&dev->struct_mutex); 5363 5364 return freed; 5365 } 5366 #endif 5367 5368 struct i915_vma *i915_gem_obj_to_ggtt(struct drm_i915_gem_object *obj) 5369 { 5370 struct i915_vma *vma; 5371 5372 /* This WARN has probably outlived its usefulness 
(callers already 5373 * WARN if they don't find the GGTT vma they expect). When removing, 5374 * remember to remove the pre-check in is_pin_display() as well */ 5375 if (WARN_ON(list_empty(&obj->vma_list))) 5376 return NULL; 5377 5378 vma = list_first_entry(&obj->vma_list, typeof(*vma), vma_link); 5379 if (vma->vm != obj_to_ggtt(obj)) 5380 return NULL; 5381 5382 return vma; 5383 } 5384
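#if 0
/*
 * Illustrative sketch, not built: how a caller might use the VMA
 * helpers above to query an object's GGTT offset. The function name is
 * hypothetical; the driver's real helper for this is
 * i915_gem_obj_ggtt_offset().
 */
static unsigned long example_ggtt_offset(struct drm_i915_gem_object *obj)
{
	struct i915_vma *vma = i915_gem_obj_to_ggtt(obj);

	/* Not bound into the global GTT (or no VMA at all). */
	if (vma == NULL || !drm_mm_node_allocated(&vma->node))
		return 0;

	return vma->node.start;
}
#endif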