/*
 * Copyright © 2008 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 * Copyright (c) 2011 The FreeBSD Foundation
 * All rights reserved.
 *
 * This software was developed by Konstantin Belousov under sponsorship from
 * the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

#include <machine/md_var.h>

#include <drm/drmP.h>
#include <drm/drm_vma_manager.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "i915_trace.h"
#include "intel_drv.h"
#include <linux/shmem_fs.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/pci.h>

static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj,
						   bool force);
static __must_check int
i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
			       bool readonly);
static void
i915_gem_object_retire(struct drm_i915_gem_object *obj);

static void i915_gem_write_fence(struct drm_device *dev, int reg,
				 struct drm_i915_gem_object *obj);
static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
					 struct drm_i915_fence_reg *fence,
					 bool enable);

static unsigned long i915_gem_purge(struct drm_i915_private *dev_priv, long target);
static unsigned long i915_gem_shrink_all(struct drm_i915_private *dev_priv);

static bool cpu_cache_is_coherent(struct drm_device *dev,
				  enum i915_cache_level level)
{
	return HAS_LLC(dev) || level != I915_CACHE_NONE;
}

static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
	if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
		return true;

	return obj->pin_display;
}

static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
{
	if (obj->tiling_mode)
		i915_gem_release_mmap(obj);

	/* As we do not have an associated fence register, we will force
	 * a tiling change if we ever need to acquire one.
	 */
	obj->fence_dirty = false;
	obj->fence_reg = I915_FENCE_REG_NONE;
}

/* some bookkeeping */
static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
				  size_t size)
{
	spin_lock(&dev_priv->mm.object_stat_lock);
	dev_priv->mm.object_count++;
	dev_priv->mm.object_memory += size;
	spin_unlock(&dev_priv->mm.object_stat_lock);
}

static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
				     size_t size)
{
	spin_lock(&dev_priv->mm.object_stat_lock);
	dev_priv->mm.object_count--;
	dev_priv->mm.object_memory -= size;
	spin_unlock(&dev_priv->mm.object_stat_lock);
}

static int
i915_gem_wait_for_error(struct i915_gpu_error *error)
{
	int ret;

#define EXIT_COND (!i915_reset_in_progress(error) || \
		   i915_terminally_wedged(error))
	if (EXIT_COND)
		return 0;

	/*
	 * Only wait 10 seconds for the gpu reset to complete to avoid hanging
	 * userspace. If it takes that long something really bad is going on and
	 * we should simply try to bail out and fail as gracefully as possible.
	 */
	ret = wait_event_interruptible_timeout(error->reset_queue,
					       EXIT_COND,
					       10*HZ);
	if (ret == 0) {
		DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
		return -EIO;
	} else if (ret < 0) {
		return ret;
	}
#undef EXIT_COND

	return 0;
}

int i915_mutex_lock_interruptible(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret;

	ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
	if (ret)
		return ret;

	ret = mutex_lock_interruptible(&dev->struct_mutex);
	if (ret)
		return ret;

	WARN_ON(i915_verify_lists(dev));
	return 0;
}

static inline bool
i915_gem_object_is_inactive(struct drm_i915_gem_object *obj)
{
	return i915_gem_obj_bound_any(obj) && !obj->active;
}

int
i915_gem_init_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_init *args = data;

	if (drm_core_check_feature(dev, DRIVER_MODESET))
		return -ENODEV;

	if (args->gtt_start >= args->gtt_end ||
	    (args->gtt_end | args->gtt_start) & (PAGE_SIZE - 1))
		return -EINVAL;

	/* GEM with user mode setting was never supported on ilk and later. */
	if (INTEL_INFO(dev)->gen >= 5)
		return -ENODEV;

	mutex_lock(&dev->struct_mutex);
	kprintf("INITGLOBALGTT GTT_START %016jx\n", (uintmax_t)args->gtt_start);
	i915_gem_setup_global_gtt(dev, args->gtt_start, args->gtt_end,
				  args->gtt_end);
	dev_priv->gtt.mappable_end = args->gtt_end;
	mutex_unlock(&dev->struct_mutex);

	return 0;
}

int
i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
			    struct drm_file *file)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_get_aperture *args = data;
	struct drm_i915_gem_object *obj;
	size_t pinned;

	pinned = 0;
	mutex_lock(&dev->struct_mutex);
	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list)
		if (i915_gem_obj_is_pinned(obj))
			pinned += i915_gem_obj_ggtt_size(obj);
	mutex_unlock(&dev->struct_mutex);

	args->aper_size = dev_priv->gtt.base.total;
	args->aper_available_size = args->aper_size - pinned;

	return 0;
}

static void i915_gem_object_detach_phys(struct drm_i915_gem_object *obj)
{
	drm_dma_handle_t *phys = obj->phys_handle;

	if (!phys)
		return;

	if (obj->madv == I915_MADV_WILLNEED) {
		struct vm_object *mapping = obj->base.vm_obj;
		char *vaddr = phys->vaddr;
		int i;

		for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
			struct vm_page *page = shmem_read_mapping_page(mapping, i);
			if (!IS_ERR(page)) {
				char *dst = kmap_atomic(page);
				memcpy(dst, vaddr, PAGE_SIZE);
				drm_clflush_virt_range(dst, PAGE_SIZE);
				kunmap_atomic(dst);

				set_page_dirty(page);
				mark_page_accessed(page);
#if 0
				page_cache_release(page);
#endif
			}
			vaddr += PAGE_SIZE;
		}
		i915_gem_chipset_flush(obj->base.dev);
	}

#ifdef CONFIG_X86
	set_memory_wb((unsigned long)phys->vaddr, phys->size / PAGE_SIZE);
#endif
	drm_pci_free(obj->base.dev, phys);
	obj->phys_handle = NULL;
}

int
i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
			    int align)
{
	drm_dma_handle_t *phys;
	struct vm_object *mapping;
	char *vaddr;
	int i;

	if (obj->phys_handle) {
		if ((unsigned long)obj->phys_handle->vaddr & (align - 1))
			return -EBUSY;

		return 0;
	}

	if (obj->madv != I915_MADV_WILLNEED)
		return -EFAULT;

#if 0
	if (obj->base.filp == NULL)
		return -EINVAL;
#endif

	/* create a new object */
	phys = drm_pci_alloc(obj->base.dev, obj->base.size, align);
	if (!phys)
		return -ENOMEM;

	vaddr = phys->vaddr;
#ifdef CONFIG_X86
	set_memory_wc((unsigned long)vaddr, phys->size / PAGE_SIZE);
#endif
	mapping = obj->base.vm_obj;
	for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
		struct vm_page *page;
		char *src;

		page = shmem_read_mapping_page(mapping, i);
		if (IS_ERR(page)) {
#ifdef CONFIG_X86
			set_memory_wb((unsigned long)phys->vaddr, phys->size / PAGE_SIZE);
#endif
			drm_pci_free(obj->base.dev, phys);
			return PTR_ERR(page);
		}

		src = kmap_atomic(page);
		memcpy(vaddr, src, PAGE_SIZE);
		kunmap_atomic(src);

		mark_page_accessed(page);
#if 0
		page_cache_release(page);
#endif

		vaddr += PAGE_SIZE;
	}

	obj->phys_handle = phys;
	return 0;
}

static int
i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pwrite *args,
		     struct drm_file *file_priv)
{
	struct drm_device *dev = obj->base.dev;
	void *vaddr = (char *)obj->phys_handle->vaddr + args->offset;
	char __user *user_data = to_user_ptr(args->data_ptr);

	if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) {
		unsigned long unwritten;

		/* The physical object once assigned is fixed for the lifetime
		 * of the obj, so we can safely drop the lock and continue
		 * to access vaddr.
		 */
		mutex_unlock(&dev->struct_mutex);
		unwritten = copy_from_user(vaddr, user_data, args->size);
		mutex_lock(&dev->struct_mutex);
		if (unwritten)
			return -EFAULT;
	}

	i915_gem_chipset_flush(dev);
	return 0;
}

void *i915_gem_object_alloc(struct drm_device *dev)
{
	return kmalloc(sizeof(struct drm_i915_gem_object),
		       M_DRM, M_WAITOK | M_ZERO);
}

void i915_gem_object_free(struct drm_i915_gem_object *obj)
{
	kfree(obj);
}

static int
i915_gem_create(struct drm_file *file,
		struct drm_device *dev,
		uint64_t size,
		uint32_t *handle_p)
{
	struct drm_i915_gem_object *obj;
	int ret;
	u32 handle;

	size = roundup(size, PAGE_SIZE);
	if (size == 0)
		return -EINVAL;

	/* Allocate the new object */
	obj = i915_gem_alloc_object(dev, size);
	if (obj == NULL)
		return -ENOMEM;

	ret = drm_gem_handle_create(file, &obj->base, &handle);
	/* drop reference from allocate - handle holds it now */
	drm_gem_object_unreference_unlocked(&obj->base);
	if (ret)
		return ret;

	*handle_p = handle;
	return 0;
}

int
i915_gem_dumb_create(struct drm_file *file,
		     struct drm_device *dev,
		     struct drm_mode_create_dumb *args)
{
	/* have to work out size/pitch and return them */
	args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64);
	args->size = args->pitch * args->height;
	return i915_gem_create(file, dev,
			       args->size, &args->handle);
}

/**
 * Creates a new mm object and returns a handle to it.
 */
int
i915_gem_create_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_gem_create *args = data;

	return i915_gem_create(file, dev,
			       args->size, &args->handle);
}

static inline int
__copy_to_user_swizzled(char __user *cpu_vaddr,
			const char *gpu_vaddr, int gpu_offset,
			int length)
{
	int ret, cpu_offset = 0;

	while (length > 0) {
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
		int this_length = min(cacheline_end - gpu_offset, length);
		int swizzled_gpu_offset = gpu_offset ^ 64;

		ret = __copy_to_user(cpu_vaddr + cpu_offset,
				     gpu_vaddr + swizzled_gpu_offset,
				     this_length);
		if (ret)
			return ret + length;

		cpu_offset += this_length;
		gpu_offset += this_length;
		length -= this_length;
	}

	return 0;
}

static inline int
__copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
			  const char __user *cpu_vaddr,
			  int length)
{
	int ret, cpu_offset = 0;

	while (length > 0) {
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
		int this_length = min(cacheline_end - gpu_offset, length);
		int swizzled_gpu_offset = gpu_offset ^ 64;

		ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
				       cpu_vaddr + cpu_offset,
				       this_length);
		if (ret)
			return ret + length;

		cpu_offset += this_length;
		gpu_offset += this_length;
		length -= this_length;
	}

	return 0;
}

/*
 * Pins the specified object's pages and synchronizes the object with
 * GPU accesses. Sets needs_clflush to non-zero if the caller should
 * flush the object from the CPU cache.
 */
int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
				    int *needs_clflush)
{
	int ret;

	*needs_clflush = 0;

#if 0
	if (!obj->base.filp)
		return -EINVAL;
#endif

	if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) {
		/* If we're not in the cpu read domain, set ourself into the gtt
		 * read domain and manually flush cachelines (if required). This
		 * optimizes for the case when the gpu will dirty the data
		 * anyway again before the next pread happens. */
		*needs_clflush = !cpu_cache_is_coherent(obj->base.dev,
							obj->cache_level);
		ret = i915_gem_object_wait_rendering(obj, true);
		if (ret)
			return ret;

		i915_gem_object_retire(obj);
	}

	ret = i915_gem_object_get_pages(obj);
	if (ret)
		return ret;

	i915_gem_object_pin_pages(obj);

	return ret;
}

/* Per-page copy function for the shmem pread fastpath.
 * Flushes invalid cachelines before reading the target if
 * needs_clflush is set. */
static int
shmem_pread_fast(struct vm_page *page, int shmem_page_offset, int page_length,
		 char __user *user_data,
		 bool page_do_bit17_swizzling, bool needs_clflush)
{
	char *vaddr;
	int ret;

	if (unlikely(page_do_bit17_swizzling))
		return -EINVAL;

	vaddr = kmap_atomic(page);
	if (needs_clflush)
		drm_clflush_virt_range(vaddr + shmem_page_offset,
				       page_length);
	ret = __copy_to_user_inatomic(user_data,
				      vaddr + shmem_page_offset,
				      page_length);
	kunmap_atomic(vaddr);

	return ret ? -EFAULT : 0;
}
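/*
 * Editorial note (not from the original source): the swizzled copy helpers
 * above model Gen2/3 bit-17 swizzling by XORing the GPU offset with 64,
 * i.e. swapping the two 64-byte halves of every 128-byte span. The
 * pread/pwrite loops decide per page whether that is needed by testing
 * bit 17 of the page's physical address, as in:
 *
 *	page_do_bit17_swizzling = obj_do_bit17_swizzling &&
 *		(page_to_phys(page) & (1 << 17)) != 0;
 *
 * The atomic fast paths refuse swizzled pages (-EINVAL) and defer to the
 * kmap()-based slow paths below.
 */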
static void
shmem_clflush_swizzled_range(char *addr, unsigned long length,
			     bool swizzled)
{
	if (unlikely(swizzled)) {
		unsigned long start = (unsigned long) addr;
		unsigned long end = (unsigned long) addr + length;

		/* For swizzling simply ensure that we always flush both
		 * channels. Lame, but simple and it works. Swizzled
		 * pwrite/pread is far from a hotpath - current userspace
		 * doesn't use it at all. */
		start = round_down(start, 128);
		end = round_up(end, 128);

		drm_clflush_virt_range((void *)start, end - start);
	} else {
		drm_clflush_virt_range(addr, length);
	}

}

/* Only difference to the fast-path function is that this can handle bit17
 * and uses non-atomic copy and kmap functions. */
static int
shmem_pread_slow(struct vm_page *page, int shmem_page_offset, int page_length,
		 char __user *user_data,
		 bool page_do_bit17_swizzling, bool needs_clflush)
{
	char *vaddr;
	int ret;

	vaddr = kmap(page);
	if (needs_clflush)
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
					     page_length,
					     page_do_bit17_swizzling);

	if (page_do_bit17_swizzling)
		ret = __copy_to_user_swizzled(user_data,
					      vaddr, shmem_page_offset,
					      page_length);
	else
		ret = __copy_to_user(user_data,
				     vaddr + shmem_page_offset,
				     page_length);
	kunmap(page);

	return ret ? -EFAULT : 0;
}

static int
i915_gem_shmem_pread(struct drm_device *dev,
		     struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pread *args,
		     struct drm_file *file)
{
	char __user *user_data;
	ssize_t remain;
	loff_t offset;
	int shmem_page_offset, page_length, ret = 0;
	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
	int prefaulted = 0;
	int needs_clflush = 0;
	int i;

	user_data = to_user_ptr(args->data_ptr);
	remain = args->size;

	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);

	ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
	if (ret)
		return ret;

	offset = args->offset;

	for (i = 0; i < (obj->base.size >> PAGE_SHIFT); i++) {
		struct vm_page *page = obj->pages[i];

		/* Skip pages before the start of the requested range,
		 * mirroring the shmem pwrite loop below. */
		if (i < offset >> PAGE_SHIFT)
			continue;

		if (remain <= 0)
			break;

		/* Operation in this page
		 *
		 * shmem_page_offset = offset within page in shmem file
		 * page_length = bytes to copy for this page
		 */
		shmem_page_offset = offset_in_page(offset);
		page_length = remain;
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - shmem_page_offset;

		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
			(page_to_phys(page) & (1 << 17)) != 0;

		ret = shmem_pread_fast(page, shmem_page_offset, page_length,
				       user_data, page_do_bit17_swizzling,
				       needs_clflush);
		if (ret == 0)
			goto next_page;

		mutex_unlock(&dev->struct_mutex);

		if (likely(!i915.prefault_disable) && !prefaulted) {
			ret = fault_in_multipages_writeable(user_data, remain);
			/* Userspace is tricking us, but we've already clobbered
			 * its pages with the prefault and promised to write the
			 * data up to the first fault. Hence ignore any errors
			 * and just continue. */
			(void)ret;
			prefaulted = 1;
		}

		ret = shmem_pread_slow(page, shmem_page_offset, page_length,
				       user_data, page_do_bit17_swizzling,
				       needs_clflush);

		mutex_lock(&dev->struct_mutex);

		if (ret)
			goto out;

next_page:
		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

out:
	i915_gem_object_unpin_pages(obj);

	return ret;
}

/**
 * Reads data from the object referenced by handle.
 *
 * On error, the contents of *data are undefined.
 */
int
i915_gem_pread_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file)
{
	struct drm_i915_gem_pread *args = data;
	struct drm_i915_gem_object *obj;
	int ret = 0;

	if (args->size == 0)
		return 0;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Bounds check source.  */
	if (args->offset > obj->base.size ||
	    args->size > obj->base.size - args->offset) {
		ret = -EINVAL;
		goto out;
	}

	trace_i915_gem_object_pread(obj, args->offset, args->size);

	ret = i915_gem_shmem_pread(dev, obj, args, file);

out:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/* This is the fast write path which cannot handle
 * page faults in the source data
 */

#if 0	/* XXX: buggy on core2 machines */
static inline int
fast_user_write(struct io_mapping *mapping,
		loff_t page_base, int page_offset,
		char __user *user_data,
		int length)
{
	void __iomem *vaddr_atomic;
	void *vaddr;
	unsigned long unwritten;

	vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
	/* We can use the cpu mem copy function because this is X86. */
	vaddr = (char __force*)vaddr_atomic + page_offset;
	unwritten = __copy_from_user_inatomic_nocache(vaddr,
						      user_data, length);
	io_mapping_unmap_atomic(vaddr_atomic);
	return unwritten;
}
#endif

static int
i915_gem_gtt_write(struct drm_device *dev, struct drm_i915_gem_object *obj,
		   uint64_t data_ptr, uint64_t size, uint64_t offset,
		   struct drm_file *file)
{
	vm_offset_t mkva;
	int ret;

	/*
	 * Pass the unaligned physical address and size to pmap_mapdev_attr()
	 * so it can properly calculate whether an extra page needs to be
	 * mapped or not to cover the requested range.  The function will
	 * add the page offset into the returned mkva for us.
	 */
	mkva = (vm_offset_t)pmap_mapdev_attr(dev->agp->base +
	    i915_gem_obj_ggtt_offset(obj) + offset, size, PAT_WRITE_COMBINING);
	ret = -copyin_nofault((void *)(uintptr_t)data_ptr, (char *)mkva, size);
	pmap_unmapdev(mkva, size);
	return ret;
}
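/*
 * Editorial note (not from the original source): i915_gem_gtt_write() is
 * the DragonFly substitute for the #if 0'd fast_user_write() above. It
 * maps the object's aperture range write-combined via pmap_mapdev_attr(),
 * copies the user buffer with copyin_nofault(), and negates the BSD errno
 * so callers see the Linux-style negative error convention used elsewhere
 * in this file.
 */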
/**
 * This is the fast pwrite path, where we copy the data directly from the
 * user into the GTT, uncached.
 */
static int
i915_gem_gtt_pwrite_fast(struct drm_device *dev,
			 struct drm_i915_gem_object *obj,
			 struct drm_i915_gem_pwrite *args,
			 struct drm_file *file)
{
	ssize_t remain;
	loff_t offset, page_base;
	char __user *user_data;
	int page_offset, page_length, ret;

	ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK);
	if (ret)
		goto out;

	ret = i915_gem_object_set_to_gtt_domain(obj, true);
	if (ret)
		goto out_unpin;

	ret = i915_gem_object_put_fence(obj);
	if (ret)
		goto out_unpin;

	user_data = to_user_ptr(args->data_ptr);
	remain = args->size;

	offset = i915_gem_obj_ggtt_offset(obj) + args->offset;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		page_base = offset & ~PAGE_MASK;
		page_offset = offset_in_page(offset);
		page_length = remain;
		if ((page_offset + remain) > PAGE_SIZE)
			page_length = PAGE_SIZE - page_offset;

		/* If we get a fault while copying data, then (presumably) our
		 * source page isn't available.  Return the error and we'll
		 * retry in the slow path.
		 */
#if 0
		if (fast_user_write(dev_priv->gtt.mappable, page_base,
				    page_offset, user_data, page_length)) {
#else
		if (i915_gem_gtt_write(dev, obj, args->data_ptr, args->size,
				       args->offset, file)) {
#endif
			ret = -EFAULT;
			goto out_unpin;
		}

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

out_unpin:
	i915_gem_object_ggtt_unpin(obj);
out:
	return ret;
}

/* Per-page copy function for the shmem pwrite fastpath.
 * Flushes invalid cachelines before writing to the target if
 * needs_clflush_before is set and flushes out any written cachelines after
 * writing if needs_clflush is set. */
static int
shmem_pwrite_fast(struct vm_page *page, int shmem_page_offset, int page_length,
		  char __user *user_data,
		  bool page_do_bit17_swizzling,
		  bool needs_clflush_before,
		  bool needs_clflush_after)
{
	char *vaddr;
	int ret;

	if (unlikely(page_do_bit17_swizzling))
		return -EINVAL;

	vaddr = kmap_atomic(page);
	if (needs_clflush_before)
		drm_clflush_virt_range(vaddr + shmem_page_offset,
				       page_length);
	ret = __copy_from_user_inatomic(vaddr + shmem_page_offset,
					user_data, page_length);
	if (needs_clflush_after)
		drm_clflush_virt_range(vaddr + shmem_page_offset,
				       page_length);
	kunmap_atomic(vaddr);

	return ret ? -EFAULT : 0;
}

/* Only difference to the fast-path function is that this can handle bit17
 * and uses non-atomic copy and kmap functions.
 */
static int
shmem_pwrite_slow(struct vm_page *page, int shmem_page_offset, int page_length,
		  char __user *user_data,
		  bool page_do_bit17_swizzling,
		  bool needs_clflush_before,
		  bool needs_clflush_after)
{
	char *vaddr;
	int ret;

	vaddr = kmap(page);
	if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
					     page_length,
					     page_do_bit17_swizzling);
	if (page_do_bit17_swizzling)
		ret = __copy_from_user_swizzled(vaddr, shmem_page_offset,
						user_data,
						page_length);
	else
		ret = __copy_from_user(vaddr + shmem_page_offset,
				       user_data,
				       page_length);
	if (needs_clflush_after)
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
					     page_length,
					     page_do_bit17_swizzling);
	kunmap(page);

	return ret ? -EFAULT : 0;
}

static int
i915_gem_shmem_pwrite(struct drm_device *dev,
		      struct drm_i915_gem_object *obj,
		      struct drm_i915_gem_pwrite *args,
		      struct drm_file *file)
{
	ssize_t remain;
	loff_t offset;
	char __user *user_data;
	int shmem_page_offset, page_length, ret = 0;
	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
	int hit_slowpath = 0;
	int needs_clflush_after = 0;
	int needs_clflush_before = 0;
	int i;

	user_data = to_user_ptr(args->data_ptr);
	remain = args->size;

	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);

	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
		/* If we're not in the cpu write domain, set ourself into the gtt
		 * write domain and manually flush cachelines (if required). This
		 * optimizes for the case when the gpu will use the data
		 * right away and we therefore have to clflush anyway. */
		needs_clflush_after = cpu_write_needs_clflush(obj);
		ret = i915_gem_object_wait_rendering(obj, false);
		if (ret)
			return ret;

		i915_gem_object_retire(obj);
	}
	/* Same trick applies to invalidate partially written cachelines read
	 * before writing. */
	if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0)
		needs_clflush_before =
			!cpu_cache_is_coherent(dev, obj->cache_level);

	ret = i915_gem_object_get_pages(obj);
	if (ret)
		return ret;

	i915_gem_object_pin_pages(obj);

	offset = args->offset;
	obj->dirty = 1;

	VM_OBJECT_LOCK(obj->base.vm_obj);
	vm_object_pip_add(obj->base.vm_obj, 1);
	for (i = 0; i < (obj->base.size >> PAGE_SHIFT); i++) {
		struct vm_page *page = obj->pages[i];
		int partial_cacheline_write;

		if (i < offset >> PAGE_SHIFT)
			continue;

		if (remain <= 0)
			break;

		/* Operation in this page
		 *
		 * shmem_page_offset = offset within page in shmem file
		 * page_length = bytes to copy for this page
		 */
		shmem_page_offset = offset_in_page(offset);

		page_length = remain;
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - shmem_page_offset;

		/* If we don't overwrite a cacheline completely we need to be
		 * careful to have up-to-date data by first clflushing. Don't
		 * overcomplicate things and flush the entire patch.
		 */
		partial_cacheline_write = needs_clflush_before &&
			((shmem_page_offset | page_length)
				& (cpu_clflush_line_size - 1));

		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
			(page_to_phys(page) & (1 << 17)) != 0;

		ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
					user_data, page_do_bit17_swizzling,
					partial_cacheline_write,
					needs_clflush_after);
		if (ret == 0)
			goto next_page;

		hit_slowpath = 1;
		mutex_unlock(&dev->struct_mutex);
		ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
					user_data, page_do_bit17_swizzling,
					partial_cacheline_write,
					needs_clflush_after);

		mutex_lock(&dev->struct_mutex);

		if (ret)
			goto out;

next_page:
		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}
	vm_object_pip_wakeup(obj->base.vm_obj);
	VM_OBJECT_UNLOCK(obj->base.vm_obj);

out:
	i915_gem_object_unpin_pages(obj);

	if (hit_slowpath) {
		/*
		 * Fixup: Flush cpu caches in case we didn't flush the dirty
		 * cachelines in-line while writing and the object moved
		 * out of the cpu write domain while we've dropped the lock.
		 */
		if (!needs_clflush_after &&
		    obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
			if (i915_gem_clflush_object(obj, obj->pin_display))
				i915_gem_chipset_flush(dev);
		}
	}

	if (needs_clflush_after)
		i915_gem_chipset_flush(dev);

	return ret;
}

/**
 * Writes data to the object referenced by handle.
 *
 * On error, the contents of the buffer that were to be modified are undefined.
 */
int
i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_gem_pwrite *args = data;
	struct drm_i915_gem_object *obj;
	int ret;

	if (args->size == 0)
		return 0;

	if (likely(!i915.prefault_disable)) {
		ret = fault_in_multipages_readable(to_user_ptr(args->data_ptr),
						   args->size);
		if (ret)
			return -EFAULT;
	}

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Bounds check destination. */
	if (args->offset > obj->base.size ||
	    args->size > obj->base.size - args->offset) {
		ret = -EINVAL;
		goto out;
	}

	trace_i915_gem_object_pwrite(obj, args->offset, args->size);

	ret = -EFAULT;
	/* We can only do the GTT pwrite on untiled buffers, as otherwise
	 * it would end up going through the fenced access, and we'll get
	 * different detiling behavior between reading and writing.
	 * pread/pwrite currently are reading and writing from the CPU
	 * perspective, requiring manual detiling by the client.
	 */
	if (obj->phys_handle) {
		ret = i915_gem_phys_pwrite(obj, args, file);
		goto out;
	}

	if (obj->tiling_mode == I915_TILING_NONE &&
	    obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
	    cpu_write_needs_clflush(obj)) {
		ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
		/* Note that the gtt paths might fail with non-page-backed user
		 * pointers (e.g. gtt mappings when moving data between
		 * textures). Fallback to the shmem path in that case.
		 */
	}

	if (ret == -EFAULT || ret == -ENOSPC)
		ret = i915_gem_shmem_pwrite(dev, obj, args, file);

out:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

int
i915_gem_check_wedge(struct i915_gpu_error *error,
		     bool interruptible)
{
	if (i915_reset_in_progress(error)) {
		/* Non-interruptible callers can't handle -EAGAIN, hence return
		 * -EIO unconditionally for these. */
		if (!interruptible)
			return -EIO;

		/* Recovery complete, but the reset failed ... */
		if (i915_terminally_wedged(error))
			return -EIO;

		return -EAGAIN;
	}

	return 0;
}

/*
 * Compare seqno against outstanding lazy request. Emit a request if they are
 * equal.
 */
int
i915_gem_check_olr(struct intel_engine_cs *ring, u32 seqno)
{
	int ret;

	BUG_ON(!mutex_is_locked(&ring->dev->struct_mutex));

	ret = 0;
	if (seqno == ring->outstanding_lazy_seqno)
		ret = i915_add_request(ring, NULL);

	return ret;
}

#if 0
static void fake_irq(unsigned long data)
{
	wake_up_process((struct task_struct *)data);
}

static bool missed_irq(struct drm_i915_private *dev_priv,
		       struct intel_engine_cs *ring)
{
	return test_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings);
}

static bool can_wait_boost(struct drm_i915_file_private *file_priv)
{
	if (file_priv == NULL)
		return true;

	return !atomic_xchg(&file_priv->rps_wait_boost, true);
}
#endif

/**
 * __wait_seqno - wait until execution of seqno has finished
 * @ring: the ring expected to report seqno
 * @seqno: duh!
 * @reset_counter: reset sequence associated with the given seqno
 * @interruptible: do an interruptible wait (normally yes)
 * @timeout: in - how long to wait (NULL forever); out - how much time remaining
 *
 * Note: It is of utmost importance that the passed in seqno and reset_counter
 * values have been read by the caller in an smp safe manner. Where read-side
 * locks are involved, it is sufficient to read the reset_counter before
 * unlocking the lock that protects the seqno. For lockless tricks, the
 * reset_counter _must_ be read before, and an appropriate smp_rmb must be
 * inserted.
 *
 * Returns 0 if the seqno was found within the allotted time. Else returns the
 * errno with remaining time filled in timeout argument.
 */
static int __wait_seqno(struct intel_engine_cs *ring, u32 seqno,
			unsigned reset_counter,
			bool interruptible,
			struct timespec *timeout,
			struct drm_i915_file_private *file_priv)
{
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
	struct timespec before, now, wait_time={1,0};
	unsigned long timeout_jiffies;
	long end;
	bool wait_forever = true;
	int ret;

	WARN(!intel_irqs_enabled(dev_priv), "IRQs disabled");

	if (i915_seqno_passed(ring->get_seqno(ring, true), seqno))
		return 0;

	if (timeout != NULL) {
		wait_time = *timeout;
		wait_forever = false;
	}

	timeout_jiffies = timespec_to_jiffies_timeout(&wait_time);

	if (WARN_ON(!ring->irq_get(ring)))
		return -ENODEV;

	/* Record current time in case interrupted by signal, or wedged */
	trace_i915_gem_request_wait_begin(ring, seqno);
	getrawmonotonic(&before);

#define EXIT_COND \
	(i915_seqno_passed(ring->get_seqno(ring, false), seqno) || \
	 i915_reset_in_progress(&dev_priv->gpu_error) || \
	 reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter))
	do {
		if (interruptible)
			end = wait_event_interruptible_timeout(ring->irq_queue,
							       EXIT_COND,
							       timeout_jiffies);
		else
			end = wait_event_timeout(ring->irq_queue, EXIT_COND,
						 timeout_jiffies);

		/* We need to check whether any gpu reset happened in between
		 * the caller grabbing the seqno and now ... */
		if (reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter))
			end = -EAGAIN;

		/* ... but upgrade the -EAGAIN to an -EIO if the gpu is truly
		 * gone. */
		ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
		if (ret)
			end = ret;
	} while (end == 0 && wait_forever);

	getrawmonotonic(&now);

	ring->irq_put(ring);
	trace_i915_gem_request_wait_end(ring, seqno);
#undef EXIT_COND

	if (timeout) {
		struct timespec sleep_time = timespec_sub(now, before);
		*timeout = timespec_sub(*timeout, sleep_time);
		if (!timespec_valid(timeout)) /* i.e. negative time remains */
			set_normalized_timespec(timeout, 0, 0);
	}

	switch (end) {
	case -EIO:
	case -EAGAIN: /* Wedged */
	case -ERESTARTSYS: /* Signal */
		return (int)end;
	case 0: /* Timeout */
		return -ETIMEDOUT;	/* -ETIME on Linux */
	default: /* Completed */
		WARN_ON(end < 0); /* We're not aware of other errors */
		return 0;
	}
}

/**
 * Waits for a sequence number to be signaled, and cleans up the
 * request and object lists appropriately for that event.
 */
int
i915_wait_seqno(struct intel_engine_cs *ring, uint32_t seqno)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	bool interruptible = dev_priv->mm.interruptible;
	int ret;

	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
	BUG_ON(seqno == 0);

	ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
	if (ret)
		return ret;

	ret = i915_gem_check_olr(ring, seqno);
	if (ret)
		return ret;

	return __wait_seqno(ring, seqno,
			    atomic_read(&dev_priv->gpu_error.reset_counter),
			    interruptible, NULL, NULL);
}

static int
i915_gem_object_wait_rendering__tail(struct drm_i915_gem_object *obj,
				     struct intel_engine_cs *ring)
{
	if (!obj->active)
		return 0;

	/* Manually manage the write flush as we may have not yet
	 * retired the buffer.
	 *
	 * Note that the last_write_seqno is always the earlier of
	 * the two (read/write) seqno, so if we have successfully waited,
	 * we know we have passed the last write.
	 */
	obj->last_write_seqno = 0;

	return 0;
}

/**
 * Ensures that all rendering to the object has completed and the object is
 * safe to unbind from the GTT or access from the CPU.
 */
static __must_check int
i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
			       bool readonly)
{
	struct intel_engine_cs *ring = obj->ring;
	u32 seqno;
	int ret;

	seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
	if (seqno == 0)
		return 0;

	ret = i915_wait_seqno(ring, seqno);
	if (ret)
		return ret;

	return i915_gem_object_wait_rendering__tail(obj, ring);
}

/* A nonblocking variant of the above wait. This is a highly dangerous routine
 * as the object state may change during this call.
 */
static __must_check int
i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
					    struct drm_i915_file_private *file_priv,
					    bool readonly)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *ring = obj->ring;
	unsigned reset_counter;
	u32 seqno;
	int ret;

	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
	BUG_ON(!dev_priv->mm.interruptible);

	seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
	if (seqno == 0)
		return 0;

	ret = i915_gem_check_wedge(&dev_priv->gpu_error, true);
	if (ret)
		return ret;

	ret = i915_gem_check_olr(ring, seqno);
	if (ret)
		return ret;

	reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
	mutex_unlock(&dev->struct_mutex);
	ret = __wait_seqno(ring, seqno, reset_counter, true, NULL, file_priv);
	mutex_lock(&dev->struct_mutex);
	if (ret)
		return ret;

	return i915_gem_object_wait_rendering__tail(obj, ring);
}
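/*
 * Editorial note (not from the original source): the nonblocking variant
 * above samples the seqno and gpu_error.reset_counter while struct_mutex
 * is still held, drops the mutex for the actual __wait_seqno() so other
 * ioctls and the fault handler can make progress, and then revalidates by
 * retaking the mutex; __wait_seqno() itself bails out with -EAGAIN when
 * the reset counter it was given no longer matches.
 */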
/**
 * Called when user space prepares to use an object with the CPU, either
 * through the mmap ioctl's mapping or a GTT mapping.
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file)
{
	struct drm_i915_gem_set_domain *args = data;
	struct drm_i915_gem_object *obj;
	uint32_t read_domains = args->read_domains;
	uint32_t write_domain = args->write_domain;
	int ret;

	/* Only handle setting domains to types used by the CPU. */
	if (write_domain & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	if (read_domains & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	/* Having something in the write domain implies it's in the read
	 * domain, and only that read domain.  Enforce that in the request.
	 */
	if (write_domain != 0 && read_domains != write_domain)
		return -EINVAL;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Try to flush the object off the GPU without holding the lock.
	 * We will repeat the flush holding the lock in the normal manner
	 * to catch cases where we are gazumped.
	 */
	ret = i915_gem_object_wait_rendering__nonblocking(obj,
							  file->driver_priv,
							  !write_domain);
	if (ret)
		goto unref;

	if (read_domains & I915_GEM_DOMAIN_GTT) {
		ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);

		/* Silently promote "you're not bound, there was nothing to do"
		 * to success, since the client was just asking us to
		 * make sure everything was done.
		 */
		if (ret == -EINVAL)
			ret = 0;
	} else {
		ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
	}

unref:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/**
 * Called when user space has done writes to this buffer
 */
int
i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file)
{
	struct drm_i915_gem_sw_finish *args = data;
	struct drm_i915_gem_object *obj;
	int ret = 0;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Pinned buffers may be scanout, so flush the cache */
	if (obj->pin_display)
		i915_gem_object_flush_cpu_write_domain(obj, true);

	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/**
 * Maps the contents of an object, returning the address it is mapped
 * into.
 *
 * While the mapping holds a reference on the contents of the object, it doesn't
 * imply a ref on the object itself.
 */
int
i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_gem_mmap *args = data;
	struct drm_gem_object *obj;
	unsigned long addr;
	struct proc *p = curproc;
	vm_map_t map = &p->p_vmspace->vm_map;
	vm_size_t size;
	int error = 0, rv;

	obj = drm_gem_object_lookup(dev, file, args->handle);
	if (obj == NULL)
		return -ENOENT;

	if (args->size == 0)
		goto out;

	size = round_page(args->size);
	if (map->size + size > p->p_rlimit[RLIMIT_VMEM].rlim_cur) {
		error = -ENOMEM;
		goto out;
	}

	/*
	 * Call hint to ensure that NULL is not returned as a valid address
	 * and to reduce vm_map traversals. XXX causes instability, use a
	 * fixed low address as the start point instead to avoid the NULL
	 * return issue.
	 */
	addr = PAGE_SIZE;

	/*
	 * Use 256KB alignment.  It is unclear why this matters for a
	 * virtual address but it appears to fix a number of application/X
	 * crashes and kms console switching is much faster.
	 */
	vm_object_hold(obj->vm_obj);
	vm_object_reference_locked(obj->vm_obj);
	vm_object_drop(obj->vm_obj);

	rv = vm_map_find(map, obj->vm_obj, NULL,
			 args->offset, &addr, args->size,
			 256 * 1024, /* align */
			 TRUE, /* fitit */
			 VM_MAPTYPE_NORMAL, /* maptype */
			 VM_PROT_READ | VM_PROT_WRITE, /* prot */
			 VM_PROT_READ | VM_PROT_WRITE, /* max */
			 MAP_SHARED /* cow */);
	if (rv != KERN_SUCCESS) {
		vm_object_deallocate(obj->vm_obj);
		error = -vm_mmap_to_errno(rv);
	} else {
		args->addr_ptr = (uint64_t)addr;
	}
out:
	drm_gem_object_unreference(obj);
	return (error);
}

/**
 * i915_gem_fault - fault a page into the GTT
 *
 * vm_obj is locked on entry and expected to be locked on return.
 *
 * The vm_pager has placemarked the object with an anonymous memory page
 * which we must replace atomically to avoid races against concurrent faults
 * on the same page.  XXX we currently are unable to do this atomically.
 *
 * If we are to return an error we should not touch the anonymous page,
 * the caller will deallocate it.
 *
 * XXX Most GEM calls appear to be interruptible, but we can't hard loop
 * in that case.  Release all resources and wait 1 tick before retrying.
 * This is a huge problem which needs to be fixed by getting rid of most
 * of the interruptibility.  The linux code does not retry but does appear
 * to have some sort of mechanism (VM_FAULT_NOPAGE ?) for the higher level
 * to be able to retry.
 *
 * --
 *
 * The fault handler is set up by drm_gem_mmap() when an object is GTT mapped
 * from userspace.  The fault handler takes care of binding the object to
 * the GTT (if needed), allocating and programming a fence register (again,
 * only if needed based on whether the old reg is still valid or the object
 * is tiled) and inserting a new PTE into the faulting process.
 *
 * Note that the faulting process may involve evicting existing objects
 * from the GTT and/or fence registers to make room.  So performance may
 * suffer if the GTT working set is large or there are few fence registers
 * left.
 *
 * vm_obj is locked on entry and expected to be locked on return.  The VM
 * pager has placed an anonymous memory page at (obj,offset) which we have
 * to replace.
 */
int i915_gem_fault(vm_object_t vm_obj, vm_ooffset_t offset, int prot, vm_page_t *mres)
{
	struct drm_i915_gem_object *obj = to_intel_bo(vm_obj->handle);
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned long page_offset;
	vm_page_t m, oldm = NULL;
	int ret = 0;
	int didpip = 0;
	bool write = !!(prot & VM_PROT_WRITE);

	intel_runtime_pm_get(dev_priv);

	/* We don't use vmf->pgoff since that has the fake offset */
	page_offset = (unsigned long)offset;

retry:
	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		goto out;

	trace_i915_gem_object_fault(obj, page_offset, true, write);

	/* Try to flush the object off the GPU first without holding the lock.
	 * Upon reacquiring the lock, we will perform our sanity checks and then
	 * repeat the flush holding the lock in the normal manner to catch cases
	 * where we are gazumped.
	 */
	ret = i915_gem_object_wait_rendering__nonblocking(obj, NULL, !write);
	if (ret)
		goto unlock;

	/* Access to snoopable pages through the GTT is incoherent. */
	if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) {
		ret = -EFAULT;
		goto unlock;
	}

	/*
	 * START FREEBSD MAGIC
	 *
	 * Add a pip count to avoid destruction and certain other
	 * complex operations (such as collapses?) while unlocked.
	 */
	if (didpip == 0) {
		vm_object_pip_add(vm_obj, 1);
		didpip = 1;
	}

	/*
	 * XXX We must currently remove the placeholder page now to avoid
	 * a deadlock against a concurrent i915_gem_release_mmap().
	 * Otherwise concurrent operation will block on the busy page
	 * while holding locks which we need to obtain.
	 */
	if (*mres != NULL) {
		oldm = *mres;
		vm_page_remove(oldm);
		*mres = NULL;
	} else {
		oldm = NULL;
	}

	VM_OBJECT_UNLOCK(vm_obj);
	ret = 0;
	m = NULL;

	/*
	 * Since the object lock was dropped, another thread might have
	 * faulted on the same GTT address and instantiated the mapping.
	 * Recheck.
	 */
	VM_OBJECT_LOCK(vm_obj);
	m = vm_page_lookup(vm_obj, OFF_TO_IDX(offset));
	if (m != NULL) {
		/*
		 * Try to busy the page, retry on failure (non-zero ret).
		 */
		if (vm_page_busy_try(m, false)) {
			kprintf("i915_gem_fault: PG_BUSY\n");
			VM_OBJECT_UNLOCK(vm_obj);
			mutex_unlock(&dev->struct_mutex);
			int dummy;
			tsleep(&dummy, 0, "delay", 1); /* XXX */
			VM_OBJECT_LOCK(vm_obj);
			goto retry;
		}
		goto have_page;
	}
	/*
	 * END FREEBSD MAGIC
	 */

	/*
	 * Object must be unlocked here to avoid deadlock during
	 * other GEM calls.  All goto targets expect the object to
	 * be locked.
	 */
	VM_OBJECT_UNLOCK(vm_obj);

	/* Now bind it into the GTT if needed */
	ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE);
	if (ret) {
		VM_OBJECT_LOCK(vm_obj);
		goto unlock;
	}

	ret = i915_gem_object_set_to_gtt_domain(obj, write);
	if (ret) {
		VM_OBJECT_LOCK(vm_obj);
		goto unpin;
	}

	ret = i915_gem_object_get_fence(obj);
	if (ret) {
		VM_OBJECT_LOCK(vm_obj);
		goto unpin;
	}

	obj->fault_mappable = true;

	/*
	 * Relock object for insertion, leave locked for return.
	 */
	VM_OBJECT_LOCK(vm_obj);
	m = vm_phys_fictitious_to_vm_page(dev->agp->base +
					  i915_gem_obj_ggtt_offset(obj) +
					  offset);
	if (m == NULL) {
		ret = -EFAULT;
		goto unpin;
	}
	KASSERT((m->flags & PG_FICTITIOUS) != 0, ("not fictitious %p", m));
	KASSERT(m->wire_count == 1, ("wire_count not 1 %p", m));

	/*
	 * Try to busy the page.  Fails on non-zero return.
	 */
	if (vm_page_busy_try(m, false)) {
		VM_OBJECT_UNLOCK(vm_obj);
		kprintf("i915_gem_fault: PG_BUSY(2)\n");
		i915_gem_object_ggtt_unpin(obj);
		mutex_unlock(&dev->struct_mutex);
		int dummy;
		tsleep(&dummy, 0, "delay", 1); /* XXX */
		VM_OBJECT_LOCK(vm_obj);
		goto retry;
	}
	m->valid = VM_PAGE_BITS_ALL;

	/*
	 * Finally, remap it using the new GTT offset.
	 *
	 * (object expected to be in a locked state)
	 */
	vm_page_insert(m, vm_obj, OFF_TO_IDX(offset));
have_page:
	*mres = m;

	i915_gem_object_ggtt_unpin(obj);
	mutex_unlock(&dev->struct_mutex);
	if (oldm != NULL)
		vm_page_free(oldm);
	if (didpip)
		vm_object_pip_wakeup(vm_obj);
	return (VM_PAGER_OK);

	/*
	 * ALTERNATIVE ERROR RETURN.
	 *
	 * OBJECT EXPECTED TO BE LOCKED.
	 */
unpin:
	i915_gem_object_ggtt_unpin(obj);
unlock:
	mutex_unlock(&dev->struct_mutex);
out:
	switch (ret) {
	case -EIO:
		/*
		 * We eat errors when the gpu is terminally wedged to avoid
		 * userspace unduly crashing (gl has no provisions for mmaps to
		 * fail). But any other -EIO isn't ours (e.g. swap in failure)
		 * and so needs to be reported.
		 */
		if (!i915_terminally_wedged(&dev_priv->gpu_error)) {
//			ret = VM_FAULT_SIGBUS;
			break;
		}
		/* fall through */
	case -EAGAIN:
		/*
		 * EAGAIN means the gpu is hung and we'll wait for the error
		 * handler to reset everything when re-faulting in
		 * i915_mutex_lock_interruptible.
		 */
		/* fall through */
	case -ERESTARTSYS:
	case -EINTR:
		VM_OBJECT_UNLOCK(vm_obj);
		int dummy;
		tsleep(&dummy, 0, "delay", 1); /* XXX */
		VM_OBJECT_LOCK(vm_obj);
		goto retry;
	default:
		WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret);
		ret = VM_PAGER_ERROR;
		break;
	}

	intel_runtime_pm_put(dev_priv);

	/*
	 * Error return.  We already NULL'd out *mres so we should be able
	 * to free (oldm) here even though we are returning an error and the
	 * caller usually handles the freeing.
	 */
	if (oldm != NULL)
		vm_page_free(oldm);
	if (didpip)
		vm_object_pip_wakeup(vm_obj);

	return ret;
}
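/*
 * Editorial note (not from the original source): on success the handler
 * above returns VM_PAGER_OK with *mres pointing at the busied fictitious
 * page; unrecoverable errors are mapped to VM_PAGER_ERROR, while the
 * -EINTR/-ERESTARTSYS/-EAGAIN cases sleep a tick and retry. On both exits
 * the caller's original placeholder page (oldm) is freed here because it
 * was already removed from the VM object before the locks were dropped.
 */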
/**
 * i915_gem_release_mmap - remove physical page mappings
 * @obj: obj in question
 *
 * Preserve the reservation of the mmapping with the DRM core code, but
 * relinquish ownership of the pages back to the system.
 *
 * It is vital that we remove the page mapping if we have mapped a tiled
 * object through the GTT and then lose the fence register due to
 * resource pressure. Similarly if the object has been moved out of the
 * aperture, then pages mapped into userspace must be revoked. Removing the
 * mapping will then trigger a page fault on the next user access, allowing
 * fixup by i915_gem_fault().
 */
void
i915_gem_release_mmap(struct drm_i915_gem_object *obj)
{
	vm_object_t devobj;
	vm_page_t m;
	int i, page_count;

	if (!obj->fault_mappable)
		return;

	devobj = cdev_pager_lookup(obj);
	if (devobj != NULL) {
		page_count = OFF_TO_IDX(obj->base.size);

		VM_OBJECT_LOCK(devobj);
		for (i = 0; i < page_count; i++) {
			m = vm_page_lookup_busy_wait(devobj, i, TRUE, "915unm");
			if (m == NULL)
				continue;
			cdev_pager_free_page(devobj, m);
		}
		VM_OBJECT_UNLOCK(devobj);
		vm_object_deallocate(devobj);
	}

	obj->fault_mappable = false;
}

void
i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv)
{
	struct drm_i915_gem_object *obj;

	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list)
		i915_gem_release_mmap(obj);
}

uint32_t
i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
{
	uint32_t gtt_size;

	if (INTEL_INFO(dev)->gen >= 4 ||
	    tiling_mode == I915_TILING_NONE)
		return size;

	/* Previous chips need a power-of-two fence region when tiling */
	if (INTEL_INFO(dev)->gen == 3)
		gtt_size = 1024*1024;
	else
		gtt_size = 512*1024;

	while (gtt_size < size)
		gtt_size <<= 1;

	return gtt_size;
}

/**
 * i915_gem_get_gtt_alignment - return required GTT alignment for an object
 * @obj: object to check
 *
 * Return the required GTT alignment for an object, taking into account
 * potential fence register mapping.
 */
uint32_t
i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size,
			   int tiling_mode, bool fenced)
{
	/*
	 * Minimum alignment is 4k (GTT page size), but might be greater
	 * if a fence register is needed for the object.
	 */
	if (INTEL_INFO(dev)->gen >= 4 || (!fenced && IS_G33(dev)) ||
	    tiling_mode == I915_TILING_NONE)
		return 4096;

	/*
	 * Previous chips need to be aligned to the size of the smallest
	 * fence register that can contain the object.
	 */
	return i915_gem_get_gtt_size(dev, size, tiling_mode);
}

static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
	int ret;

#if 0
	if (drm_vma_node_has_offset(&obj->base.vma_node))
		return 0;
#endif

	dev_priv->mm.shrinker_no_lock_stealing = true;

	ret = drm_gem_create_mmap_offset(&obj->base);
	if (ret != -ENOSPC)
		goto out;

	/* Badly fragmented mmap space? The only way we can recover
	 * space is by destroying unwanted objects. We can't randomly release
	 * mmap_offsets as userspace expects them to be persistent for the
	 * lifetime of the objects. The closest we can get is to release the
	 * offsets on purgeable objects by truncating it and marking it purged,
	 * which prevents userspace from ever using that object again.
	 */
	i915_gem_purge(dev_priv, obj->base.size >> PAGE_SHIFT);
	ret = drm_gem_create_mmap_offset(&obj->base);
	if (ret != -ENOSPC)
		goto out;

	i915_gem_shrink_all(dev_priv);
	ret = drm_gem_create_mmap_offset(&obj->base);
out:
	dev_priv->mm.shrinker_no_lock_stealing = false;

	return ret;
}

static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
{
	drm_gem_free_mmap_offset(&obj->base);
}

int
i915_gem_mmap_gtt(struct drm_file *file,
		  struct drm_device *dev,
		  uint32_t handle,
		  uint64_t *offset)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj;
	int ret;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	if (obj->base.size > dev_priv->gtt.mappable_end) {
		ret = -E2BIG;
		goto out;
	}

	if (obj->madv != I915_MADV_WILLNEED) {
		DRM_DEBUG("Attempting to mmap a purgeable buffer\n");
		ret = -EFAULT;
		goto out;
	}

	ret = i915_gem_object_create_mmap_offset(obj);
	if (ret)
		goto out;

	*offset = DRM_GEM_MAPPING_OFF(obj->base.map_list.key) |
	    DRM_GEM_MAPPING_KEY;

out:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}
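/*
 * Editorial sketch (not from the original source): userspace consumes the
 * fake offset produced above in two steps, roughly:
 *
 *	struct drm_i915_gem_mmap_gtt arg = { .handle = handle };
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &arg);
 *	ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
 *		   fd, arg.offset);
 *
 * Field and ioctl names are taken from i915_drm.h; error handling omitted.
 */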
2002 */ 2003 int 2004 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, 2005 struct drm_file *file) 2006 { 2007 struct drm_i915_gem_mmap_gtt *args = data; 2008 2009 return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset); 2010 } 2011 2012 static inline int 2013 i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj) 2014 { 2015 return obj->madv == I915_MADV_DONTNEED; 2016 } 2017 2018 /* Immediately discard the backing storage */ 2019 static void 2020 i915_gem_object_truncate(struct drm_i915_gem_object *obj) 2021 { 2022 vm_object_t vm_obj; 2023 2024 vm_obj = obj->base.vm_obj; 2025 VM_OBJECT_LOCK(vm_obj); 2026 vm_object_page_remove(vm_obj, 0, 0, false); 2027 VM_OBJECT_UNLOCK(vm_obj); 2028 2029 obj->madv = __I915_MADV_PURGED; 2030 } 2031 2032 /* Try to discard unwanted pages */ 2033 static void 2034 i915_gem_object_invalidate(struct drm_i915_gem_object *obj) 2035 { 2036 #if 0 2037 struct address_space *mapping; 2038 #endif 2039 2040 switch (obj->madv) { 2041 case I915_MADV_DONTNEED: 2042 i915_gem_object_truncate(obj); 2043 case __I915_MADV_PURGED: 2044 return; 2045 } 2046 2047 #if 0 2048 if (obj->base.filp == NULL) 2049 return; 2050 2051 mapping = file_inode(obj->base.filp)->i_mapping, 2052 invalidate_mapping_pages(mapping, 0, (loff_t)-1); 2053 #endif 2054 } 2055 2056 static void 2057 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj) 2058 { 2059 int page_count = obj->base.size / PAGE_SIZE; 2060 int i, ret; 2061 2062 if (!obj->pages) 2063 return; 2064 2065 BUG_ON(obj->madv == __I915_MADV_PURGED); 2066 2067 ret = i915_gem_object_set_to_cpu_domain(obj, true); 2068 if (ret) { 2069 /* In the event of a disaster, abandon all caches and 2070 * hope for the best. 2071 */ 2072 WARN_ON(ret != -EIO); 2073 i915_gem_clflush_object(obj, true); 2074 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU; 2075 } 2076 2077 if (i915_gem_object_needs_bit17_swizzle(obj)) 2078 i915_gem_object_save_bit_17_swizzle(obj); 2079 2080 if (obj->madv == I915_MADV_DONTNEED) 2081 obj->dirty = 0; 2082 2083 for (i = 0; i < page_count; i++) { 2084 struct vm_page *page = obj->pages[i]; 2085 2086 if (obj->dirty) 2087 set_page_dirty(page); 2088 2089 if (obj->madv == I915_MADV_WILLNEED) 2090 mark_page_accessed(page); 2091 2092 vm_page_busy_wait(obj->pages[i], FALSE, "i915gem"); 2093 vm_page_unwire(obj->pages[i], 1); 2094 vm_page_wakeup(obj->pages[i]); 2095 } 2096 obj->dirty = 0; 2097 2098 kfree(obj->pages); 2099 obj->pages = NULL; 2100 } 2101 2102 int 2103 i915_gem_object_put_pages(struct drm_i915_gem_object *obj) 2104 { 2105 const struct drm_i915_gem_object_ops *ops = obj->ops; 2106 2107 if (obj->pages == NULL) 2108 return 0; 2109 2110 if (obj->pages_pin_count) 2111 return -EBUSY; 2112 2113 BUG_ON(i915_gem_obj_bound_any(obj)); 2114 2115 /* ->put_pages might need to allocate memory for the bit17 swizzle 2116 * array, hence protect them from being reaped by removing them from gtt 2117 * lists early. 
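 * (The shrinker walks mm.bound_list/mm.unbound_list under struct_mutex,
 * so an object whose pages are being torn down must not remain visible
 * on either list while ->put_pages runs.)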
*/ 2118 list_del(&obj->global_list); 2119 2120 ops->put_pages(obj); 2121 obj->pages = NULL; 2122 2123 i915_gem_object_invalidate(obj); 2124 2125 return 0; 2126 } 2127 2128 static unsigned long 2129 __i915_gem_shrink(struct drm_i915_private *dev_priv, long target, 2130 bool purgeable_only) 2131 { 2132 struct list_head still_in_list; 2133 struct drm_i915_gem_object *obj; 2134 unsigned long count = 0; 2135 2136 /* 2137 * As we may completely rewrite the (un)bound list whilst unbinding 2138 * (due to retiring requests) we have to strictly process only 2139 * one element of the list at the time, and recheck the list 2140 * on every iteration. 2141 * 2142 * In particular, we must hold a reference whilst removing the 2143 * object as we may end up waiting for and/or retiring the objects. 2144 * This might release the final reference (held by the active list) 2145 * and result in the object being freed from under us. This is 2146 * similar to the precautions the eviction code must take whilst 2147 * removing objects. 2148 * 2149 * Also note that although these lists do not hold a reference to 2150 * the object we can safely grab one here: The final object 2151 * unreferencing and the bound_list are both protected by the 2152 * dev->struct_mutex and so we won't ever be able to observe an 2153 * object on the bound_list with a reference count equals 0. 2154 */ 2155 INIT_LIST_HEAD(&still_in_list); 2156 while (count < target && !list_empty(&dev_priv->mm.unbound_list)) { 2157 obj = list_first_entry(&dev_priv->mm.unbound_list, 2158 typeof(*obj), global_list); 2159 list_move_tail(&obj->global_list, &still_in_list); 2160 2161 if (!i915_gem_object_is_purgeable(obj) && purgeable_only) 2162 continue; 2163 2164 drm_gem_object_reference(&obj->base); 2165 2166 if (i915_gem_object_put_pages(obj) == 0) 2167 count += obj->base.size >> PAGE_SHIFT; 2168 2169 drm_gem_object_unreference(&obj->base); 2170 } 2171 list_splice(&still_in_list, &dev_priv->mm.unbound_list); 2172 2173 INIT_LIST_HEAD(&still_in_list); 2174 while (count < target && !list_empty(&dev_priv->mm.bound_list)) { 2175 struct i915_vma *vma, *v; 2176 2177 obj = list_first_entry(&dev_priv->mm.bound_list, 2178 typeof(*obj), global_list); 2179 list_move_tail(&obj->global_list, &still_in_list); 2180 2181 if (!i915_gem_object_is_purgeable(obj) && purgeable_only) 2182 continue; 2183 2184 drm_gem_object_reference(&obj->base); 2185 2186 list_for_each_entry_safe(vma, v, &obj->vma_list, vma_link) 2187 if (i915_vma_unbind(vma)) 2188 break; 2189 2190 if (i915_gem_object_put_pages(obj) == 0) 2191 count += obj->base.size >> PAGE_SHIFT; 2192 2193 drm_gem_object_unreference(&obj->base); 2194 } 2195 list_splice(&still_in_list, &dev_priv->mm.bound_list); 2196 2197 return count; 2198 } 2199 2200 static unsigned long 2201 i915_gem_purge(struct drm_i915_private *dev_priv, long target) 2202 { 2203 return __i915_gem_shrink(dev_priv, target, true); 2204 } 2205 2206 static unsigned long 2207 i915_gem_shrink_all(struct drm_i915_private *dev_priv) 2208 { 2209 i915_gem_evict_everything(dev_priv->dev); 2210 return __i915_gem_shrink(dev_priv, LONG_MAX, false); 2211 } 2212 2213 static int 2214 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) 2215 { 2216 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2217 int page_count, i, j; 2218 vm_object_t vm_obj; 2219 struct vm_page *page; 2220 2221 /* Assert that the object is not currently in any GPU domain. 
As it 2222 * wasn't in the GTT, there shouldn't be any way it could have been in 2223 * a GPU cache 2224 */ 2225 BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS); 2226 BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS); 2227 2228 page_count = obj->base.size / PAGE_SIZE; 2229 obj->pages = kmalloc(page_count * sizeof(vm_page_t), M_DRM, 2230 M_WAITOK); 2231 2232 /* Get the list of pages out of our struct file. They'll be pinned 2233 * at this point until we release them. 2234 * 2235 * Fail silently without starting the shrinker 2236 */ 2237 vm_obj = obj->base.vm_obj; 2238 VM_OBJECT_LOCK(vm_obj); 2239 for (i = 0; i < page_count; i++) { 2240 page = shmem_read_mapping_page(vm_obj, i); 2241 if (IS_ERR(page)) { 2242 i915_gem_purge(dev_priv, page_count); 2243 page = shmem_read_mapping_page(vm_obj, i); 2244 } 2245 if (IS_ERR(page)) { 2246 /* We've tried hard to allocate the memory by reaping 2247 * our own buffer, now let the real VM do its job and 2248 * go down in flames if truly OOM. 2249 */ 2250 2251 i915_gem_shrink_all(dev_priv); 2252 page = shmem_read_mapping_page(vm_obj, i); 2253 if (IS_ERR(page)) 2254 goto err_pages; 2255 } 2256 #ifdef CONFIG_SWIOTLB 2257 if (swiotlb_nr_tbl()) { 2258 st->nents++; 2259 sg_set_page(sg, page, PAGE_SIZE, 0); 2260 sg = sg_next(sg); 2261 continue; 2262 } 2263 #endif 2264 obj->pages[i] = page; 2265 } 2266 #ifdef CONFIG_SWIOTLB 2267 if (!swiotlb_nr_tbl()) 2268 #endif 2269 VM_OBJECT_UNLOCK(vm_obj); 2270 2271 if (i915_gem_object_needs_bit17_swizzle(obj)) 2272 i915_gem_object_do_bit_17_swizzle(obj); 2273 2274 return 0; 2275 2276 err_pages: 2277 for (j = 0; j < i; j++) { 2278 page = obj->pages[j]; 2279 vm_page_busy_wait(page, FALSE, "i915gem"); 2280 vm_page_unwire(page, 0); 2281 vm_page_wakeup(page); 2282 } 2283 VM_OBJECT_UNLOCK(vm_obj); 2284 kfree(obj->pages); 2285 obj->pages = NULL; 2286 return (-EIO); 2287 } 2288 2289 /* Ensure that the associated pages are gathered from the backing storage 2290 * and pinned into our object. i915_gem_object_get_pages() may be called 2291 * multiple times before they are released by a single call to 2292 * i915_gem_object_put_pages() - once the pages are no longer referenced 2293 * either as a result of memory pressure (reaping pages under the shrinker) 2294 * or as the object is itself released. 2295 */ 2296 int 2297 i915_gem_object_get_pages(struct drm_i915_gem_object *obj) 2298 { 2299 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2300 const struct drm_i915_gem_object_ops *ops = obj->ops; 2301 int ret; 2302 2303 if (obj->pages) 2304 return 0; 2305 2306 if (obj->madv != I915_MADV_WILLNEED) { 2307 DRM_DEBUG("Attempting to obtain a purgeable object\n"); 2308 return -EFAULT; 2309 } 2310 2311 BUG_ON(obj->pages_pin_count); 2312 2313 ret = ops->get_pages(obj); 2314 if (ret) 2315 return ret; 2316 2317 list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list); 2318 return 0; 2319 } 2320 2321 static void 2322 i915_gem_object_move_to_active(struct drm_i915_gem_object *obj, 2323 struct intel_engine_cs *ring) 2324 { 2325 struct drm_device *dev = obj->base.dev; 2326 struct drm_i915_private *dev_priv = dev->dev_private; 2327 u32 seqno = intel_ring_get_seqno(ring); 2328 2329 BUG_ON(ring == NULL); 2330 if (obj->ring != ring && obj->last_write_seqno) { 2331 /* Keep the seqno relative to the current ring */ 2332 obj->last_write_seqno = seqno; 2333 } 2334 obj->ring = ring; 2335 2336 /* Add a reference if we're newly entering the active list. 
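 * The reference taken here is dropped again in
 * i915_gem_object_move_to_inactive() once the GPU has finished with the
 * object.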
*/ 2337 if (!obj->active) { 2338 drm_gem_object_reference(&obj->base); 2339 obj->active = 1; 2340 } 2341 2342 list_move_tail(&obj->ring_list, &ring->active_list); 2343 2344 obj->last_read_seqno = seqno; 2345 2346 if (obj->fenced_gpu_access) { 2347 obj->last_fenced_seqno = seqno; 2348 2349 /* Bump MRU to take account of the delayed flush */ 2350 if (obj->fence_reg != I915_FENCE_REG_NONE) { 2351 struct drm_i915_fence_reg *reg; 2352 2353 reg = &dev_priv->fence_regs[obj->fence_reg]; 2354 list_move_tail(&reg->lru_list, 2355 &dev_priv->mm.fence_list); 2356 } 2357 } 2358 } 2359 2360 void i915_vma_move_to_active(struct i915_vma *vma, 2361 struct intel_engine_cs *ring) 2362 { 2363 list_move_tail(&vma->mm_list, &vma->vm->active_list); 2364 return i915_gem_object_move_to_active(vma->obj, ring); 2365 } 2366 2367 static void 2368 i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj) 2369 { 2370 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2371 struct i915_address_space *vm; 2372 struct i915_vma *vma; 2373 2374 BUG_ON(obj->base.write_domain & ~I915_GEM_GPU_DOMAINS); 2375 BUG_ON(!obj->active); 2376 2377 list_for_each_entry(vm, &dev_priv->vm_list, global_link) { 2378 vma = i915_gem_obj_to_vma(obj, vm); 2379 if (vma && !list_empty(&vma->mm_list)) 2380 list_move_tail(&vma->mm_list, &vm->inactive_list); 2381 } 2382 2383 intel_fb_obj_flush(obj, true); 2384 2385 list_del_init(&obj->ring_list); 2386 obj->ring = NULL; 2387 2388 obj->last_read_seqno = 0; 2389 obj->last_write_seqno = 0; 2390 obj->base.write_domain = 0; 2391 2392 obj->last_fenced_seqno = 0; 2393 obj->fenced_gpu_access = false; 2394 2395 obj->active = 0; 2396 drm_gem_object_unreference(&obj->base); 2397 2398 WARN_ON(i915_verify_lists(dev)); 2399 } 2400 2401 static void 2402 i915_gem_object_retire(struct drm_i915_gem_object *obj) 2403 { 2404 struct intel_engine_cs *ring = obj->ring; 2405 2406 if (ring == NULL) 2407 return; 2408 2409 if (i915_seqno_passed(ring->get_seqno(ring, true), 2410 obj->last_read_seqno)) 2411 i915_gem_object_move_to_inactive(obj); 2412 } 2413 2414 static int 2415 i915_gem_init_seqno(struct drm_device *dev, u32 seqno) 2416 { 2417 struct drm_i915_private *dev_priv = dev->dev_private; 2418 struct intel_engine_cs *ring; 2419 int ret, i, j; 2420 2421 /* Carefully retire all requests without writing to the rings */ 2422 for_each_ring(ring, dev_priv, i) { 2423 ret = intel_ring_idle(ring); 2424 if (ret) 2425 return ret; 2426 } 2427 i915_gem_retire_requests(dev); 2428 2429 /* Finally reset hw state */ 2430 for_each_ring(ring, dev_priv, i) { 2431 intel_ring_init_seqno(ring, seqno); 2432 2433 for (j = 0; j < ARRAY_SIZE(ring->semaphore.sync_seqno); j++) 2434 ring->semaphore.sync_seqno[j] = 0; 2435 } 2436 2437 return 0; 2438 } 2439 2440 int i915_gem_set_seqno(struct drm_device *dev, u32 seqno) 2441 { 2442 struct drm_i915_private *dev_priv = dev->dev_private; 2443 int ret; 2444 2445 if (seqno == 0) 2446 return -EINVAL; 2447 2448 /* HWS page needs to be set less than what we 2449 * will inject to ring 2450 */ 2451 ret = i915_gem_init_seqno(dev, seqno - 1); 2452 if (ret) 2453 return ret; 2454 2455 /* Carefully set the last_seqno value so that wrap 2456 * detection still works 2457 */ 2458 dev_priv->next_seqno = seqno; 2459 dev_priv->last_seqno = seqno - 1; 2460 if (dev_priv->last_seqno == 0) 2461 dev_priv->last_seqno--; 2462 2463 return 0; 2464 } 2465 2466 int 2467 i915_gem_get_seqno(struct drm_device *dev, u32 *seqno) 2468 { 2469 struct drm_i915_private *dev_priv = dev->dev_private; 2470 2471 /* reserve 0 for
non-seqno */ 2472 if (dev_priv->next_seqno == 0) { 2473 int ret = i915_gem_init_seqno(dev, 0); 2474 if (ret) 2475 return ret; 2476 2477 dev_priv->next_seqno = 1; 2478 } 2479 2480 *seqno = dev_priv->last_seqno = dev_priv->next_seqno++; 2481 return 0; 2482 } 2483 2484 int __i915_add_request(struct intel_engine_cs *ring, 2485 struct drm_file *file, 2486 struct drm_i915_gem_object *obj, 2487 u32 *out_seqno) 2488 { 2489 struct drm_i915_private *dev_priv = ring->dev->dev_private; 2490 struct drm_i915_gem_request *request; 2491 u32 request_ring_position, request_start; 2492 int ret; 2493 2494 request_start = intel_ring_get_tail(ring->buffer); 2495 /* 2496 * Emit any outstanding flushes - execbuf can fail to emit the flush 2497 * after having emitted the batchbuffer command. Hence we need to fix 2498 * things up similar to emitting the lazy request. The difference here 2499 * is that the flush _must_ happen before the next request, no matter 2500 * what. 2501 */ 2502 ret = intel_ring_flush_all_caches(ring); 2503 if (ret) 2504 return ret; 2505 2506 request = ring->preallocated_lazy_request; 2507 if (WARN_ON(request == NULL)) 2508 return -ENOMEM; 2509 2510 /* Record the position of the start of the request so that 2511 * should we detect the updated seqno part-way through the 2512 * GPU processing the request, we never over-estimate the 2513 * position of the head. 2514 */ 2515 request_ring_position = intel_ring_get_tail(ring->buffer); 2516 2517 ret = ring->add_request(ring); 2518 if (ret) 2519 return ret; 2520 2521 request->seqno = intel_ring_get_seqno(ring); 2522 request->ring = ring; 2523 request->head = request_start; 2524 request->tail = request_ring_position; 2525 2526 /* Whilst this request exists, batch_obj will be on the 2527 * active_list, and so will hold the active reference. Only when this 2528 * request is retired will the batch_obj be moved onto the 2529 * inactive_list and lose its active reference. Hence we do not need 2530 * to explicitly hold another reference here. 2531 */ 2532 request->batch_obj = obj; 2533 2534 /* Hold a reference to the current context so that we can inspect 2535 * it later in case a hangcheck error event fires.
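 * The matching i915_gem_context_unreference() happens in
 * i915_gem_free_request() when the request is retired or torn down after
 * a GPU reset.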
2536 */ 2537 request->ctx = ring->last_context; 2538 if (request->ctx) 2539 i915_gem_context_reference(request->ctx); 2540 2541 request->emitted_jiffies = jiffies; 2542 list_add_tail(&request->list, &ring->request_list); 2543 request->file_priv = NULL; 2544 2545 if (file) { 2546 struct drm_i915_file_private *file_priv = file->driver_priv; 2547 2548 spin_lock(&file_priv->mm.lock); 2549 request->file_priv = file_priv; 2550 list_add_tail(&request->client_list, 2551 &file_priv->mm.request_list); 2552 spin_unlock(&file_priv->mm.lock); 2553 } 2554 2555 trace_i915_gem_request_add(ring, request->seqno); 2556 ring->outstanding_lazy_seqno = 0; 2557 ring->preallocated_lazy_request = NULL; 2558 2559 if (!dev_priv->ums.mm_suspended) { 2560 i915_queue_hangcheck(ring->dev); 2561 2562 cancel_delayed_work_sync(&dev_priv->mm.idle_work); 2563 queue_delayed_work(dev_priv->wq, 2564 &dev_priv->mm.retire_work, 2565 round_jiffies_up_relative(HZ)); 2566 intel_mark_busy(dev_priv->dev); 2567 } 2568 2569 if (out_seqno) 2570 *out_seqno = request->seqno; 2571 return 0; 2572 } 2573 2574 static inline void 2575 i915_gem_request_remove_from_client(struct drm_i915_gem_request *request) 2576 { 2577 struct drm_i915_file_private *file_priv = request->file_priv; 2578 2579 if (!file_priv) 2580 return; 2581 2582 spin_lock(&file_priv->mm.lock); 2583 list_del(&request->client_list); 2584 request->file_priv = NULL; 2585 spin_unlock(&file_priv->mm.lock); 2586 } 2587 2588 static bool i915_context_is_banned(struct drm_i915_private *dev_priv, 2589 const struct intel_context *ctx) 2590 { 2591 unsigned long elapsed; 2592 2593 elapsed = get_seconds() - ctx->hang_stats.guilty_ts; 2594 2595 if (ctx->hang_stats.banned) 2596 return true; 2597 2598 if (elapsed <= DRM_I915_CTX_BAN_PERIOD) { 2599 if (!i915_gem_context_is_default(ctx)) { 2600 DRM_DEBUG("context hanging too fast, banning!\n"); 2601 return true; 2602 } else if (i915_stop_ring_allow_ban(dev_priv)) { 2603 if (i915_stop_ring_allow_warn(dev_priv)) 2604 DRM_ERROR("gpu hanging too fast, banning!\n"); 2605 return true; 2606 } 2607 } 2608 2609 return false; 2610 } 2611 2612 static void i915_set_reset_status(struct drm_i915_private *dev_priv, 2613 struct intel_context *ctx, 2614 const bool guilty) 2615 { 2616 struct i915_ctx_hang_stats *hs; 2617 2618 if (WARN_ON(!ctx)) 2619 return; 2620 2621 hs = &ctx->hang_stats; 2622 2623 if (guilty) { 2624 hs->banned = i915_context_is_banned(dev_priv, ctx); 2625 hs->batch_active++; 2626 hs->guilty_ts = get_seconds(); 2627 } else { 2628 hs->batch_pending++; 2629 } 2630 } 2631 2632 static void i915_gem_free_request(struct drm_i915_gem_request *request) 2633 { 2634 list_del(&request->list); 2635 i915_gem_request_remove_from_client(request); 2636 2637 if (request->ctx) 2638 i915_gem_context_unreference(request->ctx); 2639 2640 kfree(request); 2641 } 2642 2643 struct drm_i915_gem_request * 2644 i915_gem_find_active_request(struct intel_engine_cs *ring) 2645 { 2646 struct drm_i915_gem_request *request; 2647 u32 completed_seqno; 2648 2649 completed_seqno = ring->get_seqno(ring, false); 2650 2651 list_for_each_entry(request, &ring->request_list, list) { 2652 if (i915_seqno_passed(completed_seqno, request->seqno)) 2653 continue; 2654 2655 return request; 2656 } 2657 2658 return NULL; 2659 } 2660 2661 static void i915_gem_reset_ring_status(struct drm_i915_private *dev_priv, 2662 struct intel_engine_cs *ring) 2663 { 2664 struct drm_i915_gem_request *request; 2665 bool ring_hung; 2666 2667 request = i915_gem_find_active_request(ring); 2668 2669 if (request == NULL) 
2670 return; 2671 2672 ring_hung = ring->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG; 2673 2674 i915_set_reset_status(dev_priv, request->ctx, ring_hung); 2675 2676 list_for_each_entry_continue(request, &ring->request_list, list) 2677 i915_set_reset_status(dev_priv, request->ctx, false); 2678 } 2679 2680 static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv, 2681 struct intel_engine_cs *ring) 2682 { 2683 while (!list_empty(&ring->active_list)) { 2684 struct drm_i915_gem_object *obj; 2685 2686 obj = list_first_entry(&ring->active_list, 2687 struct drm_i915_gem_object, 2688 ring_list); 2689 2690 i915_gem_object_move_to_inactive(obj); 2691 } 2692 2693 /* 2694 * We must free the requests after all the corresponding objects have 2695 * been moved off active lists. Which is the same order as the normal 2696 * retire_requests function does. This is important if object hold 2697 * implicit references on things like e.g. ppgtt address spaces through 2698 * the request. 2699 */ 2700 while (!list_empty(&ring->request_list)) { 2701 struct drm_i915_gem_request *request; 2702 2703 request = list_first_entry(&ring->request_list, 2704 struct drm_i915_gem_request, 2705 list); 2706 2707 i915_gem_free_request(request); 2708 } 2709 2710 /* These may not have been flush before the reset, do so now */ 2711 kfree(ring->preallocated_lazy_request); 2712 ring->preallocated_lazy_request = NULL; 2713 ring->outstanding_lazy_seqno = 0; 2714 } 2715 2716 void i915_gem_restore_fences(struct drm_device *dev) 2717 { 2718 struct drm_i915_private *dev_priv = dev->dev_private; 2719 int i; 2720 2721 for (i = 0; i < dev_priv->num_fence_regs; i++) { 2722 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i]; 2723 2724 /* 2725 * Commit delayed tiling changes if we have an object still 2726 * attached to the fence, otherwise just clear the fence. 2727 */ 2728 if (reg->obj) { 2729 i915_gem_object_update_fence(reg->obj, reg, 2730 reg->obj->tiling_mode); 2731 } else { 2732 i915_gem_write_fence(dev, i, NULL); 2733 } 2734 } 2735 } 2736 2737 void i915_gem_reset(struct drm_device *dev) 2738 { 2739 struct drm_i915_private *dev_priv = dev->dev_private; 2740 struct intel_engine_cs *ring; 2741 int i; 2742 2743 /* 2744 * Before we free the objects from the requests, we need to inspect 2745 * them for finding the guilty party. As the requests only borrow 2746 * their reference to the objects, the inspection must be done first. 2747 */ 2748 for_each_ring(ring, dev_priv, i) 2749 i915_gem_reset_ring_status(dev_priv, ring); 2750 2751 for_each_ring(ring, dev_priv, i) 2752 i915_gem_reset_ring_cleanup(dev_priv, ring); 2753 2754 i915_gem_context_reset(dev); 2755 2756 i915_gem_restore_fences(dev); 2757 } 2758 2759 /** 2760 * This function clears the request list as sequence numbers are passed. 2761 */ 2762 void 2763 i915_gem_retire_requests_ring(struct intel_engine_cs *ring) 2764 { 2765 uint32_t seqno; 2766 2767 if (list_empty(&ring->request_list)) 2768 return; 2769 2770 WARN_ON(i915_verify_lists(ring->dev)); 2771 2772 seqno = ring->get_seqno(ring, true); 2773 2774 /* Move any buffers on the active list that are no longer referenced 2775 * by the ringbuffer to the flushing/inactive lists as appropriate, 2776 * before we free the context associated with the requests. 
2777 */ 2778 while (!list_empty(&ring->active_list)) { 2779 struct drm_i915_gem_object *obj; 2780 2781 obj = list_first_entry(&ring->active_list, 2782 struct drm_i915_gem_object, 2783 ring_list); 2784 2785 if (!i915_seqno_passed(seqno, obj->last_read_seqno)) 2786 break; 2787 2788 i915_gem_object_move_to_inactive(obj); 2789 } 2790 2791 2792 while (!list_empty(&ring->request_list)) { 2793 struct drm_i915_gem_request *request; 2794 2795 request = list_first_entry(&ring->request_list, 2796 struct drm_i915_gem_request, 2797 list); 2798 2799 if (!i915_seqno_passed(seqno, request->seqno)) 2800 break; 2801 2802 trace_i915_gem_request_retire(ring, request->seqno); 2803 /* We know the GPU must have read the request to have 2804 * sent us the seqno + interrupt, so use the position 2805 * of tail of the request to update the last known position 2806 * of the GPU head. 2807 */ 2808 ring->buffer->last_retired_head = request->tail; 2809 2810 i915_gem_free_request(request); 2811 } 2812 2813 if (unlikely(ring->trace_irq_seqno && 2814 i915_seqno_passed(seqno, ring->trace_irq_seqno))) { 2815 ring->irq_put(ring); 2816 ring->trace_irq_seqno = 0; 2817 } 2818 2819 WARN_ON(i915_verify_lists(ring->dev)); 2820 } 2821 2822 bool 2823 i915_gem_retire_requests(struct drm_device *dev) 2824 { 2825 struct drm_i915_private *dev_priv = dev->dev_private; 2826 struct intel_engine_cs *ring; 2827 bool idle = true; 2828 int i; 2829 2830 for_each_ring(ring, dev_priv, i) { 2831 i915_gem_retire_requests_ring(ring); 2832 idle &= list_empty(&ring->request_list); 2833 } 2834 2835 if (idle) 2836 mod_delayed_work(dev_priv->wq, 2837 &dev_priv->mm.idle_work, 2838 msecs_to_jiffies(100)); 2839 2840 return idle; 2841 } 2842 2843 static void 2844 i915_gem_retire_work_handler(struct work_struct *work) 2845 { 2846 struct drm_i915_private *dev_priv = 2847 container_of(work, typeof(*dev_priv), mm.retire_work.work); 2848 struct drm_device *dev = dev_priv->dev; 2849 bool idle; 2850 2851 /* Come back later if the device is busy... */ 2852 idle = false; 2853 if (mutex_trylock(&dev->struct_mutex)) { 2854 idle = i915_gem_retire_requests(dev); 2855 mutex_unlock(&dev->struct_mutex); 2856 } 2857 if (!idle) 2858 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 2859 round_jiffies_up_relative(HZ)); 2860 } 2861 2862 static void 2863 i915_gem_idle_work_handler(struct work_struct *work) 2864 { 2865 struct drm_i915_private *dev_priv = 2866 container_of(work, typeof(*dev_priv), mm.idle_work.work); 2867 2868 intel_mark_idle(dev_priv->dev); 2869 } 2870 2871 /** 2872 * Ensures that an object will eventually get non-busy by flushing any required 2873 * write domains, emitting any outstanding lazy request and retiring and 2874 * completed requests. 2875 */ 2876 static int 2877 i915_gem_object_flush_active(struct drm_i915_gem_object *obj) 2878 { 2879 int ret; 2880 2881 if (obj->active) { 2882 ret = i915_gem_check_olr(obj->ring, obj->last_read_seqno); 2883 if (ret) 2884 return ret; 2885 2886 i915_gem_retire_requests_ring(obj->ring); 2887 } 2888 2889 return 0; 2890 } 2891 2892 /** 2893 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT 2894 * @DRM_IOCTL_ARGS: standard ioctl arguments 2895 * 2896 * Returns 0 if successful, else an error is returned with the remaining time in 2897 * the timeout parameter. 
2898 * -ETIME: object is still busy after timeout 2899 * -ERESTARTSYS: signal interrupted the wait 2900 * -ENONENT: object doesn't exist 2901 * Also possible, but rare: 2902 * -EAGAIN: GPU wedged 2903 * -ENOMEM: damn 2904 * -ENODEV: Internal IRQ fail 2905 * -E?: The add request failed 2906 * 2907 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any 2908 * non-zero timeout parameter the wait ioctl will wait for the given number of 2909 * nanoseconds on an object becoming unbusy. Since the wait itself does so 2910 * without holding struct_mutex the object may become re-busied before this 2911 * function completes. A similar but shorter * race condition exists in the busy 2912 * ioctl 2913 */ 2914 int 2915 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) 2916 { 2917 struct drm_i915_private *dev_priv = dev->dev_private; 2918 struct drm_i915_gem_wait *args = data; 2919 struct drm_i915_gem_object *obj; 2920 struct intel_engine_cs *ring = NULL; 2921 struct timespec timeout_stack, *timeout = NULL; 2922 unsigned reset_counter; 2923 u32 seqno = 0; 2924 int ret = 0; 2925 2926 if (args->timeout_ns >= 0) { 2927 timeout_stack = ns_to_timespec(args->timeout_ns); 2928 timeout = &timeout_stack; 2929 } 2930 2931 ret = i915_mutex_lock_interruptible(dev); 2932 if (ret) 2933 return ret; 2934 2935 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->bo_handle)); 2936 if (&obj->base == NULL) { 2937 mutex_unlock(&dev->struct_mutex); 2938 return -ENOENT; 2939 } 2940 2941 /* Need to make sure the object gets inactive eventually. */ 2942 ret = i915_gem_object_flush_active(obj); 2943 if (ret) 2944 goto out; 2945 2946 if (obj->active) { 2947 seqno = obj->last_read_seqno; 2948 ring = obj->ring; 2949 } 2950 2951 if (seqno == 0) 2952 goto out; 2953 2954 /* Do this after OLR check to make sure we make forward progress polling 2955 * on this IOCTL with a 0 timeout (like busy ioctl) 2956 */ 2957 if (!args->timeout_ns) { 2958 ret = -ETIMEDOUT; 2959 goto out; 2960 } 2961 2962 drm_gem_object_unreference(&obj->base); 2963 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter); 2964 mutex_unlock(&dev->struct_mutex); 2965 2966 ret = __wait_seqno(ring, seqno, reset_counter, true, timeout, file->driver_priv); 2967 if (timeout) 2968 args->timeout_ns = timespec_to_ns(timeout); 2969 return ret; 2970 2971 out: 2972 drm_gem_object_unreference(&obj->base); 2973 mutex_unlock(&dev->struct_mutex); 2974 return ret; 2975 } 2976 2977 /** 2978 * i915_gem_object_sync - sync an object to a ring. 2979 * 2980 * @obj: object which may be in use on another ring. 2981 * @to: ring we wish to use the object on. May be NULL. 2982 * 2983 * This code is meant to abstract object synchronization with the GPU. 2984 * Calling with NULL implies synchronizing the object with the CPU 2985 * rather than a particular GPU ring. 2986 * 2987 * Returns 0 if successful, else propagates up the lower layer error. 
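 *
 * Illustrative use (a sketch, not a call site copied from this file):
 * before emitting a batch that reads @obj on one ring while the last
 * write happened on another, a caller would do roughly
 *
 *	ret = i915_gem_object_sync(obj, target_ring);
 *	if (ret)
 *		return ret;
 *
 * which either emits a semaphore wait on @to or falls back to waiting
 * for rendering on the CPU.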
2988 */ 2989 int 2990 i915_gem_object_sync(struct drm_i915_gem_object *obj, 2991 struct intel_engine_cs *to) 2992 { 2993 struct intel_engine_cs *from = obj->ring; 2994 u32 seqno; 2995 int ret, idx; 2996 2997 if (from == NULL || to == from) 2998 return 0; 2999 3000 if (to == NULL || !i915_semaphore_is_enabled(obj->base.dev)) 3001 return i915_gem_object_wait_rendering(obj, false); 3002 3003 idx = intel_ring_sync_index(from, to); 3004 3005 seqno = obj->last_read_seqno; 3006 /* Optimization: Avoid semaphore sync when we are sure we already 3007 * waited for an object with higher seqno */ 3008 if (seqno <= from->semaphore.sync_seqno[idx]) 3009 return 0; 3010 3011 ret = i915_gem_check_olr(obj->ring, seqno); 3012 if (ret) 3013 return ret; 3014 3015 trace_i915_gem_ring_sync_to(from, to, seqno); 3016 ret = to->semaphore.sync_to(to, from, seqno); 3017 if (!ret) 3018 /* We use last_read_seqno because sync_to() 3019 * might have just caused seqno wrap under 3020 * the radar. 3021 */ 3022 from->semaphore.sync_seqno[idx] = obj->last_read_seqno; 3023 3024 return ret; 3025 } 3026 3027 static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj) 3028 { 3029 u32 old_write_domain, old_read_domains; 3030 3031 /* Force a pagefault for domain tracking on next user access */ 3032 i915_gem_release_mmap(obj); 3033 3034 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) 3035 return; 3036 3037 /* Wait for any direct GTT access to complete */ 3038 mb(); 3039 3040 old_read_domains = obj->base.read_domains; 3041 old_write_domain = obj->base.write_domain; 3042 3043 obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT; 3044 obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT; 3045 3046 trace_i915_gem_object_change_domain(obj, 3047 old_read_domains, 3048 old_write_domain); 3049 } 3050 3051 int i915_vma_unbind(struct i915_vma *vma) 3052 { 3053 struct drm_i915_gem_object *obj = vma->obj; 3054 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 3055 int ret; 3056 3057 if (list_empty(&vma->vma_link)) 3058 return 0; 3059 3060 if (!drm_mm_node_allocated(&vma->node)) { 3061 i915_gem_vma_destroy(vma); 3062 return 0; 3063 } 3064 3065 if (vma->pin_count) 3066 return -EBUSY; 3067 3068 BUG_ON(obj->pages == NULL); 3069 3070 ret = i915_gem_object_finish_gpu(obj); 3071 if (ret) 3072 return ret; 3073 /* Continue on if we fail due to EIO, the GPU is hung so we 3074 * should be safe and we need to cleanup or else we might 3075 * cause memory corruption through use-after-free. 3076 */ 3077 3078 if (i915_is_ggtt(vma->vm)) { 3079 i915_gem_object_finish_gtt(obj); 3080 3081 /* release the fence reg _after_ flushing */ 3082 ret = i915_gem_object_put_fence(obj); 3083 if (ret) 3084 return ret; 3085 } 3086 3087 trace_i915_vma_unbind(vma); 3088 3089 vma->unbind_vma(vma); 3090 3091 list_del_init(&vma->mm_list); 3092 /* Avoid an unnecessary call to unbind on rebind. */ 3093 if (i915_is_ggtt(vma->vm)) 3094 obj->map_and_fenceable = true; 3095 3096 drm_mm_remove_node(&vma->node); 3097 i915_gem_vma_destroy(vma); 3098 3099 /* Since the unbound list is global, only move to that list if 3100 * no more VMAs exist. */ 3101 if (list_empty(&obj->vma_list)) { 3102 i915_gem_gtt_finish_object(obj); 3103 list_move_tail(&obj->global_list, &dev_priv->mm.unbound_list); 3104 } 3105 3106 /* And finally now the object is completely decoupled from this vma, 3107 * we can drop its hold on the backing storage and allow it to be 3108 * reaped by the shrinker. 
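 * This unpin pairs with the i915_gem_object_pin_pages() taken in
 * i915_gem_object_bind_to_vm() when the vma was originally bound.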
3109 */ 3110 i915_gem_object_unpin_pages(obj); 3111 3112 return 0; 3113 } 3114 3115 int i915_gpu_idle(struct drm_device *dev) 3116 { 3117 struct drm_i915_private *dev_priv = dev->dev_private; 3118 struct intel_engine_cs *ring; 3119 int ret, i; 3120 3121 /* Flush everything onto the inactive list. */ 3122 for_each_ring(ring, dev_priv, i) { 3123 ret = i915_switch_context(ring, ring->default_context); 3124 if (ret) 3125 return ret; 3126 3127 ret = intel_ring_idle(ring); 3128 if (ret) 3129 return ret; 3130 } 3131 3132 return 0; 3133 } 3134 3135 static void i965_write_fence_reg(struct drm_device *dev, int reg, 3136 struct drm_i915_gem_object *obj) 3137 { 3138 struct drm_i915_private *dev_priv = dev->dev_private; 3139 int fence_reg; 3140 int fence_pitch_shift; 3141 3142 if (INTEL_INFO(dev)->gen >= 6) { 3143 fence_reg = FENCE_REG_SANDYBRIDGE_0; 3144 fence_pitch_shift = SANDYBRIDGE_FENCE_PITCH_SHIFT; 3145 } else { 3146 fence_reg = FENCE_REG_965_0; 3147 fence_pitch_shift = I965_FENCE_PITCH_SHIFT; 3148 } 3149 3150 fence_reg += reg * 8; 3151 3152 /* To w/a incoherency with non-atomic 64-bit register updates, 3153 * we split the 64-bit update into two 32-bit writes. In order 3154 * for a partial fence not to be evaluated between writes, we 3155 * precede the update with write to turn off the fence register, 3156 * and only enable the fence as the last step. 3157 * 3158 * For extra levels of paranoia, we make sure each step lands 3159 * before applying the next step. 3160 */ 3161 I915_WRITE(fence_reg, 0); 3162 POSTING_READ(fence_reg); 3163 3164 if (obj) { 3165 u32 size = i915_gem_obj_ggtt_size(obj); 3166 uint64_t val; 3167 3168 val = (uint64_t)((i915_gem_obj_ggtt_offset(obj) + size - 4096) & 3169 0xfffff000) << 32; 3170 val |= i915_gem_obj_ggtt_offset(obj) & 0xfffff000; 3171 val |= (uint64_t)((obj->stride / 128) - 1) << fence_pitch_shift; 3172 if (obj->tiling_mode == I915_TILING_Y) 3173 val |= 1 << I965_FENCE_TILING_Y_SHIFT; 3174 val |= I965_FENCE_REG_VALID; 3175 3176 I915_WRITE(fence_reg + 4, val >> 32); 3177 POSTING_READ(fence_reg + 4); 3178 3179 I915_WRITE(fence_reg + 0, val); 3180 POSTING_READ(fence_reg); 3181 } else { 3182 I915_WRITE(fence_reg + 4, 0); 3183 POSTING_READ(fence_reg + 4); 3184 } 3185 } 3186 3187 static void i915_write_fence_reg(struct drm_device *dev, int reg, 3188 struct drm_i915_gem_object *obj) 3189 { 3190 struct drm_i915_private *dev_priv = dev->dev_private; 3191 u32 val; 3192 3193 if (obj) { 3194 u32 size = i915_gem_obj_ggtt_size(obj); 3195 int pitch_val; 3196 int tile_width; 3197 3198 WARN((i915_gem_obj_ggtt_offset(obj) & ~I915_FENCE_START_MASK) || 3199 (size & -size) != size || 3200 (i915_gem_obj_ggtt_offset(obj) & (size - 1)), 3201 "object 0x%08lx [fenceable? 
%d] not 1M or pot-size (0x%08x) aligned\n", 3202 i915_gem_obj_ggtt_offset(obj), obj->map_and_fenceable, size); 3203 3204 if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev)) 3205 tile_width = 128; 3206 else 3207 tile_width = 512; 3208 3209 /* Note: pitch better be a power of two tile widths */ 3210 pitch_val = obj->stride / tile_width; 3211 pitch_val = ffs(pitch_val) - 1; 3212 3213 val = i915_gem_obj_ggtt_offset(obj); 3214 if (obj->tiling_mode == I915_TILING_Y) 3215 val |= 1 << I830_FENCE_TILING_Y_SHIFT; 3216 val |= I915_FENCE_SIZE_BITS(size); 3217 val |= pitch_val << I830_FENCE_PITCH_SHIFT; 3218 val |= I830_FENCE_REG_VALID; 3219 } else 3220 val = 0; 3221 3222 if (reg < 8) 3223 reg = FENCE_REG_830_0 + reg * 4; 3224 else 3225 reg = FENCE_REG_945_8 + (reg - 8) * 4; 3226 3227 I915_WRITE(reg, val); 3228 POSTING_READ(reg); 3229 } 3230 3231 static void i830_write_fence_reg(struct drm_device *dev, int reg, 3232 struct drm_i915_gem_object *obj) 3233 { 3234 struct drm_i915_private *dev_priv = dev->dev_private; 3235 uint32_t val; 3236 3237 if (obj) { 3238 u32 size = i915_gem_obj_ggtt_size(obj); 3239 uint32_t pitch_val; 3240 3241 WARN((i915_gem_obj_ggtt_offset(obj) & ~I830_FENCE_START_MASK) || 3242 (size & -size) != size || 3243 (i915_gem_obj_ggtt_offset(obj) & (size - 1)), 3244 "object 0x%08lx not 512K or pot-size 0x%08x aligned\n", 3245 i915_gem_obj_ggtt_offset(obj), size); 3246 3247 pitch_val = obj->stride / 128; 3248 pitch_val = ffs(pitch_val) - 1; 3249 3250 val = i915_gem_obj_ggtt_offset(obj); 3251 if (obj->tiling_mode == I915_TILING_Y) 3252 val |= 1 << I830_FENCE_TILING_Y_SHIFT; 3253 val |= I830_FENCE_SIZE_BITS(size); 3254 val |= pitch_val << I830_FENCE_PITCH_SHIFT; 3255 val |= I830_FENCE_REG_VALID; 3256 } else 3257 val = 0; 3258 3259 I915_WRITE(FENCE_REG_830_0 + reg * 4, val); 3260 POSTING_READ(FENCE_REG_830_0 + reg * 4); 3261 } 3262 3263 inline static bool i915_gem_object_needs_mb(struct drm_i915_gem_object *obj) 3264 { 3265 return obj && obj->base.read_domains & I915_GEM_DOMAIN_GTT; 3266 } 3267 3268 static void i915_gem_write_fence(struct drm_device *dev, int reg, 3269 struct drm_i915_gem_object *obj) 3270 { 3271 struct drm_i915_private *dev_priv = dev->dev_private; 3272 3273 /* Ensure that all CPU reads are completed before installing a fence 3274 * and all writes before removing the fence. 3275 */ 3276 if (i915_gem_object_needs_mb(dev_priv->fence_regs[reg].obj)) 3277 mb(); 3278 3279 WARN(obj && (!obj->stride || !obj->tiling_mode), 3280 "bogus fence setup with stride: 0x%x, tiling mode: %i\n", 3281 obj->stride, obj->tiling_mode); 3282 3283 switch (INTEL_INFO(dev)->gen) { 3284 case 8: 3285 case 7: 3286 case 6: 3287 case 5: 3288 case 4: i965_write_fence_reg(dev, reg, obj); break; 3289 case 3: i915_write_fence_reg(dev, reg, obj); break; 3290 case 2: i830_write_fence_reg(dev, reg, obj); break; 3291 default: BUG(); 3292 } 3293 3294 /* And similarly be paranoid that no direct access to this region 3295 * is reordered to before the fence is installed. 
3296 */ 3297 if (i915_gem_object_needs_mb(obj)) 3298 mb(); 3299 } 3300 3301 static inline int fence_number(struct drm_i915_private *dev_priv, 3302 struct drm_i915_fence_reg *fence) 3303 { 3304 return fence - dev_priv->fence_regs; 3305 } 3306 3307 static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj, 3308 struct drm_i915_fence_reg *fence, 3309 bool enable) 3310 { 3311 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 3312 int reg = fence_number(dev_priv, fence); 3313 3314 i915_gem_write_fence(obj->base.dev, reg, enable ? obj : NULL); 3315 3316 if (enable) { 3317 obj->fence_reg = reg; 3318 fence->obj = obj; 3319 list_move_tail(&fence->lru_list, &dev_priv->mm.fence_list); 3320 } else { 3321 obj->fence_reg = I915_FENCE_REG_NONE; 3322 fence->obj = NULL; 3323 list_del_init(&fence->lru_list); 3324 } 3325 obj->fence_dirty = false; 3326 } 3327 3328 static int 3329 i915_gem_object_wait_fence(struct drm_i915_gem_object *obj) 3330 { 3331 if (obj->last_fenced_seqno) { 3332 int ret = i915_wait_seqno(obj->ring, obj->last_fenced_seqno); 3333 if (ret) 3334 return ret; 3335 3336 obj->last_fenced_seqno = 0; 3337 } 3338 3339 obj->fenced_gpu_access = false; 3340 return 0; 3341 } 3342 3343 int 3344 i915_gem_object_put_fence(struct drm_i915_gem_object *obj) 3345 { 3346 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 3347 struct drm_i915_fence_reg *fence; 3348 int ret; 3349 3350 ret = i915_gem_object_wait_fence(obj); 3351 if (ret) 3352 return ret; 3353 3354 if (obj->fence_reg == I915_FENCE_REG_NONE) 3355 return 0; 3356 3357 fence = &dev_priv->fence_regs[obj->fence_reg]; 3358 3359 if (WARN_ON(fence->pin_count)) 3360 return -EBUSY; 3361 3362 i915_gem_object_fence_lost(obj); 3363 i915_gem_object_update_fence(obj, fence, false); 3364 3365 return 0; 3366 } 3367 3368 static struct drm_i915_fence_reg * 3369 i915_find_fence_reg(struct drm_device *dev) 3370 { 3371 struct drm_i915_private *dev_priv = dev->dev_private; 3372 struct drm_i915_fence_reg *reg, *avail; 3373 int i; 3374 3375 /* First try to find a free reg */ 3376 avail = NULL; 3377 for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) { 3378 reg = &dev_priv->fence_regs[i]; 3379 if (!reg->obj) 3380 return reg; 3381 3382 if (!reg->pin_count) 3383 avail = reg; 3384 } 3385 3386 if (avail == NULL) 3387 goto deadlock; 3388 3389 /* None available, try to steal one or wait for a user to finish */ 3390 list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) { 3391 if (reg->pin_count) 3392 continue; 3393 3394 return reg; 3395 } 3396 3397 deadlock: 3398 /* Wait for completion of pending flips which consume fences */ 3399 if (intel_has_pending_fb_unpin(dev)) 3400 return ERR_PTR(-EAGAIN); 3401 3402 return ERR_PTR(-EDEADLK); 3403 } 3404 3405 /** 3406 * i915_gem_object_get_fence - set up fencing for an object 3407 * @obj: object to map through a fence reg 3408 * 3409 * When mapping objects through the GTT, userspace wants to be able to write 3410 * to them without having to worry about swizzling if the object is tiled. 3411 * This function walks the fence regs looking for a free one for @obj, 3412 * stealing one if it can't find any. 3413 * 3414 * It then sets up the reg based on the object's properties: address, pitch 3415 * and tiling format. 3416 * 3417 * For an untiled surface, this removes any existing fence. 
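 *
 * The selection order implemented below is: reuse the object's current
 * fence if it already has one (only bumping its LRU position), otherwise
 * take a completely free register, otherwise steal the least-recently-used
 * unpinned register, waiting for and detaching its previous owner first.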
3418 */ 3419 int 3420 i915_gem_object_get_fence(struct drm_i915_gem_object *obj) 3421 { 3422 struct drm_device *dev = obj->base.dev; 3423 struct drm_i915_private *dev_priv = dev->dev_private; 3424 bool enable = obj->tiling_mode != I915_TILING_NONE; 3425 struct drm_i915_fence_reg *reg; 3426 int ret; 3427 3428 /* Have we updated the tiling parameters upon the object and so 3429 * will need to serialise the write to the associated fence register? 3430 */ 3431 if (obj->fence_dirty) { 3432 ret = i915_gem_object_wait_fence(obj); 3433 if (ret) 3434 return ret; 3435 } 3436 3437 /* Just update our place in the LRU if our fence is getting reused. */ 3438 if (obj->fence_reg != I915_FENCE_REG_NONE) { 3439 reg = &dev_priv->fence_regs[obj->fence_reg]; 3440 if (!obj->fence_dirty) { 3441 list_move_tail(&reg->lru_list, 3442 &dev_priv->mm.fence_list); 3443 return 0; 3444 } 3445 } else if (enable) { 3446 reg = i915_find_fence_reg(dev); 3447 if (IS_ERR(reg)) 3448 return PTR_ERR(reg); 3449 3450 if (reg->obj) { 3451 struct drm_i915_gem_object *old = reg->obj; 3452 3453 ret = i915_gem_object_wait_fence(old); 3454 if (ret) 3455 return ret; 3456 3457 i915_gem_object_fence_lost(old); 3458 } 3459 } else 3460 return 0; 3461 3462 i915_gem_object_update_fence(obj, reg, enable); 3463 3464 return 0; 3465 } 3466 3467 static bool i915_gem_valid_gtt_space(struct drm_device *dev, 3468 struct drm_mm_node *gtt_space, 3469 unsigned long cache_level) 3470 { 3471 struct drm_mm_node *other; 3472 3473 /* On non-LLC machines we have to be careful when putting differing 3474 * types of snoopable memory together to avoid the prefetcher 3475 * crossing memory domains and dying. 3476 */ 3477 if (HAS_LLC(dev)) 3478 return true; 3479 3480 if (!drm_mm_node_allocated(gtt_space)) 3481 return true; 3482 3483 if (list_empty(&gtt_space->node_list)) 3484 return true; 3485 3486 other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list); 3487 if (other->allocated && !other->hole_follows && other->color != cache_level) 3488 return false; 3489 3490 other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list); 3491 if (other->allocated && !gtt_space->hole_follows && other->color != cache_level) 3492 return false; 3493 3494 return true; 3495 } 3496 3497 static void i915_gem_verify_gtt(struct drm_device *dev) 3498 { 3499 #if WATCH_GTT 3500 struct drm_i915_private *dev_priv = dev->dev_private; 3501 struct drm_i915_gem_object *obj; 3502 int err = 0; 3503 3504 list_for_each_entry(obj, &dev_priv->mm.gtt_list, global_list) { 3505 if (obj->gtt_space == NULL) { 3506 printk(KERN_ERR "object found on GTT list with no space reserved\n"); 3507 err++; 3508 continue; 3509 } 3510 3511 if (obj->cache_level != obj->gtt_space->color) { 3512 printk(KERN_ERR "object reserved space [%08lx, %08lx] with wrong color, cache_level=%x, color=%lx\n", 3513 i915_gem_obj_ggtt_offset(obj), 3514 i915_gem_obj_ggtt_offset(obj) + i915_gem_obj_ggtt_size(obj), 3515 obj->cache_level, 3516 obj->gtt_space->color); 3517 err++; 3518 continue; 3519 } 3520 3521 if (!i915_gem_valid_gtt_space(dev, 3522 obj->gtt_space, 3523 obj->cache_level)) { 3524 printk(KERN_ERR "invalid GTT space found at [%08lx, %08lx] - color=%x\n", 3525 i915_gem_obj_ggtt_offset(obj), 3526 i915_gem_obj_ggtt_offset(obj) + i915_gem_obj_ggtt_size(obj), 3527 obj->cache_level); 3528 err++; 3529 continue; 3530 } 3531 } 3532 3533 WARN_ON(err); 3534 #endif 3535 } 3536 3537 /** 3538 * Finds free space in the GTT aperture and binds the object there.
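 *
 * The flow below is roughly: pin the backing pages, look up or create the
 * vma for the target address space, then try
 * drm_mm_insert_node_in_range_generic(); on failure evict something with
 * i915_gem_evict_something() and retry the search before giving up.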
3539 */ 3540 static struct i915_vma * 3541 i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, 3542 struct i915_address_space *vm, 3543 unsigned alignment, 3544 uint64_t flags) 3545 { 3546 struct drm_device *dev = obj->base.dev; 3547 struct drm_i915_private *dev_priv = dev->dev_private; 3548 u32 size, fence_size, fence_alignment, unfenced_alignment; 3549 unsigned long start = 3550 flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0; 3551 unsigned long end = 3552 flags & PIN_MAPPABLE ? dev_priv->gtt.mappable_end : vm->total; 3553 struct i915_vma *vma; 3554 int ret; 3555 3556 fence_size = i915_gem_get_gtt_size(dev, 3557 obj->base.size, 3558 obj->tiling_mode); 3559 fence_alignment = i915_gem_get_gtt_alignment(dev, 3560 obj->base.size, 3561 obj->tiling_mode, true); 3562 unfenced_alignment = 3563 i915_gem_get_gtt_alignment(dev, 3564 obj->base.size, 3565 obj->tiling_mode, false); 3566 3567 if (alignment == 0) 3568 alignment = flags & PIN_MAPPABLE ? fence_alignment : 3569 unfenced_alignment; 3570 if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) { 3571 DRM_DEBUG("Invalid object alignment requested %u\n", alignment); 3572 return ERR_PTR(-EINVAL); 3573 } 3574 3575 size = flags & PIN_MAPPABLE ? fence_size : obj->base.size; 3576 3577 /* If the object is bigger than the entire aperture, reject it early 3578 * before evicting everything in a vain attempt to find space. 3579 */ 3580 if (obj->base.size > end) { 3581 DRM_DEBUG("Attempting to bind an object larger than the aperture: object=%zd > %s aperture=%lu\n", 3582 obj->base.size, 3583 flags & PIN_MAPPABLE ? "mappable" : "total", 3584 end); 3585 return ERR_PTR(-E2BIG); 3586 } 3587 3588 ret = i915_gem_object_get_pages(obj); 3589 if (ret) 3590 return ERR_PTR(ret); 3591 3592 i915_gem_object_pin_pages(obj); 3593 3594 vma = i915_gem_obj_lookup_or_create_vma(obj, vm); 3595 if (IS_ERR(vma)) 3596 goto err_unpin; 3597 3598 search_free: 3599 ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node, 3600 size, alignment, 3601 obj->cache_level, 3602 start, end, 3603 DRM_MM_SEARCH_DEFAULT, 3604 DRM_MM_CREATE_DEFAULT); 3605 if (ret) { 3606 ret = i915_gem_evict_something(dev, vm, size, alignment, 3607 obj->cache_level, 3608 start, end, 3609 flags); 3610 if (ret == 0) 3611 goto search_free; 3612 3613 goto err_free_vma; 3614 } 3615 if (WARN_ON(!i915_gem_valid_gtt_space(dev, &vma->node, 3616 obj->cache_level))) { 3617 ret = -EINVAL; 3618 goto err_remove_node; 3619 } 3620 3621 ret = i915_gem_gtt_prepare_object(obj); 3622 if (ret) 3623 goto err_remove_node; 3624 3625 list_move_tail(&obj->global_list, &dev_priv->mm.bound_list); 3626 list_add_tail(&vma->mm_list, &vm->inactive_list); 3627 3628 if (i915_is_ggtt(vm)) { 3629 bool mappable, fenceable; 3630 3631 fenceable = (vma->node.size == fence_size && 3632 (vma->node.start & (fence_alignment - 1)) == 0); 3633 3634 mappable = (vma->node.start + obj->base.size <= 3635 dev_priv->gtt.mappable_end); 3636 3637 obj->map_and_fenceable = mappable && fenceable; 3638 } 3639 3640 WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable); 3641 3642 trace_i915_vma_bind(vma, flags); 3643 vma->bind_vma(vma, obj->cache_level, 3644 flags & (PIN_MAPPABLE | PIN_GLOBAL) ? 
GLOBAL_BIND : 0); 3645 3646 i915_gem_verify_gtt(dev); 3647 return vma; 3648 3649 err_remove_node: 3650 drm_mm_remove_node(&vma->node); 3651 err_free_vma: 3652 i915_gem_vma_destroy(vma); 3653 vma = ERR_PTR(ret); 3654 err_unpin: 3655 i915_gem_object_unpin_pages(obj); 3656 return vma; 3657 } 3658 3659 bool 3660 i915_gem_clflush_object(struct drm_i915_gem_object *obj, 3661 bool force) 3662 { 3663 /* If we don't have a page list set up, then we're not pinned 3664 * to GPU, and we can ignore the cache flush because it'll happen 3665 * again at bind time. 3666 */ 3667 if (obj->pages == NULL) 3668 return false; 3669 3670 /* 3671 * Stolen memory is always coherent with the GPU as it is explicitly 3672 * marked as wc by the system, or the system is cache-coherent. 3673 */ 3674 if (obj->stolen) 3675 return false; 3676 3677 /* If the GPU is snooping the contents of the CPU cache, 3678 * we do not need to manually clear the CPU cache lines. However, 3679 * the caches are only snooped when the render cache is 3680 * flushed/invalidated. As we always have to emit invalidations 3681 * and flushes when moving into and out of the RENDER domain, correct 3682 * snooping behaviour occurs naturally as the result of our domain 3683 * tracking. 3684 */ 3685 if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) 3686 return false; 3687 3688 trace_i915_gem_object_clflush(obj); 3689 drm_clflush_pages(obj->pages, obj->base.size / PAGE_SIZE); 3690 3691 return true; 3692 } 3693 3694 /** Flushes the GTT write domain for the object if it's dirty. */ 3695 static void 3696 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) 3697 { 3698 uint32_t old_write_domain; 3699 3700 if (obj->base.write_domain != I915_GEM_DOMAIN_GTT) 3701 return; 3702 3703 /* No actual flushing is required for the GTT write domain. Writes 3704 * to it immediately go to main memory as far as we know, so there's 3705 * no chipset flush. It also doesn't land in render cache. 3706 * 3707 * However, we do have to enforce the order so that all writes through 3708 * the GTT land before any writes to the device, such as updates to 3709 * the GATT itself. 3710 */ 3711 wmb(); 3712 3713 old_write_domain = obj->base.write_domain; 3714 obj->base.write_domain = 0; 3715 3716 intel_fb_obj_flush(obj, false); 3717 3718 3719 3720 trace_i915_gem_object_change_domain(obj, 3721 obj->base.read_domains, 3722 old_write_domain); 3723 } 3724 3725 /** Flushes the CPU write domain for the object if it's dirty. */ 3726 static void 3727 i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj, 3728 bool force) 3729 { 3730 uint32_t old_write_domain; 3731 3732 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) 3733 return; 3734 3735 if (i915_gem_clflush_object(obj, force)) 3736 i915_gem_chipset_flush(obj->base.dev); 3737 3738 old_write_domain = obj->base.write_domain; 3739 obj->base.write_domain = 0; 3740 3741 trace_i915_gem_object_change_domain(obj, 3742 obj->base.read_domains, 3743 old_write_domain); 3744 } 3745 3746 /** 3747 * Moves a single object to the GTT read, and possibly write domain. 3748 * 3749 * This function returns when the move is complete, including waiting on 3750 * flushes to occur. 3751 */ 3752 int 3753 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) 3754 { 3755 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 3756 uint32_t old_write_domain, old_read_domains; 3757 int ret; 3758 3759 /* Not valid to be called on unbound objects.
*/ 3760 if (!i915_gem_obj_bound_any(obj)) 3761 return -EINVAL; 3762 3763 if (obj->base.write_domain == I915_GEM_DOMAIN_GTT) 3764 return 0; 3765 3766 ret = i915_gem_object_wait_rendering(obj, !write); 3767 if (ret) 3768 return ret; 3769 3770 i915_gem_object_retire(obj); 3771 i915_gem_object_flush_cpu_write_domain(obj, false); 3772 3773 /* Serialise direct access to this object with the barriers for 3774 * coherent writes from the GPU, by effectively invalidating the 3775 * GTT domain upon first access. 3776 */ 3777 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) 3778 mb(); 3779 3780 old_write_domain = obj->base.write_domain; 3781 old_read_domains = obj->base.read_domains; 3782 3783 /* It should now be out of any other write domains, and we can update 3784 * the domain values for our changes. 3785 */ 3786 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0); 3787 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 3788 if (write) { 3789 obj->base.read_domains = I915_GEM_DOMAIN_GTT; 3790 obj->base.write_domain = I915_GEM_DOMAIN_GTT; 3791 obj->dirty = 1; 3792 } 3793 3794 if (write) 3795 intel_fb_obj_invalidate(obj, NULL); 3796 3797 trace_i915_gem_object_change_domain(obj, 3798 old_read_domains, 3799 old_write_domain); 3800 3801 /* And bump the LRU for this access */ 3802 if (i915_gem_object_is_inactive(obj)) { 3803 struct i915_vma *vma = i915_gem_obj_to_ggtt(obj); 3804 if (vma) 3805 list_move_tail(&vma->mm_list, 3806 &dev_priv->gtt.base.inactive_list); 3807 3808 } 3809 3810 return 0; 3811 } 3812 3813 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, 3814 enum i915_cache_level cache_level) 3815 { 3816 struct drm_device *dev = obj->base.dev; 3817 struct i915_vma *vma, *next; 3818 int ret; 3819 3820 if (obj->cache_level == cache_level) 3821 return 0; 3822 3823 if (i915_gem_obj_is_pinned(obj)) { 3824 DRM_DEBUG("can not change the cache level of pinned objects\n"); 3825 return -EBUSY; 3826 } 3827 3828 list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) { 3829 if (!i915_gem_valid_gtt_space(dev, &vma->node, cache_level)) { 3830 ret = i915_vma_unbind(vma); 3831 if (ret) 3832 return ret; 3833 } 3834 } 3835 3836 if (i915_gem_obj_bound_any(obj)) { 3837 ret = i915_gem_object_finish_gpu(obj); 3838 if (ret) 3839 return ret; 3840 3841 i915_gem_object_finish_gtt(obj); 3842 3843 /* Before SandyBridge, you could not use tiling or fence 3844 * registers with snooped memory, so relinquish any fences 3845 * currently pointing to our region in the aperture. 3846 */ 3847 if (INTEL_INFO(dev)->gen < 6) { 3848 ret = i915_gem_object_put_fence(obj); 3849 if (ret) 3850 return ret; 3851 } 3852 3853 list_for_each_entry(vma, &obj->vma_list, vma_link) 3854 if (drm_mm_node_allocated(&vma->node)) 3855 vma->bind_vma(vma, cache_level, 3856 obj->has_global_gtt_mapping ? GLOBAL_BIND : 0); 3857 } 3858 3859 list_for_each_entry(vma, &obj->vma_list, vma_link) 3860 vma->node.color = cache_level; 3861 obj->cache_level = cache_level; 3862 3863 if (cpu_write_needs_clflush(obj)) { 3864 u32 old_read_domains, old_write_domain; 3865 3866 /* If we're coming from LLC cached, then we haven't 3867 * actually been tracking whether the data is in the 3868 * CPU cache or not, since we only allow one bit set 3869 * in obj->write_domain and have been skipping the clflushes. 3870 * Just set it to the CPU cache for now. 
3871 */ 3872 i915_gem_object_retire(obj); 3873 WARN_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU); 3874 3875 old_read_domains = obj->base.read_domains; 3876 old_write_domain = obj->base.write_domain; 3877 3878 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 3879 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 3880 3881 trace_i915_gem_object_change_domain(obj, 3882 old_read_domains, 3883 old_write_domain); 3884 } 3885 3886 i915_gem_verify_gtt(dev); 3887 return 0; 3888 } 3889 3890 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, 3891 struct drm_file *file) 3892 { 3893 struct drm_i915_gem_caching *args = data; 3894 struct drm_i915_gem_object *obj; 3895 int ret; 3896 3897 ret = i915_mutex_lock_interruptible(dev); 3898 if (ret) 3899 return ret; 3900 3901 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 3902 if (&obj->base == NULL) { 3903 ret = -ENOENT; 3904 goto unlock; 3905 } 3906 3907 switch (obj->cache_level) { 3908 case I915_CACHE_LLC: 3909 case I915_CACHE_L3_LLC: 3910 args->caching = I915_CACHING_CACHED; 3911 break; 3912 3913 case I915_CACHE_WT: 3914 args->caching = I915_CACHING_DISPLAY; 3915 break; 3916 3917 default: 3918 args->caching = I915_CACHING_NONE; 3919 break; 3920 } 3921 3922 drm_gem_object_unreference(&obj->base); 3923 unlock: 3924 mutex_unlock(&dev->struct_mutex); 3925 return ret; 3926 } 3927 3928 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, 3929 struct drm_file *file) 3930 { 3931 struct drm_i915_gem_caching *args = data; 3932 struct drm_i915_gem_object *obj; 3933 enum i915_cache_level level; 3934 int ret; 3935 3936 switch (args->caching) { 3937 case I915_CACHING_NONE: 3938 level = I915_CACHE_NONE; 3939 break; 3940 case I915_CACHING_CACHED: 3941 level = I915_CACHE_LLC; 3942 break; 3943 case I915_CACHING_DISPLAY: 3944 level = HAS_WT(dev) ? I915_CACHE_WT : I915_CACHE_NONE; 3945 break; 3946 default: 3947 return -EINVAL; 3948 } 3949 3950 ret = i915_mutex_lock_interruptible(dev); 3951 if (ret) 3952 return ret; 3953 3954 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 3955 if (&obj->base == NULL) { 3956 ret = -ENOENT; 3957 goto unlock; 3958 } 3959 3960 ret = i915_gem_object_set_cache_level(obj, level); 3961 3962 drm_gem_object_unreference(&obj->base); 3963 unlock: 3964 mutex_unlock(&dev->struct_mutex); 3965 return ret; 3966 } 3967 3968 static bool is_pin_display(struct drm_i915_gem_object *obj) 3969 { 3970 struct i915_vma *vma; 3971 3972 if (list_empty(&obj->vma_list)) 3973 return false; 3974 3975 vma = i915_gem_obj_to_ggtt(obj); 3976 if (!vma) 3977 return false; 3978 3979 /* There are 3 sources that pin objects: 3980 * 1. The display engine (scanouts, sprites, cursors); 3981 * 2. Reservations for execbuffer; 3982 * 3. The user. 3983 * 3984 * We can ignore reservations as we hold the struct_mutex and 3985 * are only called outside of the reservation path. The user 3986 * can only increment pin_count once, and so if after 3987 * subtracting the potential reference by the user, any pin_count 3988 * remains, it must be due to another use by the display engine. 3989 */ 3990 return vma->pin_count - !!obj->user_pin_count; 3991 } 3992 3993 /* 3994 * Prepare buffer for display plane (scanout, cursors, etc). 3995 * Can be called from an uninterruptible phase (modesetting) and allows 3996 * any flushes to be pipelined (for pageflips). 
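 * (Typical caller, as a rough sketch: the modesetting code pins a
 * framebuffer with i915_gem_object_pin_to_display_plane() ahead of a
 * flip and later releases it via
 * i915_gem_object_unpin_from_display_plane(); the exact call sites live
 * in the display code, not in this file.)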
3997 */ 3998 int 3999 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, 4000 u32 alignment, 4001 struct intel_engine_cs *pipelined) 4002 { 4003 u32 old_read_domains, old_write_domain; 4004 bool was_pin_display; 4005 int ret; 4006 4007 if (pipelined != obj->ring) { 4008 ret = i915_gem_object_sync(obj, pipelined); 4009 if (ret) 4010 return ret; 4011 } 4012 4013 /* Mark the pin_display early so that we account for the 4014 * display coherency whilst setting up the cache domains. 4015 */ 4016 was_pin_display = obj->pin_display; 4017 obj->pin_display = true; 4018 4019 /* The display engine is not coherent with the LLC cache on gen6. As 4020 * a result, we make sure that the pinning that is about to occur is 4021 * done with uncached PTEs. This is lowest common denominator for all 4022 * chipsets. 4023 * 4024 * However for gen6+, we could do better by using the GFDT bit instead 4025 * of uncaching, which would allow us to flush all the LLC-cached data 4026 * with that bit in the PTE to main memory with just one PIPE_CONTROL. 4027 */ 4028 ret = i915_gem_object_set_cache_level(obj, 4029 HAS_WT(obj->base.dev) ? I915_CACHE_WT : I915_CACHE_NONE); 4030 if (ret) 4031 goto err_unpin_display; 4032 4033 /* As the user may map the buffer once pinned in the display plane 4034 * (e.g. libkms for the bootup splash), we have to ensure that we 4035 * always use map_and_fenceable for all scanout buffers. 4036 */ 4037 ret = i915_gem_obj_ggtt_pin(obj, alignment, PIN_MAPPABLE); 4038 if (ret) 4039 goto err_unpin_display; 4040 4041 i915_gem_object_flush_cpu_write_domain(obj, true); 4042 4043 old_write_domain = obj->base.write_domain; 4044 old_read_domains = obj->base.read_domains; 4045 4046 /* It should now be out of any other write domains, and we can update 4047 * the domain values for our changes. 4048 */ 4049 obj->base.write_domain = 0; 4050 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 4051 4052 trace_i915_gem_object_change_domain(obj, 4053 old_read_domains, 4054 old_write_domain); 4055 4056 return 0; 4057 4058 err_unpin_display: 4059 WARN_ON(was_pin_display != is_pin_display(obj)); 4060 obj->pin_display = was_pin_display; 4061 return ret; 4062 } 4063 4064 void 4065 i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj) 4066 { 4067 i915_gem_object_ggtt_unpin(obj); 4068 obj->pin_display = is_pin_display(obj); 4069 } 4070 4071 int 4072 i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj) 4073 { 4074 int ret; 4075 4076 if ((obj->base.read_domains & I915_GEM_GPU_DOMAINS) == 0) 4077 return 0; 4078 4079 ret = i915_gem_object_wait_rendering(obj, false); 4080 if (ret) 4081 return ret; 4082 4083 /* Ensure that we invalidate the GPU's caches and TLBs. */ 4084 obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS; 4085 return 0; 4086 } 4087 4088 /** 4089 * Moves a single object to the CPU read, and possibly write domain. 4090 * 4091 * This function returns when the move is complete, including waiting on 4092 * flushes to occur. 
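* When the object is not already coherent with the CPU cache this involves a
* clflush, so that subsequent CPU reads observe what the GPU wrote.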
4093 */ 4094 int 4095 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) 4096 { 4097 uint32_t old_write_domain, old_read_domains; 4098 int ret; 4099 4100 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) 4101 return 0; 4102 4103 ret = i915_gem_object_wait_rendering(obj, !write); 4104 if (ret) 4105 return ret; 4106 4107 i915_gem_object_retire(obj); 4108 i915_gem_object_flush_gtt_write_domain(obj); 4109 4110 old_write_domain = obj->base.write_domain; 4111 old_read_domains = obj->base.read_domains; 4112 4113 /* Flush the CPU cache if it's still invalid. */ 4114 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) { 4115 i915_gem_clflush_object(obj, false); 4116 4117 obj->base.read_domains |= I915_GEM_DOMAIN_CPU; 4118 } 4119 4120 /* It should now be out of any other write domains, and we can update 4121 * the domain values for our changes. 4122 */ 4123 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0); 4124 4125 /* If we're writing through the CPU, then the GPU read domains will 4126 * need to be invalidated at next use. 4127 */ 4128 if (write) { 4129 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 4130 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 4131 } 4132 4133 if (write) 4134 intel_fb_obj_invalidate(obj, NULL); 4135 4136 trace_i915_gem_object_change_domain(obj, 4137 old_read_domains, 4138 old_write_domain); 4139 4140 return 0; 4141 } 4142 4143 /* Throttle our rendering by waiting until the ring has completed our requests 4144 * emitted over 20 msec ago. 4145 * 4146 * Note that if we were to use the current jiffies each time around the loop, 4147 * we wouldn't escape the function with any frames outstanding if the time to 4148 * render a frame was over 20ms. 4149 * 4150 * This should get us reasonable parallelism between CPU and GPU but also 4151 * relatively low latency when blocking on a particular request to finish. 
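* In practice this keeps a client from queueing much more than about 20ms of
* rendering ahead of the GPU before it is made to wait.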
4152 */ 4153 static int 4154 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) 4155 { 4156 struct drm_i915_private *dev_priv = dev->dev_private; 4157 struct drm_i915_file_private *file_priv = file->driver_priv; 4158 unsigned long recent_enough = jiffies - msecs_to_jiffies(20); 4159 struct drm_i915_gem_request *request; 4160 struct intel_engine_cs *ring = NULL; 4161 unsigned reset_counter; 4162 u32 seqno = 0; 4163 int ret; 4164 4165 ret = i915_gem_wait_for_error(&dev_priv->gpu_error); 4166 if (ret) 4167 return ret; 4168 4169 ret = i915_gem_check_wedge(&dev_priv->gpu_error, false); 4170 if (ret) 4171 return ret; 4172 4173 spin_lock(&file_priv->mm.lock); 4174 list_for_each_entry(request, &file_priv->mm.request_list, client_list) { 4175 if (time_after_eq(request->emitted_jiffies, recent_enough)) 4176 break; 4177 4178 ring = request->ring; 4179 seqno = request->seqno; 4180 } 4181 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter); 4182 spin_unlock(&file_priv->mm.lock); 4183 4184 if (seqno == 0) 4185 return 0; 4186 4187 ret = __wait_seqno(ring, seqno, reset_counter, true, NULL, NULL); 4188 if (ret == 0) 4189 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0); 4190 4191 return ret; 4192 } 4193 4194 static bool 4195 i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags) 4196 { 4197 struct drm_i915_gem_object *obj = vma->obj; 4198 4199 if (alignment && 4200 vma->node.start & (alignment - 1)) 4201 return true; 4202 4203 if (flags & PIN_MAPPABLE && !obj->map_and_fenceable) 4204 return true; 4205 4206 if (flags & PIN_OFFSET_BIAS && 4207 vma->node.start < (flags & PIN_OFFSET_MASK)) 4208 return true; 4209 4210 return false; 4211 } 4212 4213 int 4214 i915_gem_object_pin(struct drm_i915_gem_object *obj, 4215 struct i915_address_space *vm, 4216 uint32_t alignment, 4217 uint64_t flags) 4218 { 4219 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 4220 struct i915_vma *vma; 4221 int ret; 4222 4223 if (WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base)) 4224 return -ENODEV; 4225 4226 if (WARN_ON(flags & (PIN_GLOBAL | PIN_MAPPABLE) && !i915_is_ggtt(vm))) 4227 return -EINVAL; 4228 4229 vma = i915_gem_obj_to_vma(obj, vm); 4230 if (vma) { 4231 if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT)) 4232 return -EBUSY; 4233 4234 if (i915_vma_misplaced(vma, alignment, flags)) { 4235 WARN(vma->pin_count, 4236 "bo is already pinned with incorrect alignment:" 4237 " offset=%lx, req.alignment=%x, req.map_and_fenceable=%d," 4238 " obj->map_and_fenceable=%d\n", 4239 i915_gem_obj_offset(obj, vm), alignment, 4240 !!(flags & PIN_MAPPABLE), 4241 obj->map_and_fenceable); 4242 ret = i915_vma_unbind(vma); 4243 if (ret) 4244 return ret; 4245 4246 vma = NULL; 4247 } 4248 } 4249 4250 if (vma == NULL || !drm_mm_node_allocated(&vma->node)) { 4251 vma = i915_gem_object_bind_to_vm(obj, vm, alignment, flags); 4252 if (IS_ERR(vma)) 4253 return PTR_ERR(vma); 4254 } 4255 4256 if (flags & PIN_GLOBAL && !obj->has_global_gtt_mapping) 4257 vma->bind_vma(vma, obj->cache_level, GLOBAL_BIND); 4258 4259 vma->pin_count++; 4260 if (flags & PIN_MAPPABLE) 4261 obj->pin_mappable |= true; 4262 4263 return 0; 4264 } 4265 4266 void 4267 i915_gem_object_ggtt_unpin(struct drm_i915_gem_object *obj) 4268 { 4269 struct i915_vma *vma = i915_gem_obj_to_ggtt(obj); 4270 4271 BUG_ON(!vma); 4272 BUG_ON(vma->pin_count == 0); 4273 BUG_ON(!i915_gem_obj_ggtt_bound(obj)); 4274 4275 if (--vma->pin_count == 0) 4276 obj->pin_mappable = false; 4277 } 4278 4279 bool 4280 
i915_gem_object_pin_fence(struct drm_i915_gem_object *obj) 4281 { 4282 if (obj->fence_reg != I915_FENCE_REG_NONE) { 4283 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 4284 struct i915_vma *ggtt_vma = i915_gem_obj_to_ggtt(obj); 4285 4286 WARN_ON(!ggtt_vma || 4287 dev_priv->fence_regs[obj->fence_reg].pin_count > 4288 ggtt_vma->pin_count); 4289 dev_priv->fence_regs[obj->fence_reg].pin_count++; 4290 return true; 4291 } else 4292 return false; 4293 } 4294 4295 void 4296 i915_gem_object_unpin_fence(struct drm_i915_gem_object *obj) 4297 { 4298 if (obj->fence_reg != I915_FENCE_REG_NONE) { 4299 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 4300 WARN_ON(dev_priv->fence_regs[obj->fence_reg].pin_count <= 0); 4301 dev_priv->fence_regs[obj->fence_reg].pin_count--; 4302 } 4303 } 4304 4305 int 4306 i915_gem_pin_ioctl(struct drm_device *dev, void *data, 4307 struct drm_file *file) 4308 { 4309 struct drm_i915_gem_pin *args = data; 4310 struct drm_i915_gem_object *obj; 4311 int ret; 4312 4313 if (INTEL_INFO(dev)->gen >= 6) 4314 return -ENODEV; 4315 4316 ret = i915_mutex_lock_interruptible(dev); 4317 if (ret) 4318 return ret; 4319 4320 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 4321 if (&obj->base == NULL) { 4322 ret = -ENOENT; 4323 goto unlock; 4324 } 4325 4326 if (obj->madv != I915_MADV_WILLNEED) { 4327 DRM_DEBUG("Attempting to pin a purgeable buffer\n"); 4328 ret = -EFAULT; 4329 goto out; 4330 } 4331 4332 if (obj->pin_filp != NULL && obj->pin_filp != file) { 4333 DRM_DEBUG("Already pinned in i915_gem_pin_ioctl(): %d\n", 4334 args->handle); 4335 ret = -EINVAL; 4336 goto out; 4337 } 4338 4339 if (obj->user_pin_count == ULONG_MAX) { 4340 ret = -EBUSY; 4341 goto out; 4342 } 4343 4344 if (obj->user_pin_count == 0) { 4345 ret = i915_gem_obj_ggtt_pin(obj, args->alignment, PIN_MAPPABLE); 4346 if (ret) 4347 goto out; 4348 } 4349 4350 obj->user_pin_count++; 4351 obj->pin_filp = file; 4352 4353 args->offset = i915_gem_obj_ggtt_offset(obj); 4354 out: 4355 drm_gem_object_unreference(&obj->base); 4356 unlock: 4357 mutex_unlock(&dev->struct_mutex); 4358 return ret; 4359 } 4360 4361 int 4362 i915_gem_unpin_ioctl(struct drm_device *dev, void *data, 4363 struct drm_file *file) 4364 { 4365 struct drm_i915_gem_pin *args = data; 4366 struct drm_i915_gem_object *obj; 4367 int ret; 4368 4369 ret = i915_mutex_lock_interruptible(dev); 4370 if (ret) 4371 return ret; 4372 4373 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 4374 if (&obj->base == NULL) { 4375 ret = -ENOENT; 4376 goto unlock; 4377 } 4378 4379 if (obj->pin_filp != file) { 4380 DRM_DEBUG("Not pinned by caller in i915_gem_pin_ioctl(): %d\n", 4381 args->handle); 4382 ret = -EINVAL; 4383 goto out; 4384 } 4385 obj->user_pin_count--; 4386 if (obj->user_pin_count == 0) { 4387 obj->pin_filp = NULL; 4388 i915_gem_object_ggtt_unpin(obj); 4389 } 4390 4391 out: 4392 drm_gem_object_unreference(&obj->base); 4393 unlock: 4394 mutex_unlock(&dev->struct_mutex); 4395 return ret; 4396 } 4397 4398 int 4399 i915_gem_busy_ioctl(struct drm_device *dev, void *data, 4400 struct drm_file *file) 4401 { 4402 struct drm_i915_gem_busy *args = data; 4403 struct drm_i915_gem_object *obj; 4404 int ret; 4405 4406 ret = i915_mutex_lock_interruptible(dev); 4407 if (ret) 4408 return ret; 4409 4410 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 4411 if (&obj->base == NULL) { 4412 ret = -ENOENT; 4413 goto unlock; 4414 } 4415 4416 /* Count all active objects as busy, even if they are currently not used 4417 * 
by the gpu. Users of this interface expect objects to eventually 4418 * become non-busy without any further actions, therefore emit any 4419 * necessary flushes here. 4420 */ 4421 ret = i915_gem_object_flush_active(obj); 4422 4423 args->busy = obj->active; 4424 if (obj->ring) { 4425 args->busy |= intel_ring_flag(obj->ring) << 16; 4426 } 4427 4428 drm_gem_object_unreference(&obj->base); 4429 unlock: 4430 mutex_unlock(&dev->struct_mutex); 4431 return ret; 4432 } 4433 4434 int 4435 i915_gem_throttle_ioctl(struct drm_device *dev, void *data, 4436 struct drm_file *file_priv) 4437 { 4438 return i915_gem_ring_throttle(dev, file_priv); 4439 } 4440 4441 int 4442 i915_gem_madvise_ioctl(struct drm_device *dev, void *data, 4443 struct drm_file *file_priv) 4444 { 4445 struct drm_i915_gem_madvise *args = data; 4446 struct drm_i915_gem_object *obj; 4447 int ret; 4448 4449 switch (args->madv) { 4450 case I915_MADV_DONTNEED: 4451 case I915_MADV_WILLNEED: 4452 break; 4453 default: 4454 return -EINVAL; 4455 } 4456 4457 ret = i915_mutex_lock_interruptible(dev); 4458 if (ret) 4459 return ret; 4460 4461 obj = to_intel_bo(drm_gem_object_lookup(dev, file_priv, args->handle)); 4462 if (&obj->base == NULL) { 4463 ret = -ENOENT; 4464 goto unlock; 4465 } 4466 4467 if (i915_gem_obj_is_pinned(obj)) { 4468 ret = -EINVAL; 4469 goto out; 4470 } 4471 4472 if (obj->madv != __I915_MADV_PURGED) 4473 obj->madv = args->madv; 4474 4475 /* if the object is no longer attached, discard its backing storage */ 4476 if (i915_gem_object_is_purgeable(obj) && obj->pages == NULL) 4477 i915_gem_object_truncate(obj); 4478 4479 args->retained = obj->madv != __I915_MADV_PURGED; 4480 4481 out: 4482 drm_gem_object_unreference(&obj->base); 4483 unlock: 4484 mutex_unlock(&dev->struct_mutex); 4485 return ret; 4486 } 4487 4488 void i915_gem_object_init(struct drm_i915_gem_object *obj, 4489 const struct drm_i915_gem_object_ops *ops) 4490 { 4491 INIT_LIST_HEAD(&obj->global_list); 4492 INIT_LIST_HEAD(&obj->ring_list); 4493 INIT_LIST_HEAD(&obj->obj_exec_link); 4494 INIT_LIST_HEAD(&obj->vma_list); 4495 4496 obj->ops = ops; 4497 4498 obj->fence_reg = I915_FENCE_REG_NONE; 4499 obj->madv = I915_MADV_WILLNEED; 4500 /* Avoid an unnecessary call to unbind on the first bind. */ 4501 obj->map_and_fenceable = true; 4502 4503 i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size); 4504 } 4505 4506 static const struct drm_i915_gem_object_ops i915_gem_object_ops = { 4507 .get_pages = i915_gem_object_get_pages_gtt, 4508 .put_pages = i915_gem_object_put_pages_gtt, 4509 }; 4510 4511 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev, 4512 size_t size) 4513 { 4514 struct drm_i915_gem_object *obj; 4515 #if 0 4516 struct address_space *mapping; 4517 gfp_t mask; 4518 #endif 4519 4520 obj = i915_gem_object_alloc(dev); 4521 if (obj == NULL) 4522 return NULL; 4523 4524 if (drm_gem_object_init(dev, &obj->base, size) != 0) { 4525 i915_gem_object_free(obj); 4526 return NULL; 4527 } 4528 4529 #if 0 4530 mask = GFP_HIGHUSER | __GFP_RECLAIMABLE; 4531 if (IS_CRESTLINE(dev) || IS_BROADWATER(dev)) { 4532 /* 965gm cannot relocate objects above 4GiB. 
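* Clearing __GFP_HIGHMEM and adding __GFP_DMA32 keeps the shmem backing
* pages below the 4GiB boundary for those chipsets.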
*/ 4533 mask &= ~__GFP_HIGHMEM; 4534 mask |= __GFP_DMA32; 4535 } 4536 4537 mapping = file_inode(obj->base.filp)->i_mapping; 4538 mapping_set_gfp_mask(mapping, mask); 4539 #endif 4540 4541 i915_gem_object_init(obj, &i915_gem_object_ops); 4542 4543 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 4544 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 4545 4546 if (HAS_LLC(dev)) { 4547 /* On some devices, we can have the GPU use the LLC (the CPU 4548 * cache) for about a 10% performance improvement 4549 * compared to uncached. Graphics requests other than 4550 * display scanout are coherent with the CPU in 4551 * accessing this cache. This means in this mode we 4552 * don't need to clflush on the CPU side, and on the 4553 * GPU side we only need to flush internal caches to 4554 * get data visible to the CPU. 4555 * 4556 * However, we maintain the display planes as UC, and so 4557 * need to rebind when first used as such. 4558 */ 4559 obj->cache_level = I915_CACHE_LLC; 4560 } else 4561 obj->cache_level = I915_CACHE_NONE; 4562 4563 trace_i915_gem_object_create(obj); 4564 4565 return obj; 4566 } 4567 4568 static bool discard_backing_storage(struct drm_i915_gem_object *obj) 4569 { 4570 /* If we are the last user of the backing storage (be it shmemfs 4571 * pages or stolen etc), we know that the pages are going to be 4572 * immediately released. In this case, we can then skip copying 4573 * back the contents from the GPU. 4574 */ 4575 4576 if (obj->madv != I915_MADV_WILLNEED) 4577 return false; 4578 4579 if (obj->base.vm_obj == NULL) 4580 return true; 4581 4582 /* At first glance, this looks racy, but then again so would be 4583 * userspace racing mmap against close. However, the first external 4584 * reference to the filp can only be obtained through the 4585 * i915_gem_mmap_ioctl() which safeguards us against the user 4586 * acquiring such a reference whilst we are in the middle of 4587 * freeing the object. 4588 */ 4589 #if 0 4590 return atomic_long_read(&obj->base.filp->f_count) == 1; 4591 #else 4592 return false; 4593 #endif 4594 } 4595 4596 void i915_gem_free_object(struct drm_gem_object *gem_obj) 4597 { 4598 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); 4599 struct drm_device *dev = obj->base.dev; 4600 struct drm_i915_private *dev_priv = dev->dev_private; 4601 struct i915_vma *vma, *next; 4602 4603 intel_runtime_pm_get(dev_priv); 4604 4605 trace_i915_gem_object_destroy(obj); 4606 4607 list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) { 4608 int ret; 4609 4610 vma->pin_count = 0; 4611 ret = i915_vma_unbind(vma); 4612 if (WARN_ON(ret == -ERESTARTSYS)) { 4613 bool was_interruptible; 4614 4615 was_interruptible = dev_priv->mm.interruptible; 4616 dev_priv->mm.interruptible = false; 4617 4618 WARN_ON(i915_vma_unbind(vma)); 4619 4620 dev_priv->mm.interruptible = was_interruptible; 4621 } 4622 } 4623 4624 i915_gem_object_detach_phys(obj); 4625 4626 /* Stolen objects don't hold a ref, but do hold pin count. Fix that up 4627 * before progressing. 
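* Dropping the pages pin here keeps the pages_pin_count check further down
* from complaining when we release the backing pages.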
*/ 4628 if (obj->stolen) 4629 i915_gem_object_unpin_pages(obj); 4630 4631 WARN_ON(obj->frontbuffer_bits); 4632 4633 if (WARN_ON(obj->pages_pin_count)) 4634 obj->pages_pin_count = 0; 4635 if (discard_backing_storage(obj)) 4636 obj->madv = I915_MADV_DONTNEED; 4637 i915_gem_object_put_pages(obj); 4638 i915_gem_object_free_mmap_offset(obj); 4639 4640 BUG_ON(obj->pages); 4641 4642 #if 0 4643 if (obj->base.import_attach) 4644 drm_prime_gem_destroy(&obj->base, NULL); 4645 #endif 4646 4647 if (obj->ops->release) 4648 obj->ops->release(obj); 4649 4650 drm_gem_object_release(&obj->base); 4651 i915_gem_info_remove_obj(dev_priv, obj->base.size); 4652 4653 kfree(obj->bit_17); 4654 i915_gem_object_free(obj); 4655 4656 intel_runtime_pm_put(dev_priv); 4657 } 4658 4659 struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj, 4660 struct i915_address_space *vm) 4661 { 4662 struct i915_vma *vma; 4663 list_for_each_entry(vma, &obj->vma_list, vma_link) 4664 if (vma->vm == vm) 4665 return vma; 4666 4667 return NULL; 4668 } 4669 4670 void i915_gem_vma_destroy(struct i915_vma *vma) 4671 { 4672 WARN_ON(vma->node.allocated); 4673 4674 /* Keep the vma as a placeholder in the execbuffer reservation lists */ 4675 if (!list_empty(&vma->exec_list)) 4676 return; 4677 4678 list_del(&vma->vma_link); 4679 4680 kfree(vma); 4681 } 4682 4683 static void 4684 i915_gem_stop_ringbuffers(struct drm_device *dev) 4685 { 4686 struct drm_i915_private *dev_priv = dev->dev_private; 4687 struct intel_engine_cs *ring; 4688 int i; 4689 4690 for_each_ring(ring, dev_priv, i) 4691 intel_stop_ring_buffer(ring); 4692 } 4693 4694 int 4695 i915_gem_suspend(struct drm_device *dev) 4696 { 4697 struct drm_i915_private *dev_priv = dev->dev_private; 4698 int ret = 0; 4699 4700 mutex_lock(&dev->struct_mutex); 4701 if (dev_priv->ums.mm_suspended) 4702 goto err; 4703 4704 ret = i915_gpu_idle(dev); 4705 if (ret) 4706 goto err; 4707 4708 i915_gem_retire_requests(dev); 4709 4710 /* Under UMS, be paranoid and evict. */ 4711 if (!drm_core_check_feature(dev, DRIVER_MODESET)) 4712 i915_gem_evict_everything(dev); 4713 4714 i915_kernel_lost_context(dev); 4715 i915_gem_stop_ringbuffers(dev); 4716 4717 /* Hack! Don't let anybody do execbuf while we don't control the chip. 4718 * We need to replace this with a semaphore, or something. 4719 * And not confound ums.mm_suspended! 4720 */ 4721 dev_priv->ums.mm_suspended = !drm_core_check_feature(dev, 4722 DRIVER_MODESET); 4723 mutex_unlock(&dev->struct_mutex); 4724 4725 del_timer_sync(&dev_priv->gpu_error.hangcheck_timer); 4726 cancel_delayed_work_sync(&dev_priv->mm.retire_work); 4727 #if 0 4728 flush_delayed_work(&dev_priv->mm.idle_work); 4729 #endif 4730 4731 return 0; 4732 4733 err: 4734 mutex_unlock(&dev->struct_mutex); 4735 return ret; 4736 } 4737 4738 int i915_gem_l3_remap(struct intel_engine_cs *ring, int slice) 4739 { 4740 struct drm_device *dev = ring->dev; 4741 struct drm_i915_private *dev_priv = dev->dev_private; 4742 u32 reg_base = GEN7_L3LOG_BASE + (slice * 0x200); 4743 u32 *remap_info = dev_priv->l3_parity.remap_info[slice]; 4744 int i, ret; 4745 4746 if (!HAS_L3_DPF(dev) || !remap_info) 4747 return 0; 4748 4749 ret = intel_ring_begin(ring, GEN7_L3LOG_SIZE / 4 * 3); 4750 if (ret) 4751 return ret; 4752 4753 /* 4754 * Note: We do not worry about the concurrent register cacheline hang 4755 * here because no other code should access these registers other than 4756 * at initialization time. 
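* Each MI_LOAD_REGISTER_IMM emitted below writes one dword of the remap
* table into the per-slice L3 log registers.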
4757 */ 4758 for (i = 0; i < GEN7_L3LOG_SIZE; i += 4) { 4759 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); 4760 intel_ring_emit(ring, reg_base + i); 4761 intel_ring_emit(ring, remap_info[i/4]); 4762 } 4763 4764 intel_ring_advance(ring); 4765 4766 return ret; 4767 } 4768 4769 void i915_gem_init_swizzling(struct drm_device *dev) 4770 { 4771 struct drm_i915_private *dev_priv = dev->dev_private; 4772 4773 if (INTEL_INFO(dev)->gen < 5 || 4774 dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE) 4775 return; 4776 4777 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) | 4778 DISP_TILE_SURFACE_SWIZZLING); 4779 4780 if (IS_GEN5(dev)) 4781 return; 4782 4783 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL); 4784 if (IS_GEN6(dev)) 4785 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB)); 4786 else if (IS_GEN7(dev)) 4787 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB)); 4788 else if (IS_GEN8(dev)) 4789 I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW)); 4790 else 4791 BUG(); 4792 } 4793 4794 static bool 4795 intel_enable_blt(struct drm_device *dev) 4796 { 4797 int revision; 4798 4799 if (!HAS_BLT(dev)) 4800 return false; 4801 4802 /* The blitter was dysfunctional on early prototypes */ 4803 revision = pci_read_config(dev->dev, PCIR_REVID, 1); 4804 if (IS_GEN6(dev) && revision < 8) { 4805 DRM_INFO("BLT not supported on this pre-production hardware;" 4806 " graphics performance will be degraded.\n"); 4807 return false; 4808 } 4809 4810 return true; 4811 } 4812 4813 static int i915_gem_init_rings(struct drm_device *dev) 4814 { 4815 struct drm_i915_private *dev_priv = dev->dev_private; 4816 int ret; 4817 4818 ret = intel_init_render_ring_buffer(dev); 4819 if (ret) 4820 return ret; 4821 4822 if (HAS_BSD(dev)) { 4823 ret = intel_init_bsd_ring_buffer(dev); 4824 if (ret) 4825 goto cleanup_render_ring; 4826 } 4827 4828 if (intel_enable_blt(dev)) { 4829 ret = intel_init_blt_ring_buffer(dev); 4830 if (ret) 4831 goto cleanup_bsd_ring; 4832 } 4833 4834 if (HAS_VEBOX(dev)) { 4835 ret = intel_init_vebox_ring_buffer(dev); 4836 if (ret) 4837 goto cleanup_blt_ring; 4838 } 4839 4840 if (HAS_BSD2(dev)) { 4841 ret = intel_init_bsd2_ring_buffer(dev); 4842 if (ret) 4843 goto cleanup_vebox_ring; 4844 } 4845 4846 ret = i915_gem_set_seqno(dev, ((u32)~0 - 0x1000)); 4847 if (ret) 4848 goto cleanup_bsd2_ring; 4849 4850 return 0; 4851 4852 cleanup_bsd2_ring: 4853 intel_cleanup_ring_buffer(&dev_priv->ring[VCS2]); 4854 cleanup_vebox_ring: 4855 intel_cleanup_ring_buffer(&dev_priv->ring[VECS]); 4856 cleanup_blt_ring: 4857 intel_cleanup_ring_buffer(&dev_priv->ring[BCS]); 4858 cleanup_bsd_ring: 4859 intel_cleanup_ring_buffer(&dev_priv->ring[VCS]); 4860 cleanup_render_ring: 4861 intel_cleanup_ring_buffer(&dev_priv->ring[RCS]); 4862 4863 return ret; 4864 } 4865 4866 int 4867 i915_gem_init_hw(struct drm_device *dev) 4868 { 4869 struct drm_i915_private *dev_priv = dev->dev_private; 4870 int ret, i; 4871 4872 #if 0 4873 if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt()) 4874 return -EIO; 4875 #endif 4876 4877 if (dev_priv->ellc_size) 4878 I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf)); 4879 4880 if (IS_HASWELL(dev)) 4881 I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev) ? 
4882 LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED); 4883 4884 if (HAS_PCH_NOP(dev)) { 4885 if (IS_IVYBRIDGE(dev)) { 4886 u32 temp = I915_READ(GEN7_MSG_CTL); 4887 temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK); 4888 I915_WRITE(GEN7_MSG_CTL, temp); 4889 } else if (INTEL_INFO(dev)->gen >= 7) { 4890 u32 temp = I915_READ(HSW_NDE_RSTWRN_OPT); 4891 temp &= ~RESET_PCH_HANDSHAKE_ENABLE; 4892 I915_WRITE(HSW_NDE_RSTWRN_OPT, temp); 4893 } 4894 } 4895 4896 i915_gem_init_swizzling(dev); 4897 4898 ret = i915_gem_init_rings(dev); 4899 if (ret) 4900 return ret; 4901 4902 for (i = 0; i < NUM_L3_SLICES(dev); i++) 4903 i915_gem_l3_remap(&dev_priv->ring[RCS], i); 4904 4905 /* 4906 * XXX: Contexts should only be initialized once. Doing a switch to the 4907 * default context switch however is something we'd like to do after 4908 * reset or thaw (the latter may not actually be necessary for HW, but 4909 * goes with our code better). Context switching requires rings (for 4910 * the do_switch), but before enabling PPGTT. So don't move this. 4911 */ 4912 ret = i915_gem_context_enable(dev_priv); 4913 if (ret && ret != -EIO) { 4914 DRM_ERROR("Context enable failed %d\n", ret); 4915 i915_gem_cleanup_ringbuffer(dev); 4916 } 4917 4918 return ret; 4919 } 4920 4921 int i915_gem_init(struct drm_device *dev) 4922 { 4923 struct drm_i915_private *dev_priv = dev->dev_private; 4924 int ret; 4925 4926 mutex_lock(&dev->struct_mutex); 4927 4928 if (IS_VALLEYVIEW(dev)) { 4929 /* VLVA0 (potential hack), BIOS isn't actually waking us */ 4930 I915_WRITE(VLV_GTLC_WAKE_CTRL, VLV_GTLC_ALLOWWAKEREQ); 4931 if (wait_for((I915_READ(VLV_GTLC_PW_STATUS) & 4932 VLV_GTLC_ALLOWWAKEACK), 10)) 4933 DRM_DEBUG_DRIVER("allow wake ack timed out\n"); 4934 } 4935 4936 i915_gem_init_userptr(dev); 4937 i915_gem_init_global_gtt(dev); 4938 4939 ret = i915_gem_context_init(dev); 4940 if (ret) { 4941 mutex_unlock(&dev->struct_mutex); 4942 return ret; 4943 } 4944 4945 ret = i915_gem_init_hw(dev); 4946 if (ret == -EIO) { 4947 /* Allow ring initialisation to fail by marking the GPU as 4948 * wedged. But we only want to do this where the GPU is angry, 4949 * for all other failure, such as an allocation failure, bail. 4950 */ 4951 DRM_ERROR("Failed to initialize GPU, declaring it wedged\n"); 4952 atomic_set_mask(I915_WEDGED, &dev_priv->gpu_error.reset_counter); 4953 ret = 0; 4954 } 4955 mutex_unlock(&dev->struct_mutex); 4956 4957 /* Allow hardware batchbuffers unless told otherwise, but not for KMS. 
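* (This only matters for the legacy DRI1/UMS path; with modesetting enabled,
* userspace submits work through the execbuffer interface instead.)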
*/ 4958 if (!drm_core_check_feature(dev, DRIVER_MODESET)) 4959 dev_priv->dri1.allow_batchbuffer = 1; 4960 return ret; 4961 } 4962 4963 void 4964 i915_gem_cleanup_ringbuffer(struct drm_device *dev) 4965 { 4966 struct drm_i915_private *dev_priv = dev->dev_private; 4967 struct intel_engine_cs *ring; 4968 int i; 4969 4970 for_each_ring(ring, dev_priv, i) 4971 intel_cleanup_ring_buffer(ring); 4972 } 4973 4974 int 4975 i915_gem_entervt_ioctl(struct drm_device *dev, void *data, 4976 struct drm_file *file_priv) 4977 { 4978 struct drm_i915_private *dev_priv = dev->dev_private; 4979 int ret; 4980 4981 if (drm_core_check_feature(dev, DRIVER_MODESET)) 4982 return 0; 4983 4984 if (i915_reset_in_progress(&dev_priv->gpu_error)) { 4985 DRM_ERROR("Reenabling wedged hardware, good luck\n"); 4986 atomic_set(&dev_priv->gpu_error.reset_counter, 0); 4987 } 4988 4989 mutex_lock(&dev->struct_mutex); 4990 dev_priv->ums.mm_suspended = 0; 4991 4992 ret = i915_gem_init_hw(dev); 4993 if (ret != 0) { 4994 mutex_unlock(&dev->struct_mutex); 4995 return ret; 4996 } 4997 4998 BUG_ON(!list_empty(&dev_priv->gtt.base.active_list)); 4999 5000 ret = drm_irq_install(dev, dev->irq); 5001 if (ret) 5002 goto cleanup_ringbuffer; 5003 mutex_unlock(&dev->struct_mutex); 5004 5005 return 0; 5006 5007 cleanup_ringbuffer: 5008 i915_gem_cleanup_ringbuffer(dev); 5009 dev_priv->ums.mm_suspended = 1; 5010 mutex_unlock(&dev->struct_mutex); 5011 5012 return ret; 5013 } 5014 5015 int 5016 i915_gem_leavevt_ioctl(struct drm_device *dev, void *data, 5017 struct drm_file *file_priv) 5018 { 5019 if (drm_core_check_feature(dev, DRIVER_MODESET)) 5020 return 0; 5021 5022 mutex_lock(&dev->struct_mutex); 5023 drm_irq_uninstall(dev); 5024 mutex_unlock(&dev->struct_mutex); 5025 5026 return i915_gem_suspend(dev); 5027 } 5028 5029 void 5030 i915_gem_lastclose(struct drm_device *dev) 5031 { 5032 int ret; 5033 5034 if (drm_core_check_feature(dev, DRIVER_MODESET)) 5035 return; 5036 5037 ret = i915_gem_suspend(dev); 5038 if (ret) 5039 DRM_ERROR("failed to idle hardware: %d\n", ret); 5040 } 5041 5042 static void 5043 init_ring_lists(struct intel_engine_cs *ring) 5044 { 5045 INIT_LIST_HEAD(&ring->active_list); 5046 INIT_LIST_HEAD(&ring->request_list); 5047 } 5048 5049 void i915_init_vm(struct drm_i915_private *dev_priv, 5050 struct i915_address_space *vm) 5051 { 5052 if (!i915_is_ggtt(vm)) 5053 drm_mm_init(&vm->mm, vm->start, vm->total); 5054 vm->dev = dev_priv->dev; 5055 INIT_LIST_HEAD(&vm->active_list); 5056 INIT_LIST_HEAD(&vm->inactive_list); 5057 INIT_LIST_HEAD(&vm->global_link); 5058 list_add_tail(&vm->global_link, &dev_priv->vm_list); 5059 } 5060 5061 void 5062 i915_gem_load(struct drm_device *dev) 5063 { 5064 struct drm_i915_private *dev_priv = dev->dev_private; 5065 int i; 5066 5067 INIT_LIST_HEAD(&dev_priv->vm_list); 5068 i915_init_vm(dev_priv, &dev_priv->gtt.base); 5069 5070 INIT_LIST_HEAD(&dev_priv->context_list); 5071 INIT_LIST_HEAD(&dev_priv->mm.unbound_list); 5072 INIT_LIST_HEAD(&dev_priv->mm.bound_list); 5073 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 5074 for (i = 0; i < I915_NUM_RINGS; i++) 5075 init_ring_lists(&dev_priv->ring[i]); 5076 for (i = 0; i < I915_MAX_NUM_FENCES; i++) 5077 INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list); 5078 INIT_DELAYED_WORK(&dev_priv->mm.retire_work, 5079 i915_gem_retire_work_handler); 5080 INIT_DELAYED_WORK(&dev_priv->mm.idle_work, 5081 i915_gem_idle_work_handler); 5082 init_waitqueue_head(&dev_priv->gpu_error.reset_queue); 5083 5084 /* On GEN3 we really need to make sure the ARB C3 LP bit is set */ 5085 if 
(!drm_core_check_feature(dev, DRIVER_MODESET) && IS_GEN3(dev)) { 5086 I915_WRITE(MI_ARB_STATE, 5087 _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE)); 5088 } 5089 5090 dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL; 5091 5092 /* Old X drivers will take 0-2 for front, back, depth buffers */ 5093 if (!drm_core_check_feature(dev, DRIVER_MODESET)) 5094 dev_priv->fence_reg_start = 3; 5095 5096 if (INTEL_INFO(dev)->gen >= 7 && !IS_VALLEYVIEW(dev)) 5097 dev_priv->num_fence_regs = 32; 5098 else if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev)) 5099 dev_priv->num_fence_regs = 16; 5100 else 5101 dev_priv->num_fence_regs = 8; 5102 5103 /* Initialize fence registers to zero */ 5104 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 5105 i915_gem_restore_fences(dev); 5106 5107 i915_gem_detect_bit_6_swizzle(dev); 5108 init_waitqueue_head(&dev_priv->pending_flip_queue); 5109 5110 dev_priv->mm.interruptible = true; 5111 5112 #if 0 5113 dev_priv->mm.shrinker.scan_objects = i915_gem_shrinker_scan; 5114 dev_priv->mm.shrinker.count_objects = i915_gem_shrinker_count; 5115 dev_priv->mm.shrinker.seeks = DEFAULT_SEEKS; 5116 register_shrinker(&dev_priv->mm.shrinker); 5117 5118 dev_priv->mm.oom_notifier.notifier_call = i915_gem_shrinker_oom; 5119 register_oom_notifier(&dev_priv->mm.oom_notifier); 5120 #endif 5121 5122 lockinit(&dev_priv->fb_tracking.lock, "drmftl", 0, LK_CANRECURSE); 5123 } 5124 5125 void i915_gem_release(struct drm_device *dev, struct drm_file *file) 5126 { 5127 struct drm_i915_file_private *file_priv = file->driver_priv; 5128 5129 cancel_delayed_work_sync(&file_priv->mm.idle_work); 5130 5131 /* Clean up our request list when the client is going away, so that 5132 * later retire_requests won't dereference our soon-to-be-gone 5133 * file_priv. 
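* The requests themselves stay queued on their rings and are retired as
* normal; we only sever the link back to this client here.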
5134 */ 5135 spin_lock(&file_priv->mm.lock); 5136 while (!list_empty(&file_priv->mm.request_list)) { 5137 struct drm_i915_gem_request *request; 5138 5139 request = list_first_entry(&file_priv->mm.request_list, 5140 struct drm_i915_gem_request, 5141 client_list); 5142 list_del(&request->client_list); 5143 request->file_priv = NULL; 5144 } 5145 spin_unlock(&file_priv->mm.lock); 5146 } 5147 5148 int 5149 i915_gem_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot, 5150 vm_ooffset_t foff, struct ucred *cred, u_short *color) 5151 { 5152 *color = 0; /* XXXKIB */ 5153 return (0); 5154 } 5155 5156 void 5157 i915_gem_pager_dtor(void *handle) 5158 { 5159 struct drm_gem_object *obj; 5160 struct drm_device *dev; 5161 5162 obj = handle; 5163 dev = obj->dev; 5164 5165 mutex_lock(&dev->struct_mutex); 5166 drm_gem_free_mmap_offset(obj); 5167 i915_gem_release_mmap(to_intel_bo(obj)); 5168 drm_gem_object_unreference(obj); 5169 mutex_unlock(&dev->struct_mutex); 5170 } 5171 5172 static void 5173 i915_gem_file_idle_work_handler(struct work_struct *work) 5174 { 5175 struct drm_i915_file_private *file_priv = 5176 container_of(work, typeof(*file_priv), mm.idle_work.work); 5177 5178 atomic_set(&file_priv->rps_wait_boost, false); 5179 } 5180 5181 int i915_gem_open(struct drm_device *dev, struct drm_file *file) 5182 { 5183 struct drm_i915_file_private *file_priv; 5184 int ret; 5185 5186 DRM_DEBUG_DRIVER("\n"); 5187 5188 file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL); 5189 if (!file_priv) 5190 return -ENOMEM; 5191 5192 file->driver_priv = file_priv; 5193 file_priv->dev_priv = dev->dev_private; 5194 file_priv->file = file; 5195 5196 spin_init(&file_priv->mm.lock, "i915_priv"); 5197 INIT_LIST_HEAD(&file_priv->mm.request_list); 5198 INIT_DELAYED_WORK(&file_priv->mm.idle_work, 5199 i915_gem_file_idle_work_handler); 5200 5201 ret = i915_gem_context_open(dev, file); 5202 if (ret) 5203 kfree(file_priv); 5204 5205 return ret; 5206 } 5207 5208 void i915_gem_track_fb(struct drm_i915_gem_object *old, 5209 struct drm_i915_gem_object *new, 5210 unsigned frontbuffer_bits) 5211 { 5212 if (old) { 5213 WARN_ON(!mutex_is_locked(&old->base.dev->struct_mutex)); 5214 WARN_ON(!(old->frontbuffer_bits & frontbuffer_bits)); 5215 old->frontbuffer_bits &= ~frontbuffer_bits; 5216 } 5217 5218 if (new) { 5219 WARN_ON(!mutex_is_locked(&new->base.dev->struct_mutex)); 5220 WARN_ON(new->frontbuffer_bits & frontbuffer_bits); 5221 new->frontbuffer_bits |= frontbuffer_bits; 5222 } 5223 } 5224 5225 #if 0 5226 static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task) 5227 { 5228 if (!mutex_is_locked(mutex)) 5229 return false; 5230 5231 #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_MUTEXES) 5232 return mutex->owner == task; 5233 #else 5234 /* Since UP may be pre-empted, we cannot assume that we own the lock */ 5235 return false; 5236 #endif 5237 } 5238 #endif 5239 5240 #if 0 5241 static bool i915_gem_shrinker_lock(struct drm_device *dev, bool *unlock) 5242 { 5243 if (!mutex_trylock(&dev->struct_mutex)) { 5244 if (!mutex_is_locked_by(&dev->struct_mutex, current)) 5245 return false; 5246 5247 if (to_i915(dev)->mm.shrinker_no_lock_stealing) 5248 return false; 5249 5250 *unlock = false; 5251 } else 5252 *unlock = true; 5253 5254 return true; 5255 } 5256 5257 static int num_vma_bound(struct drm_i915_gem_object *obj) 5258 { 5259 struct i915_vma *vma; 5260 int count = 0; 5261 5262 list_for_each_entry(vma, &obj->vma_list, vma_link) 5263 if (drm_mm_node_allocated(&vma->node)) 5264 count++; 5265 5266 return count; 5267 } 5268 5269 
static unsigned long 5270 i915_gem_inactive_count(struct shrinker *shrinker, struct shrink_control *sc) 5271 { 5272 struct drm_i915_private *dev_priv = 5273 container_of(shrinker, 5274 struct drm_i915_private, 5275 mm.inactive_shrinker); 5276 struct drm_device *dev = dev_priv->dev; 5277 struct drm_i915_gem_object *obj; 5278 unsigned long count; 5279 bool unlock; 5280 5281 if (!i915_gem_shrinker_lock(dev, &unlock)) 5282 return 0; 5283 5284 count = 0; 5285 list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list) 5286 if (obj->pages_pin_count == 0) 5287 count += obj->base.size >> PAGE_SHIFT; 5288 5289 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { 5290 if (!i915_gem_obj_is_pinned(obj) && 5291 obj->pages_pin_count == num_vma_bound(obj)) 5292 count += obj->base.size >> PAGE_SHIFT; 5293 } 5294 5295 if (unlock) 5296 mutex_unlock(&dev->struct_mutex); 5297 5298 return count; 5299 } 5300 #endif 5301 5302 /* All the new VM stuff */ 5303 unsigned long i915_gem_obj_offset(struct drm_i915_gem_object *o, 5304 struct i915_address_space *vm) 5305 { 5306 struct drm_i915_private *dev_priv = o->base.dev->dev_private; 5307 struct i915_vma *vma; 5308 5309 if (!dev_priv->mm.aliasing_ppgtt || 5310 vm == &dev_priv->mm.aliasing_ppgtt->base) 5311 vm = &dev_priv->gtt.base; 5312 5313 list_for_each_entry(vma, &o->vma_list, vma_link) { 5314 if (vma->vm == vm) 5315 return vma->node.start; 5316 5317 } 5318 WARN(1, "%s vma for this object not found.\n", 5319 i915_is_ggtt(vm) ? "global" : "ppgtt"); 5320 return -1; 5321 } 5322 5323 bool i915_gem_obj_bound(struct drm_i915_gem_object *o, 5324 struct i915_address_space *vm) 5325 { 5326 struct i915_vma *vma; 5327 5328 list_for_each_entry(vma, &o->vma_list, vma_link) 5329 if (vma->vm == vm && drm_mm_node_allocated(&vma->node)) 5330 return true; 5331 5332 return false; 5333 } 5334 5335 bool i915_gem_obj_bound_any(struct drm_i915_gem_object *o) 5336 { 5337 struct i915_vma *vma; 5338 5339 list_for_each_entry(vma, &o->vma_list, vma_link) 5340 if (drm_mm_node_allocated(&vma->node)) 5341 return true; 5342 5343 return false; 5344 } 5345 5346 unsigned long i915_gem_obj_size(struct drm_i915_gem_object *o, 5347 struct i915_address_space *vm) 5348 { 5349 struct drm_i915_private *dev_priv = o->base.dev->dev_private; 5350 struct i915_vma *vma; 5351 5352 if (!dev_priv->mm.aliasing_ppgtt || 5353 vm == &dev_priv->mm.aliasing_ppgtt->base) 5354 vm = &dev_priv->gtt.base; 5355 5356 BUG_ON(list_empty(&o->vma_list)); 5357 5358 list_for_each_entry(vma, &o->vma_list, vma_link) 5359 if (vma->vm == vm) 5360 return vma->node.size; 5361 5362 return 0; 5363 } 5364 5365 #if 0 5366 static unsigned long 5367 i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) 5368 { 5369 struct drm_i915_private *dev_priv = 5370 container_of(shrinker, struct drm_i915_private, mm.shrinker); 5371 struct drm_device *dev = dev_priv->dev; 5372 unsigned long freed; 5373 bool unlock; 5374 5375 if (!i915_gem_shrinker_lock(dev, &unlock)) 5376 return SHRINK_STOP; 5377 5378 freed = i915_gem_purge(dev_priv, sc->nr_to_scan); 5379 if (freed < sc->nr_to_scan) 5380 freed += __i915_gem_shrink(dev_priv, 5381 sc->nr_to_scan - freed, 5382 false); 5383 if (freed < sc->nr_to_scan) 5384 freed += i915_gem_shrink_all(dev_priv); 5385 5386 if (unlock) 5387 mutex_unlock(&dev->struct_mutex); 5388 5389 return freed; 5390 } 5391 #endif 5392 5393 struct i915_vma *i915_gem_obj_to_ggtt(struct drm_i915_gem_object *obj) 5394 { 5395 struct i915_vma *vma; 5396 5397 /* This WARN has probably outlived its usefulness 
(callers already 5398 * WARN if they don't find the GGTT vma they expect). When removing, 5399 * remember to remove the pre-check in is_pin_display() as well */ 5400 if (WARN_ON(list_empty(&obj->vma_list))) 5401 return NULL; 5402 5403 vma = list_first_entry(&obj->vma_list, typeof(*vma), vma_link); 5404 if (vma->vm != obj_to_ggtt(obj)) 5405 return NULL; 5406 5407 return vma; 5408 } 5409