1 /* 2 * Copyright © 2008-2015 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 * Authors: 24 * Eric Anholt <eric@anholt.net> 25 * 26 * Copyright (c) 2011 The FreeBSD Foundation 27 * All rights reserved. 28 * 29 * This software was developed by Konstantin Belousov under sponsorship from 30 * the FreeBSD Foundation. 31 * 32 * Redistribution and use in source and binary forms, with or without 33 * modification, are permitted provided that the following conditions 34 * are met: 35 * 1. Redistributions of source code must retain the above copyright 36 * notice, this list of conditions and the following disclaimer. 37 * 2. Redistributions in binary form must reproduce the above copyright 38 * notice, this list of conditions and the following disclaimer in the 39 * documentation and/or other materials provided with the distribution. 40 * 41 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 51 * SUCH DAMAGE. 
52 * 53 */ 54 55 #include <machine/md_var.h> 56 57 #include <drm/drmP.h> 58 #include <drm/drm_vma_manager.h> 59 #include <drm/i915_drm.h> 60 #include "i915_drv.h" 61 #include "i915_vgpu.h" 62 #include "i915_trace.h" 63 #include "intel_drv.h" 64 #include <linux/shmem_fs.h> 65 #include <linux/slab.h> 66 #include <linux/swap.h> 67 #include <linux/pci.h> 68 69 #define RQ_BUG_ON(expr) 70 71 static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj); 72 static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj); 73 static void 74 i915_gem_object_retire__write(struct drm_i915_gem_object *obj); 75 static void 76 i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring); 77 static void i915_gem_write_fence(struct drm_device *dev, int reg, 78 struct drm_i915_gem_object *obj); 79 static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj, 80 struct drm_i915_fence_reg *fence, 81 bool enable); 82 83 static bool cpu_cache_is_coherent(struct drm_device *dev, 84 enum i915_cache_level level) 85 { 86 return HAS_LLC(dev) || level != I915_CACHE_NONE; 87 } 88 89 static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) 90 { 91 if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) 92 return true; 93 94 return obj->pin_display; 95 } 96 97 static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj) 98 { 99 if (obj->tiling_mode) 100 i915_gem_release_mmap(obj); 101 102 /* As we do not have an associated fence register, we will force 103 * a tiling change if we ever need to acquire one. 104 */ 105 obj->fence_dirty = false; 106 obj->fence_reg = I915_FENCE_REG_NONE; 107 } 108 109 /* some bookkeeping */ 110 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv, 111 size_t size) 112 { 113 spin_lock(&dev_priv->mm.object_stat_lock); 114 dev_priv->mm.object_count++; 115 dev_priv->mm.object_memory += size; 116 spin_unlock(&dev_priv->mm.object_stat_lock); 117 } 118 119 static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv, 120 size_t size) 121 { 122 spin_lock(&dev_priv->mm.object_stat_lock); 123 dev_priv->mm.object_count--; 124 dev_priv->mm.object_memory -= size; 125 spin_unlock(&dev_priv->mm.object_stat_lock); 126 } 127 128 static int 129 i915_gem_wait_for_error(struct i915_gpu_error *error) 130 { 131 int ret; 132 133 #define EXIT_COND (!i915_reset_in_progress(error) || \ 134 i915_terminally_wedged(error)) 135 if (EXIT_COND) 136 return 0; 137 138 /* 139 * Only wait 10 seconds for the gpu reset to complete to avoid hanging 140 * userspace. If it takes that long something really bad is going on and 141 * we should simply try to bail out and fail as gracefully as possible. 
142 */ 143 ret = wait_event_interruptible_timeout(error->reset_queue, 144 EXIT_COND, 145 10*HZ); 146 if (ret == 0) { 147 DRM_ERROR("Timed out waiting for the gpu reset to complete\n"); 148 return -EIO; 149 } else if (ret < 0) { 150 return ret; 151 } 152 #undef EXIT_COND 153 154 return 0; 155 } 156 157 int i915_mutex_lock_interruptible(struct drm_device *dev) 158 { 159 struct drm_i915_private *dev_priv = dev->dev_private; 160 int ret; 161 162 ret = i915_gem_wait_for_error(&dev_priv->gpu_error); 163 if (ret) 164 return ret; 165 166 ret = mutex_lock_interruptible(&dev->struct_mutex); 167 if (ret) 168 return ret; 169 170 WARN_ON(i915_verify_lists(dev)); 171 return 0; 172 } 173 174 int 175 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, 176 struct drm_file *file) 177 { 178 struct drm_i915_private *dev_priv = dev->dev_private; 179 struct drm_i915_gem_get_aperture *args = data; 180 struct drm_i915_gem_object *obj; 181 size_t pinned; 182 183 pinned = 0; 184 mutex_lock(&dev->struct_mutex); 185 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) 186 if (i915_gem_obj_is_pinned(obj)) 187 pinned += i915_gem_obj_ggtt_size(obj); 188 mutex_unlock(&dev->struct_mutex); 189 190 args->aper_size = dev_priv->gtt.base.total; 191 args->aper_available_size = args->aper_size - pinned; 192 193 return 0; 194 } 195 196 #if 0 197 static int 198 i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) 199 { 200 struct address_space *mapping = file_inode(obj->base.filp)->i_mapping; 201 char *vaddr = obj->phys_handle->vaddr; 202 struct sg_table *st; 203 struct scatterlist *sg; 204 int i; 205 206 if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj))) 207 return -EINVAL; 208 209 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) { 210 struct vm_page *page; 211 char *src; 212 213 page = shmem_read_mapping_page(mapping, i); 214 if (IS_ERR(page)) 215 return PTR_ERR(page); 216 217 src = kmap_atomic(page); 218 memcpy(vaddr, src, PAGE_SIZE); 219 drm_clflush_virt_range(vaddr, PAGE_SIZE); 220 kunmap_atomic(src); 221 222 page_cache_release(page); 223 vaddr += PAGE_SIZE; 224 } 225 226 i915_gem_chipset_flush(obj->base.dev); 227 228 st = kmalloc(sizeof(*st), GFP_KERNEL); 229 if (st == NULL) 230 return -ENOMEM; 231 232 if (sg_alloc_table(st, 1, GFP_KERNEL)) { 233 kfree(st); 234 return -ENOMEM; 235 } 236 237 sg = st->sgl; 238 sg->offset = 0; 239 sg->length = obj->base.size; 240 241 sg_dma_address(sg) = obj->phys_handle->busaddr; 242 sg_dma_len(sg) = obj->base.size; 243 244 obj->pages = st; 245 return 0; 246 } 247 248 static void 249 i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj) 250 { 251 int ret; 252 253 BUG_ON(obj->madv == __I915_MADV_PURGED); 254 255 ret = i915_gem_object_set_to_cpu_domain(obj, true); 256 if (ret) { 257 /* In the event of a disaster, abandon all caches and 258 * hope for the best. 
259 */ 260 WARN_ON(ret != -EIO); 261 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU; 262 } 263 264 if (obj->madv == I915_MADV_DONTNEED) 265 obj->dirty = 0; 266 267 if (obj->dirty) { 268 struct address_space *mapping = file_inode(obj->base.filp)->i_mapping; 269 char *vaddr = obj->phys_handle->vaddr; 270 int i; 271 272 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) { 273 struct page *page; 274 char *dst; 275 276 page = shmem_read_mapping_page(mapping, i); 277 if (IS_ERR(page)) 278 continue; 279 280 dst = kmap_atomic(page); 281 drm_clflush_virt_range(vaddr, PAGE_SIZE); 282 memcpy(dst, vaddr, PAGE_SIZE); 283 kunmap_atomic(dst); 284 285 set_page_dirty(page); 286 if (obj->madv == I915_MADV_WILLNEED) 287 mark_page_accessed(page); 288 page_cache_release(page); 289 vaddr += PAGE_SIZE; 290 } 291 obj->dirty = 0; 292 } 293 294 sg_free_table(obj->pages); 295 kfree(obj->pages); 296 } 297 298 static void 299 i915_gem_object_release_phys(struct drm_i915_gem_object *obj) 300 { 301 drm_pci_free(obj->base.dev, obj->phys_handle); 302 } 303 304 static const struct drm_i915_gem_object_ops i915_gem_phys_ops = { 305 .get_pages = i915_gem_object_get_pages_phys, 306 .put_pages = i915_gem_object_put_pages_phys, 307 .release = i915_gem_object_release_phys, 308 }; 309 #endif 310 311 static int 312 drop_pages(struct drm_i915_gem_object *obj) 313 { 314 struct i915_vma *vma, *next; 315 int ret; 316 317 drm_gem_object_reference(&obj->base); 318 list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) 319 if (i915_vma_unbind(vma)) 320 break; 321 322 ret = i915_gem_object_put_pages(obj); 323 drm_gem_object_unreference(&obj->base); 324 325 return ret; 326 } 327 328 int 329 i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, 330 int align) 331 { 332 drm_dma_handle_t *phys; 333 int ret; 334 335 if (obj->phys_handle) { 336 if ((unsigned long)obj->phys_handle->vaddr & (align -1)) 337 return -EBUSY; 338 339 return 0; 340 } 341 342 if (obj->madv != I915_MADV_WILLNEED) 343 return -EFAULT; 344 345 #if 0 346 if (obj->base.filp == NULL) 347 return -EINVAL; 348 #endif 349 350 ret = drop_pages(obj); 351 if (ret) 352 return ret; 353 354 /* create a new object */ 355 phys = drm_pci_alloc(obj->base.dev, obj->base.size, align); 356 if (!phys) 357 return -ENOMEM; 358 359 obj->phys_handle = phys; 360 #if 0 361 obj->ops = &i915_gem_phys_ops; 362 #endif 363 364 return i915_gem_object_get_pages(obj); 365 } 366 367 static int 368 i915_gem_phys_pwrite(struct drm_i915_gem_object *obj, 369 struct drm_i915_gem_pwrite *args, 370 struct drm_file *file_priv) 371 { 372 struct drm_device *dev = obj->base.dev; 373 void *vaddr = (char *)obj->phys_handle->vaddr + args->offset; 374 char __user *user_data = to_user_ptr(args->data_ptr); 375 int ret = 0; 376 377 /* We manually control the domain here and pretend that it 378 * remains coherent i.e. in the GTT domain, like shmem_pwrite. 379 */ 380 ret = i915_gem_object_wait_rendering(obj, false); 381 if (ret) 382 return ret; 383 384 intel_fb_obj_invalidate(obj, NULL, ORIGIN_CPU); 385 if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) { 386 unsigned long unwritten; 387 388 /* The physical object once assigned is fixed for the lifetime 389 * of the obj, so we can safely drop the lock and continue 390 * to access vaddr. 
391 */ 392 mutex_unlock(&dev->struct_mutex); 393 unwritten = copy_from_user(vaddr, user_data, args->size); 394 mutex_lock(&dev->struct_mutex); 395 if (unwritten) { 396 ret = -EFAULT; 397 goto out; 398 } 399 } 400 401 drm_clflush_virt_range(vaddr, args->size); 402 i915_gem_chipset_flush(dev); 403 404 out: 405 intel_fb_obj_flush(obj, false); 406 return ret; 407 } 408 409 void *i915_gem_object_alloc(struct drm_device *dev) 410 { 411 return kmalloc(sizeof(struct drm_i915_gem_object), 412 M_DRM, M_WAITOK | M_ZERO); 413 } 414 415 void i915_gem_object_free(struct drm_i915_gem_object *obj) 416 { 417 kfree(obj); 418 } 419 420 static int 421 i915_gem_create(struct drm_file *file, 422 struct drm_device *dev, 423 uint64_t size, 424 uint32_t *handle_p) 425 { 426 struct drm_i915_gem_object *obj; 427 int ret; 428 u32 handle; 429 430 size = roundup(size, PAGE_SIZE); 431 if (size == 0) 432 return -EINVAL; 433 434 /* Allocate the new object */ 435 obj = i915_gem_alloc_object(dev, size); 436 if (obj == NULL) 437 return -ENOMEM; 438 439 ret = drm_gem_handle_create(file, &obj->base, &handle); 440 /* drop reference from allocate - handle holds it now */ 441 drm_gem_object_unreference_unlocked(&obj->base); 442 if (ret) 443 return ret; 444 445 *handle_p = handle; 446 return 0; 447 } 448 449 int 450 i915_gem_dumb_create(struct drm_file *file, 451 struct drm_device *dev, 452 struct drm_mode_create_dumb *args) 453 { 454 /* have to work out size/pitch and return them */ 455 args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64); 456 args->size = args->pitch * args->height; 457 return i915_gem_create(file, dev, 458 args->size, &args->handle); 459 } 460 461 /** 462 * Creates a new mm object and returns a handle to it. 463 */ 464 int 465 i915_gem_create_ioctl(struct drm_device *dev, void *data, 466 struct drm_file *file) 467 { 468 struct drm_i915_gem_create *args = data; 469 470 return i915_gem_create(file, dev, 471 args->size, &args->handle); 472 } 473 474 static inline int 475 __copy_to_user_swizzled(char __user *cpu_vaddr, 476 const char *gpu_vaddr, int gpu_offset, 477 int length) 478 { 479 int ret, cpu_offset = 0; 480 481 while (length > 0) { 482 int cacheline_end = ALIGN(gpu_offset + 1, 64); 483 int this_length = min(cacheline_end - gpu_offset, length); 484 int swizzled_gpu_offset = gpu_offset ^ 64; 485 486 ret = __copy_to_user(cpu_vaddr + cpu_offset, 487 gpu_vaddr + swizzled_gpu_offset, 488 this_length); 489 if (ret) 490 return ret + length; 491 492 cpu_offset += this_length; 493 gpu_offset += this_length; 494 length -= this_length; 495 } 496 497 return 0; 498 } 499 500 static inline int 501 __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset, 502 const char __user *cpu_vaddr, 503 int length) 504 { 505 int ret, cpu_offset = 0; 506 507 while (length > 0) { 508 int cacheline_end = ALIGN(gpu_offset + 1, 64); 509 int this_length = min(cacheline_end - gpu_offset, length); 510 int swizzled_gpu_offset = gpu_offset ^ 64; 511 512 ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset, 513 cpu_vaddr + cpu_offset, 514 this_length); 515 if (ret) 516 return ret + length; 517 518 cpu_offset += this_length; 519 gpu_offset += this_length; 520 length -= this_length; 521 } 522 523 return 0; 524 } 525 526 /* 527 * Pins the specified object's pages and synchronizes the object with 528 * GPU accesses. Sets needs_clflush to non-zero if the caller should 529 * flush the object from the CPU cache. 
530 */ 531 int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, 532 int *needs_clflush) 533 { 534 int ret; 535 536 *needs_clflush = 0; 537 538 #if 0 539 if (!obj->base.filp) 540 return -EINVAL; 541 #endif 542 543 if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) { 544 /* If we're not in the cpu read domain, set ourself into the gtt 545 * read domain and manually flush cachelines (if required). This 546 * optimizes for the case when the gpu will dirty the data 547 * anyway again before the next pread happens. */ 548 *needs_clflush = !cpu_cache_is_coherent(obj->base.dev, 549 obj->cache_level); 550 ret = i915_gem_object_wait_rendering(obj, true); 551 if (ret) 552 return ret; 553 } 554 555 ret = i915_gem_object_get_pages(obj); 556 if (ret) 557 return ret; 558 559 i915_gem_object_pin_pages(obj); 560 561 return ret; 562 } 563 564 /* Per-page copy function for the shmem pread fastpath. 565 * Flushes invalid cachelines before reading the target if 566 * needs_clflush is set. */ 567 static int 568 shmem_pread_fast(struct vm_page *page, int shmem_page_offset, int page_length, 569 char __user *user_data, 570 bool page_do_bit17_swizzling, bool needs_clflush) 571 { 572 char *vaddr; 573 int ret; 574 575 if (unlikely(page_do_bit17_swizzling)) 576 return -EINVAL; 577 578 vaddr = kmap_atomic(page); 579 if (needs_clflush) 580 drm_clflush_virt_range(vaddr + shmem_page_offset, 581 page_length); 582 ret = __copy_to_user_inatomic(user_data, 583 vaddr + shmem_page_offset, 584 page_length); 585 kunmap_atomic(vaddr); 586 587 return ret ? -EFAULT : 0; 588 } 589 590 static void 591 shmem_clflush_swizzled_range(char *addr, unsigned long length, 592 bool swizzled) 593 { 594 if (unlikely(swizzled)) { 595 unsigned long start = (unsigned long) addr; 596 unsigned long end = (unsigned long) addr + length; 597 598 /* For swizzling simply ensure that we always flush both 599 * channels. Lame, but simple and it works. Swizzled 600 * pwrite/pread is far from a hotpath - current userspace 601 * doesn't use it at all. */ 602 start = round_down(start, 128); 603 end = round_up(end, 128); 604 605 drm_clflush_virt_range((void *)start, end - start); 606 } else { 607 drm_clflush_virt_range(addr, length); 608 } 609 610 } 611 612 /* Only difference to the fast-path function is that this can handle bit17 613 * and uses non-atomic copy and kmap functions. */ 614 static int 615 shmem_pread_slow(struct vm_page *page, int shmem_page_offset, int page_length, 616 char __user *user_data, 617 bool page_do_bit17_swizzling, bool needs_clflush) 618 { 619 char *vaddr; 620 int ret; 621 622 vaddr = kmap(page); 623 if (needs_clflush) 624 shmem_clflush_swizzled_range(vaddr + shmem_page_offset, 625 page_length, 626 page_do_bit17_swizzling); 627 628 if (page_do_bit17_swizzling) 629 ret = __copy_to_user_swizzled(user_data, 630 vaddr, shmem_page_offset, 631 page_length); 632 else 633 ret = __copy_to_user(user_data, 634 vaddr + shmem_page_offset, 635 page_length); 636 kunmap(page); 637 638 return ret ? 
- EFAULT : 0; 639 } 640 641 static int 642 i915_gem_shmem_pread(struct drm_device *dev, 643 struct drm_i915_gem_object *obj, 644 struct drm_i915_gem_pread *args, 645 struct drm_file *file) 646 { 647 char __user *user_data; 648 ssize_t remain; 649 loff_t offset; 650 int shmem_page_offset, page_length, ret = 0; 651 int obj_do_bit17_swizzling, page_do_bit17_swizzling; 652 int prefaulted = 0; 653 int needs_clflush = 0; 654 int i; 655 656 user_data = to_user_ptr(args->data_ptr); 657 remain = args->size; 658 659 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); 660 661 ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush); 662 if (ret) 663 return ret; 664 665 offset = args->offset; 666 667 for (i = 0; i < (obj->base.size >> PAGE_SHIFT); i++) { 668 struct vm_page *page = obj->pages[i]; 669 670 if (remain <= 0) 671 break; 672 673 /* Operation in this page 674 * 675 * shmem_page_offset = offset within page in shmem file 676 * page_length = bytes to copy for this page 677 */ 678 shmem_page_offset = offset_in_page(offset); 679 page_length = remain; 680 if ((shmem_page_offset + page_length) > PAGE_SIZE) 681 page_length = PAGE_SIZE - shmem_page_offset; 682 683 page_do_bit17_swizzling = obj_do_bit17_swizzling && 684 (page_to_phys(page) & (1 << 17)) != 0; 685 686 ret = shmem_pread_fast(page, shmem_page_offset, page_length, 687 user_data, page_do_bit17_swizzling, 688 needs_clflush); 689 if (ret == 0) 690 goto next_page; 691 692 mutex_unlock(&dev->struct_mutex); 693 694 if (likely(!i915.prefault_disable) && !prefaulted) { 695 ret = fault_in_multipages_writeable(user_data, remain); 696 /* Userspace is tricking us, but we've already clobbered 697 * its pages with the prefault and promised to write the 698 * data up to the first fault. Hence ignore any errors 699 * and just continue. */ 700 (void)ret; 701 prefaulted = 1; 702 } 703 704 ret = shmem_pread_slow(page, shmem_page_offset, page_length, 705 user_data, page_do_bit17_swizzling, 706 needs_clflush); 707 708 mutex_lock(&dev->struct_mutex); 709 710 if (ret) 711 goto out; 712 713 next_page: 714 remain -= page_length; 715 user_data += page_length; 716 offset += page_length; 717 } 718 719 out: 720 i915_gem_object_unpin_pages(obj); 721 722 return ret; 723 } 724 725 /** 726 * Reads data from the object referenced by handle. 727 * 728 * On error, the contents of *data are undefined. 729 */ 730 int 731 i915_gem_pread_ioctl(struct drm_device *dev, void *data, 732 struct drm_file *file) 733 { 734 struct drm_i915_gem_pread *args = data; 735 struct drm_i915_gem_object *obj; 736 int ret = 0; 737 738 if (args->size == 0) 739 return 0; 740 741 ret = i915_mutex_lock_interruptible(dev); 742 if (ret) 743 return ret; 744 745 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 746 if (&obj->base == NULL) { 747 ret = -ENOENT; 748 goto unlock; 749 } 750 751 /* Bounds check source. 
*/ 752 if (args->offset > obj->base.size || 753 args->size > obj->base.size - args->offset) { 754 ret = -EINVAL; 755 goto out; 756 } 757 758 trace_i915_gem_object_pread(obj, args->offset, args->size); 759 760 ret = i915_gem_shmem_pread(dev, obj, args, file); 761 762 out: 763 drm_gem_object_unreference(&obj->base); 764 unlock: 765 mutex_unlock(&dev->struct_mutex); 766 return ret; 767 } 768 769 /* This is the fast write path which cannot handle 770 * page faults in the source data 771 */ 772 773 static inline int 774 fast_user_write(struct io_mapping *mapping, 775 loff_t page_base, int page_offset, 776 char __user *user_data, 777 int length) 778 { 779 void __iomem *vaddr_atomic; 780 void *vaddr; 781 unsigned long unwritten; 782 783 vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base); 784 /* We can use the cpu mem copy function because this is X86. */ 785 vaddr = (char __force*)vaddr_atomic + page_offset; 786 unwritten = __copy_from_user_inatomic_nocache(vaddr, 787 user_data, length); 788 io_mapping_unmap_atomic(vaddr_atomic); 789 return unwritten; 790 } 791 792 /** 793 * This is the fast pwrite path, where we copy the data directly from the 794 * user into the GTT, uncached. 795 */ 796 static int 797 i915_gem_gtt_pwrite_fast(struct drm_device *dev, 798 struct drm_i915_gem_object *obj, 799 struct drm_i915_gem_pwrite *args, 800 struct drm_file *file) 801 { 802 struct drm_i915_private *dev_priv = dev->dev_private; 803 ssize_t remain; 804 loff_t offset, page_base; 805 char __user *user_data; 806 int page_offset, page_length, ret; 807 808 ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK); 809 if (ret) 810 goto out; 811 812 ret = i915_gem_object_set_to_gtt_domain(obj, true); 813 if (ret) 814 goto out_unpin; 815 816 ret = i915_gem_object_put_fence(obj); 817 if (ret) 818 goto out_unpin; 819 820 user_data = to_user_ptr(args->data_ptr); 821 remain = args->size; 822 823 offset = i915_gem_obj_ggtt_offset(obj) + args->offset; 824 825 intel_fb_obj_invalidate(obj, NULL, ORIGIN_GTT); 826 827 while (remain > 0) { 828 /* Operation in this page 829 * 830 * page_base = page offset within aperture 831 * page_offset = offset within page 832 * page_length = bytes to copy for this page 833 */ 834 page_base = offset & ~PAGE_MASK; 835 page_offset = offset_in_page(offset); 836 page_length = remain; 837 if ((page_offset + remain) > PAGE_SIZE) 838 page_length = PAGE_SIZE - page_offset; 839 840 /* If we get a fault while copying data, then (presumably) our 841 * source page isn't available. Return the error and we'll 842 * retry in the slow path. 843 */ 844 if (fast_user_write(dev_priv->gtt.mappable, page_base, 845 page_offset, user_data, page_length)) { 846 ret = -EFAULT; 847 goto out_flush; 848 } 849 850 remain -= page_length; 851 user_data += page_length; 852 offset += page_length; 853 } 854 855 out_flush: 856 intel_fb_obj_flush(obj, false); 857 out_unpin: 858 i915_gem_object_ggtt_unpin(obj); 859 out: 860 return ret; 861 } 862 863 /* Per-page copy function for the shmem pwrite fastpath. 864 * Flushes invalid cachelines before writing to the target if 865 * needs_clflush_before is set and flushes out any written cachelines after 866 * writing if needs_clflush is set. 
*/ 867 static int 868 shmem_pwrite_fast(struct vm_page *page, int shmem_page_offset, int page_length, 869 char __user *user_data, 870 bool page_do_bit17_swizzling, 871 bool needs_clflush_before, 872 bool needs_clflush_after) 873 { 874 char *vaddr; 875 int ret; 876 877 if (unlikely(page_do_bit17_swizzling)) 878 return -EINVAL; 879 880 vaddr = kmap_atomic(page); 881 if (needs_clflush_before) 882 drm_clflush_virt_range(vaddr + shmem_page_offset, 883 page_length); 884 ret = __copy_from_user_inatomic(vaddr + shmem_page_offset, 885 user_data, page_length); 886 if (needs_clflush_after) 887 drm_clflush_virt_range(vaddr + shmem_page_offset, 888 page_length); 889 kunmap_atomic(vaddr); 890 891 return ret ? -EFAULT : 0; 892 } 893 894 /* Only difference to the fast-path function is that this can handle bit17 895 * and uses non-atomic copy and kmap functions. */ 896 static int 897 shmem_pwrite_slow(struct vm_page *page, int shmem_page_offset, int page_length, 898 char __user *user_data, 899 bool page_do_bit17_swizzling, 900 bool needs_clflush_before, 901 bool needs_clflush_after) 902 { 903 char *vaddr; 904 int ret; 905 906 vaddr = kmap(page); 907 if (unlikely(needs_clflush_before || page_do_bit17_swizzling)) 908 shmem_clflush_swizzled_range(vaddr + shmem_page_offset, 909 page_length, 910 page_do_bit17_swizzling); 911 if (page_do_bit17_swizzling) 912 ret = __copy_from_user_swizzled(vaddr, shmem_page_offset, 913 user_data, 914 page_length); 915 else 916 ret = __copy_from_user(vaddr + shmem_page_offset, 917 user_data, 918 page_length); 919 if (needs_clflush_after) 920 shmem_clflush_swizzled_range(vaddr + shmem_page_offset, 921 page_length, 922 page_do_bit17_swizzling); 923 kunmap(page); 924 925 return ret ? -EFAULT : 0; 926 } 927 928 static int 929 i915_gem_shmem_pwrite(struct drm_device *dev, 930 struct drm_i915_gem_object *obj, 931 struct drm_i915_gem_pwrite *args, 932 struct drm_file *file) 933 { 934 ssize_t remain; 935 loff_t offset; 936 char __user *user_data; 937 int shmem_page_offset, page_length, ret = 0; 938 int obj_do_bit17_swizzling, page_do_bit17_swizzling; 939 int hit_slowpath = 0; 940 int needs_clflush_after = 0; 941 int needs_clflush_before = 0; 942 int i; 943 944 user_data = to_user_ptr(args->data_ptr); 945 remain = args->size; 946 947 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); 948 949 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) { 950 /* If we're not in the cpu write domain, set ourself into the gtt 951 * write domain and manually flush cachelines (if required). This 952 * optimizes for the case when the gpu will use the data 953 * right away and we therefore have to clflush anyway. */ 954 needs_clflush_after = cpu_write_needs_clflush(obj); 955 ret = i915_gem_object_wait_rendering(obj, false); 956 if (ret) 957 return ret; 958 } 959 /* Same trick applies to invalidate partially written cachelines read 960 * before writing. 
*/ 961 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) 962 needs_clflush_before = 963 !cpu_cache_is_coherent(dev, obj->cache_level); 964 965 ret = i915_gem_object_get_pages(obj); 966 if (ret) 967 return ret; 968 969 intel_fb_obj_invalidate(obj, NULL, ORIGIN_CPU); 970 971 i915_gem_object_pin_pages(obj); 972 973 offset = args->offset; 974 obj->dirty = 1; 975 976 VM_OBJECT_LOCK(obj->base.vm_obj); 977 vm_object_pip_add(obj->base.vm_obj, 1); 978 for (i = 0; i < (obj->base.size >> PAGE_SHIFT); i++) { 979 struct vm_page *page = obj->pages[i]; 980 int partial_cacheline_write; 981 982 if (i < offset >> PAGE_SHIFT) 983 continue; 984 985 if (remain <= 0) 986 break; 987 988 /* Operation in this page 989 * 990 * shmem_page_offset = offset within page in shmem file 991 * page_length = bytes to copy for this page 992 */ 993 shmem_page_offset = offset_in_page(offset); 994 995 page_length = remain; 996 if ((shmem_page_offset + page_length) > PAGE_SIZE) 997 page_length = PAGE_SIZE - shmem_page_offset; 998 999 /* If we don't overwrite a cacheline completely we need to be 1000 * careful to have up-to-date data by first clflushing. Don't 1001 * overcomplicate things and flush the entire patch. */ 1002 partial_cacheline_write = needs_clflush_before && 1003 ((shmem_page_offset | page_length) 1004 & (cpu_clflush_line_size - 1)); 1005 1006 page_do_bit17_swizzling = obj_do_bit17_swizzling && 1007 (page_to_phys(page) & (1 << 17)) != 0; 1008 1009 ret = shmem_pwrite_fast(page, shmem_page_offset, page_length, 1010 user_data, page_do_bit17_swizzling, 1011 partial_cacheline_write, 1012 needs_clflush_after); 1013 if (ret == 0) 1014 goto next_page; 1015 1016 hit_slowpath = 1; 1017 mutex_unlock(&dev->struct_mutex); 1018 ret = shmem_pwrite_slow(page, shmem_page_offset, page_length, 1019 user_data, page_do_bit17_swizzling, 1020 partial_cacheline_write, 1021 needs_clflush_after); 1022 1023 mutex_lock(&dev->struct_mutex); 1024 1025 if (ret) 1026 goto out; 1027 1028 next_page: 1029 remain -= page_length; 1030 user_data += page_length; 1031 offset += page_length; 1032 } 1033 vm_object_pip_wakeup(obj->base.vm_obj); 1034 VM_OBJECT_UNLOCK(obj->base.vm_obj); 1035 1036 out: 1037 i915_gem_object_unpin_pages(obj); 1038 1039 if (hit_slowpath) { 1040 /* 1041 * Fixup: Flush cpu caches in case we didn't flush the dirty 1042 * cachelines in-line while writing and the object moved 1043 * out of the cpu write domain while we've dropped the lock. 1044 */ 1045 if (!needs_clflush_after && 1046 obj->base.write_domain != I915_GEM_DOMAIN_CPU) { 1047 if (i915_gem_clflush_object(obj, obj->pin_display)) 1048 i915_gem_chipset_flush(dev); 1049 } 1050 } 1051 1052 if (needs_clflush_after) 1053 i915_gem_chipset_flush(dev); 1054 1055 intel_fb_obj_flush(obj, false); 1056 return ret; 1057 } 1058 1059 /** 1060 * Writes data to the object referenced by handle. 1061 * 1062 * On error, the contents of the buffer that were to be modified are undefined. 
1063 */ 1064 int 1065 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, 1066 struct drm_file *file) 1067 { 1068 struct drm_i915_private *dev_priv = dev->dev_private; 1069 struct drm_i915_gem_pwrite *args = data; 1070 struct drm_i915_gem_object *obj; 1071 int ret; 1072 1073 if (args->size == 0) 1074 return 0; 1075 1076 if (likely(!i915.prefault_disable)) { 1077 ret = fault_in_multipages_readable(to_user_ptr(args->data_ptr), 1078 args->size); 1079 if (ret) 1080 return -EFAULT; 1081 } 1082 1083 intel_runtime_pm_get(dev_priv); 1084 1085 ret = i915_mutex_lock_interruptible(dev); 1086 if (ret) 1087 goto put_rpm; 1088 1089 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 1090 if (&obj->base == NULL) { 1091 ret = -ENOENT; 1092 goto unlock; 1093 } 1094 1095 /* Bounds check destination. */ 1096 if (args->offset > obj->base.size || 1097 args->size > obj->base.size - args->offset) { 1098 ret = -EINVAL; 1099 goto out; 1100 } 1101 1102 trace_i915_gem_object_pwrite(obj, args->offset, args->size); 1103 1104 ret = -EFAULT; 1105 /* We can only do the GTT pwrite on untiled buffers, as otherwise 1106 * it would end up going through the fenced access, and we'll get 1107 * different detiling behavior between reading and writing. 1108 * pread/pwrite currently are reading and writing from the CPU 1109 * perspective, requiring manual detiling by the client. 1110 */ 1111 1112 if (obj->tiling_mode == I915_TILING_NONE && 1113 obj->base.write_domain != I915_GEM_DOMAIN_CPU && 1114 cpu_write_needs_clflush(obj)) { 1115 ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file); 1116 /* Note that the gtt paths might fail with non-page-backed user 1117 * pointers (e.g. gtt mappings when moving data between 1118 * textures). Fallback to the shmem path in that case. */ 1119 } 1120 1121 if (ret == -EFAULT || ret == -ENOSPC) { 1122 if (obj->phys_handle) 1123 ret = i915_gem_phys_pwrite(obj, args, file); 1124 else 1125 ret = i915_gem_shmem_pwrite(dev, obj, args, file); 1126 } 1127 1128 out: 1129 drm_gem_object_unreference(&obj->base); 1130 unlock: 1131 mutex_unlock(&dev->struct_mutex); 1132 put_rpm: 1133 intel_runtime_pm_put(dev_priv); 1134 1135 return ret; 1136 } 1137 1138 int 1139 i915_gem_check_wedge(struct i915_gpu_error *error, 1140 bool interruptible) 1141 { 1142 if (i915_reset_in_progress(error)) { 1143 /* Non-interruptible callers can't handle -EAGAIN, hence return 1144 * -EIO unconditionally for these. */ 1145 if (!interruptible) 1146 return -EIO; 1147 1148 /* Recovery complete, but the reset failed ... */ 1149 if (i915_terminally_wedged(error)) 1150 return -EIO; 1151 1152 /* 1153 * Check if GPU Reset is in progress - we need intel_ring_begin 1154 * to work properly to reinit the hw state while the gpu is 1155 * still marked as reset-in-progress. Handle this with a flag. 1156 */ 1157 if (!error->reload_in_reset) 1158 return -EAGAIN; 1159 } 1160 1161 return 0; 1162 } 1163 1164 /* 1165 * Compare arbitrary request against outstanding lazy request. Emit on match. 
1166 */ 1167 int 1168 i915_gem_check_olr(struct drm_i915_gem_request *req) 1169 { 1170 int ret; 1171 1172 WARN_ON(!mutex_is_locked(&req->ring->dev->struct_mutex)); 1173 1174 ret = 0; 1175 if (req == req->ring->outstanding_lazy_request) 1176 ret = i915_add_request(req->ring); 1177 1178 return ret; 1179 } 1180 1181 #if 0 1182 static void fake_irq(unsigned long data) 1183 { 1184 wake_up_process((struct task_struct *)data); 1185 } 1186 1187 static bool missed_irq(struct drm_i915_private *dev_priv, 1188 struct intel_engine_cs *ring) 1189 { 1190 return test_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings); 1191 } 1192 #endif 1193 1194 #if 0 1195 static int __i915_spin_request(struct drm_i915_gem_request *req) 1196 { 1197 unsigned long timeout; 1198 1199 if (i915_gem_request_get_ring(req)->irq_refcount) 1200 return -EBUSY; 1201 1202 timeout = jiffies + 1; 1203 while (!need_resched()) { 1204 if (i915_gem_request_completed(req, true)) 1205 return 0; 1206 1207 if (time_after_eq(jiffies, timeout)) 1208 break; 1209 1210 cpu_relax_lowlatency(); 1211 } 1212 if (i915_gem_request_completed(req, false)) 1213 return 0; 1214 1215 return -EAGAIN; 1216 } 1217 #endif 1218 1219 /** 1220 * __i915_wait_request - wait until execution of request has finished 1221 * @req: duh! 1222 * @reset_counter: reset sequence associated with the given request 1223 * @interruptible: do an interruptible wait (normally yes) 1224 * @timeout: in - how long to wait (NULL forever); out - how much time remaining 1225 * 1226 * Note: It is of utmost importance that the passed in seqno and reset_counter 1227 * values have been read by the caller in an smp safe manner. Where read-side 1228 * locks are involved, it is sufficient to read the reset_counter before 1229 * unlocking the lock that protects the seqno. For lockless tricks, the 1230 * reset_counter _must_ be read before, and an appropriate smp_rmb must be 1231 * inserted. 1232 * 1233 * Returns 0 if the request was found within the alloted time. Else returns the 1234 * errno with remaining time filled in timeout argument. 1235 */ 1236 int __i915_wait_request(struct drm_i915_gem_request *req, 1237 unsigned reset_counter, 1238 bool interruptible, 1239 s64 *timeout, 1240 struct intel_rps_client *rps) 1241 { 1242 struct intel_engine_cs *ring = i915_gem_request_get_ring(req); 1243 struct drm_device *dev = ring->dev; 1244 struct drm_i915_private *dev_priv = dev->dev_private; 1245 const bool irq_test_in_progress = 1246 ACCESS_ONCE(dev_priv->gpu_error.test_irq_rings) & intel_ring_flag(ring); 1247 unsigned long timeout_expire; 1248 long end; 1249 bool wait_forever = true; 1250 s64 before, now; 1251 int ret; 1252 1253 WARN(!intel_irqs_enabled(dev_priv), "IRQs disabled"); 1254 1255 if (list_empty(&req->list)) 1256 return 0; 1257 1258 if (i915_gem_request_completed(req, true)) 1259 return 0; 1260 1261 if (timeout != NULL) 1262 wait_forever = false; 1263 1264 timeout_expire = timeout ? 
1265 jiffies + nsecs_to_jiffies_timeout((u64)*timeout) : 0; 1266 1267 if (INTEL_INFO(dev_priv)->gen >= 6) 1268 gen6_rps_boost(dev_priv, rps, req->emitted_jiffies); 1269 1270 if (!irq_test_in_progress && WARN_ON(!ring->irq_get(ring))) 1271 return -ENODEV; 1272 1273 /* Record current time in case interrupted by signal, or wedged */ 1274 trace_i915_gem_request_wait_begin(req); 1275 before = ktime_get_raw_ns(); 1276 1277 /* Optimistic spin for the next jiffie before touching IRQs */ 1278 #if 0 1279 ret = __i915_spin_request(req); 1280 if (ret == 0) 1281 goto out; 1282 #endif 1283 1284 #define EXIT_COND \ 1285 (i915_seqno_passed(ring->get_seqno(ring, false), i915_gem_request_get_seqno(req)) || \ 1286 i915_reset_in_progress(&dev_priv->gpu_error) || \ 1287 reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter)) 1288 do { 1289 if (interruptible) 1290 end = wait_event_interruptible_timeout(ring->irq_queue, 1291 EXIT_COND, 1292 timeout_expire); 1293 else 1294 end = wait_event_timeout(ring->irq_queue, EXIT_COND, 1295 timeout_expire); 1296 1297 /* We need to check whether any gpu reset happened in between 1298 * the caller grabbing the seqno and now ... */ 1299 if (reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter)) 1300 end = -EAGAIN; 1301 1302 /* ... but upgrade the -EGAIN to an -EIO if the gpu is truely 1303 * gone. */ 1304 ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible); 1305 if (ret) 1306 end = ret; 1307 } while (end == 0 && wait_forever); 1308 1309 now = ktime_get_raw_ns(); 1310 trace_i915_gem_request_wait_end(req); 1311 1312 ring->irq_put(ring); 1313 #undef EXIT_COND 1314 1315 if (timeout) { 1316 s64 tres = *timeout - (now - before); 1317 1318 *timeout = tres < 0 ? 0 : tres; 1319 1320 /* 1321 * Apparently ktime isn't accurate enough and occasionally has a 1322 * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch 1323 * things up to make the test happy. We allow up to 1 jiffy. 1324 * 1325 * This is a regrssion from the timespec->ktime conversion. 1326 */ 1327 if (ret == -ETIMEDOUT && *timeout < jiffies_to_usecs(1)*1000) 1328 *timeout = 0; 1329 } 1330 1331 switch (end) { 1332 case -EIO: 1333 case -EAGAIN: /* Wedged */ 1334 case -ERESTARTSYS: /* Signal */ 1335 return (int)end; 1336 case 0: /* Timeout */ 1337 return -ETIMEDOUT; /* -ETIME on Linux */ 1338 default: /* Completed */ 1339 WARN_ON(end < 0); /* We're not aware of other errors */ 1340 return 0; 1341 } 1342 } 1343 1344 static inline void 1345 i915_gem_request_remove_from_client(struct drm_i915_gem_request *request) 1346 { 1347 struct drm_i915_file_private *file_priv = request->file_priv; 1348 1349 if (!file_priv) 1350 return; 1351 1352 spin_lock(&file_priv->mm.lock); 1353 list_del(&request->client_list); 1354 request->file_priv = NULL; 1355 spin_unlock(&file_priv->mm.lock); 1356 } 1357 1358 static void i915_gem_request_retire(struct drm_i915_gem_request *request) 1359 { 1360 trace_i915_gem_request_retire(request); 1361 1362 /* We know the GPU must have read the request to have 1363 * sent us the seqno + interrupt, so use the position 1364 * of tail of the request to update the last known position 1365 * of the GPU head. 1366 * 1367 * Note this requires that we are always called in request 1368 * completion order. 
1369 */ 1370 request->ringbuf->last_retired_head = request->postfix; 1371 1372 list_del_init(&request->list); 1373 i915_gem_request_remove_from_client(request); 1374 1375 #if 0 1376 put_pid(request->pid); 1377 #endif 1378 1379 i915_gem_request_unreference(request); 1380 } 1381 1382 static void 1383 __i915_gem_request_retire__upto(struct drm_i915_gem_request *req) 1384 { 1385 struct intel_engine_cs *engine = req->ring; 1386 struct drm_i915_gem_request *tmp; 1387 1388 lockdep_assert_held(&engine->dev->struct_mutex); 1389 1390 if (list_empty(&req->list)) 1391 return; 1392 1393 do { 1394 tmp = list_first_entry(&engine->request_list, 1395 typeof(*tmp), list); 1396 1397 i915_gem_request_retire(tmp); 1398 } while (tmp != req); 1399 1400 WARN_ON(i915_verify_lists(engine->dev)); 1401 } 1402 1403 /** 1404 * Waits for a request to be signaled, and cleans up the 1405 * request and object lists appropriately for that event. 1406 */ 1407 int 1408 i915_wait_request(struct drm_i915_gem_request *req) 1409 { 1410 struct drm_device *dev; 1411 struct drm_i915_private *dev_priv; 1412 bool interruptible; 1413 int ret; 1414 1415 BUG_ON(req == NULL); 1416 1417 dev = req->ring->dev; 1418 dev_priv = dev->dev_private; 1419 interruptible = dev_priv->mm.interruptible; 1420 1421 BUG_ON(!mutex_is_locked(&dev->struct_mutex)); 1422 1423 ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible); 1424 if (ret) 1425 return ret; 1426 1427 ret = i915_gem_check_olr(req); 1428 if (ret) 1429 return ret; 1430 1431 ret = __i915_wait_request(req, 1432 atomic_read(&dev_priv->gpu_error.reset_counter), 1433 interruptible, NULL, NULL); 1434 if (ret) 1435 return ret; 1436 1437 __i915_gem_request_retire__upto(req); 1438 return 0; 1439 } 1440 1441 /** 1442 * Ensures that all rendering to the object has completed and the object is 1443 * safe to unbind from the GTT or access from the CPU. 1444 */ 1445 int 1446 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, 1447 bool readonly) 1448 { 1449 int ret, i; 1450 1451 if (!obj->active) 1452 return 0; 1453 1454 if (readonly) { 1455 if (obj->last_write_req != NULL) { 1456 ret = i915_wait_request(obj->last_write_req); 1457 if (ret) 1458 return ret; 1459 1460 i = obj->last_write_req->ring->id; 1461 if (obj->last_read_req[i] == obj->last_write_req) 1462 i915_gem_object_retire__read(obj, i); 1463 else 1464 i915_gem_object_retire__write(obj); 1465 } 1466 } else { 1467 for (i = 0; i < I915_NUM_RINGS; i++) { 1468 if (obj->last_read_req[i] == NULL) 1469 continue; 1470 1471 ret = i915_wait_request(obj->last_read_req[i]); 1472 if (ret) 1473 return ret; 1474 1475 i915_gem_object_retire__read(obj, i); 1476 } 1477 RQ_BUG_ON(obj->active); 1478 } 1479 1480 return 0; 1481 } 1482 1483 static void 1484 i915_gem_object_retire_request(struct drm_i915_gem_object *obj, 1485 struct drm_i915_gem_request *req) 1486 { 1487 int ring = req->ring->id; 1488 1489 if (obj->last_read_req[ring] == req) 1490 i915_gem_object_retire__read(obj, ring); 1491 else if (obj->last_write_req == req) 1492 i915_gem_object_retire__write(obj); 1493 1494 __i915_gem_request_retire__upto(req); 1495 } 1496 1497 /* A nonblocking variant of the above wait. This is a highly dangerous routine 1498 * as the object state may change during this call. 
1499 */ 1500 static __must_check int 1501 i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj, 1502 struct intel_rps_client *rps, 1503 bool readonly) 1504 { 1505 struct drm_device *dev = obj->base.dev; 1506 struct drm_i915_private *dev_priv = dev->dev_private; 1507 struct drm_i915_gem_request *requests[I915_NUM_RINGS]; 1508 unsigned reset_counter; 1509 int ret, i, n = 0; 1510 1511 BUG_ON(!mutex_is_locked(&dev->struct_mutex)); 1512 BUG_ON(!dev_priv->mm.interruptible); 1513 1514 if (!obj->active) 1515 return 0; 1516 1517 ret = i915_gem_check_wedge(&dev_priv->gpu_error, true); 1518 if (ret) 1519 return ret; 1520 1521 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter); 1522 1523 if (readonly) { 1524 struct drm_i915_gem_request *req; 1525 1526 req = obj->last_write_req; 1527 if (req == NULL) 1528 return 0; 1529 1530 ret = i915_gem_check_olr(req); 1531 if (ret) 1532 goto err; 1533 1534 requests[n++] = i915_gem_request_reference(req); 1535 } else { 1536 for (i = 0; i < I915_NUM_RINGS; i++) { 1537 struct drm_i915_gem_request *req; 1538 1539 req = obj->last_read_req[i]; 1540 if (req == NULL) 1541 continue; 1542 1543 ret = i915_gem_check_olr(req); 1544 if (ret) 1545 goto err; 1546 1547 requests[n++] = i915_gem_request_reference(req); 1548 } 1549 } 1550 1551 mutex_unlock(&dev->struct_mutex); 1552 for (i = 0; ret == 0 && i < n; i++) 1553 ret = __i915_wait_request(requests[i], reset_counter, true, 1554 NULL, rps); 1555 mutex_lock(&dev->struct_mutex); 1556 1557 err: 1558 for (i = 0; i < n; i++) { 1559 if (ret == 0) 1560 i915_gem_object_retire_request(obj, requests[i]); 1561 i915_gem_request_unreference(requests[i]); 1562 } 1563 1564 return ret; 1565 } 1566 1567 static struct intel_rps_client *to_rps_client(struct drm_file *file) 1568 { 1569 struct drm_i915_file_private *fpriv = file->driver_priv; 1570 return &fpriv->rps; 1571 } 1572 1573 /** 1574 * Called when user space prepares to use an object with the CPU, either 1575 * through the mmap ioctl's mapping or a GTT mapping. 1576 */ 1577 int 1578 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, 1579 struct drm_file *file) 1580 { 1581 struct drm_i915_gem_set_domain *args = data; 1582 struct drm_i915_gem_object *obj; 1583 uint32_t read_domains = args->read_domains; 1584 uint32_t write_domain = args->write_domain; 1585 int ret; 1586 1587 /* Only handle setting domains to types used by the CPU. */ 1588 if (write_domain & I915_GEM_GPU_DOMAINS) 1589 return -EINVAL; 1590 1591 if (read_domains & I915_GEM_GPU_DOMAINS) 1592 return -EINVAL; 1593 1594 /* Having something in the write domain implies it's in the read 1595 * domain, and only that read domain. Enforce that in the request. 1596 */ 1597 if (write_domain != 0 && read_domains != write_domain) 1598 return -EINVAL; 1599 1600 ret = i915_mutex_lock_interruptible(dev); 1601 if (ret) 1602 return ret; 1603 1604 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 1605 if (&obj->base == NULL) { 1606 ret = -ENOENT; 1607 goto unlock; 1608 } 1609 1610 /* Try to flush the object off the GPU without holding the lock. 1611 * We will repeat the flush holding the lock in the normal manner 1612 * to catch cases where we are gazumped. 
1613 */ 1614 ret = i915_gem_object_wait_rendering__nonblocking(obj, 1615 to_rps_client(file), 1616 !write_domain); 1617 if (ret) 1618 goto unref; 1619 1620 if (read_domains & I915_GEM_DOMAIN_GTT) 1621 ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0); 1622 else 1623 ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0); 1624 1625 unref: 1626 drm_gem_object_unreference(&obj->base); 1627 unlock: 1628 mutex_unlock(&dev->struct_mutex); 1629 return ret; 1630 } 1631 1632 /** 1633 * Called when user space has done writes to this buffer 1634 */ 1635 int 1636 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, 1637 struct drm_file *file) 1638 { 1639 struct drm_i915_gem_sw_finish *args = data; 1640 struct drm_i915_gem_object *obj; 1641 int ret = 0; 1642 1643 ret = i915_mutex_lock_interruptible(dev); 1644 if (ret) 1645 return ret; 1646 1647 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 1648 if (&obj->base == NULL) { 1649 ret = -ENOENT; 1650 goto unlock; 1651 } 1652 1653 /* Pinned buffers may be scanout, so flush the cache */ 1654 if (obj->pin_display) 1655 i915_gem_object_flush_cpu_write_domain(obj); 1656 1657 drm_gem_object_unreference(&obj->base); 1658 unlock: 1659 mutex_unlock(&dev->struct_mutex); 1660 return ret; 1661 } 1662 1663 /** 1664 * Maps the contents of an object, returning the address it is mapped 1665 * into. 1666 * 1667 * While the mapping holds a reference on the contents of the object, it doesn't 1668 * imply a ref on the object itself. 1669 * 1670 * IMPORTANT: 1671 * 1672 * DRM driver writers who look a this function as an example for how to do GEM 1673 * mmap support, please don't implement mmap support like here. The modern way 1674 * to implement DRM mmap support is with an mmap offset ioctl (like 1675 * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly. 1676 * That way debug tooling like valgrind will understand what's going on, hiding 1677 * the mmap call in a driver private ioctl will break that. The i915 driver only 1678 * does cpu mmaps this way because we didn't know better. 1679 */ 1680 int 1681 i915_gem_mmap_ioctl(struct drm_device *dev, void *data, 1682 struct drm_file *file) 1683 { 1684 struct drm_i915_gem_mmap *args = data; 1685 struct drm_gem_object *obj; 1686 unsigned long addr; 1687 struct proc *p = curproc; 1688 vm_map_t map = &p->p_vmspace->vm_map; 1689 vm_size_t size; 1690 int error = 0, rv; 1691 1692 obj = drm_gem_object_lookup(dev, file, args->handle); 1693 if (obj == NULL) 1694 return -ENOENT; 1695 1696 if (args->size == 0) 1697 goto out; 1698 1699 size = round_page(args->size); 1700 if (map->size + size > p->p_rlimit[RLIMIT_VMEM].rlim_cur) { 1701 error = -ENOMEM; 1702 goto out; 1703 } 1704 1705 /* 1706 * Call hint to ensure that NULL is not returned as a valid address 1707 * and to reduce vm_map traversals. XXX causes instability, use a 1708 * fixed low address as the start point instead to avoid the NULL 1709 * return issue. 1710 */ 1711 addr = PAGE_SIZE; 1712 1713 /* 1714 * Use 256KB alignment. It is unclear why this matters for a 1715 * virtual address but it appears to fix a number of application/X 1716 * crashes and kms console switching is much faster. 
1717 */ 1718 vm_object_hold(obj->vm_obj); 1719 vm_object_reference_locked(obj->vm_obj); 1720 vm_object_drop(obj->vm_obj); 1721 1722 rv = vm_map_find(map, obj->vm_obj, NULL, 1723 args->offset, &addr, args->size, 1724 256 * 1024, /* align */ 1725 TRUE, /* fitit */ 1726 VM_MAPTYPE_NORMAL, /* maptype */ 1727 VM_PROT_READ | VM_PROT_WRITE, /* prot */ 1728 VM_PROT_READ | VM_PROT_WRITE, /* max */ 1729 MAP_SHARED /* cow */); 1730 if (rv != KERN_SUCCESS) { 1731 vm_object_deallocate(obj->vm_obj); 1732 error = -vm_mmap_to_errno(rv); 1733 } else { 1734 args->addr_ptr = (uint64_t)addr; 1735 } 1736 out: 1737 drm_gem_object_unreference(obj); 1738 return (error); 1739 } 1740 1741 /** 1742 * i915_gem_fault - fault a page into the GTT 1743 * 1744 * vm_obj is locked on entry and expected to be locked on return. 1745 * 1746 * The vm_pager has placemarked the object with an anonymous memory page 1747 * which we must replace atomically to avoid races against concurrent faults 1748 * on the same page. XXX we currently are unable to do this atomically. 1749 * 1750 * If we are to return an error we should not touch the anonymous page, 1751 * the caller will deallocate it. 1752 * 1753 * XXX Most GEM calls appear to be interruptable, but we can't hard loop 1754 * in that case. Release all resources and wait 1 tick before retrying. 1755 * This is a huge problem which needs to be fixed by getting rid of most 1756 * of the interruptability. The linux code does not retry but does appear 1757 * to have some sort of mechanism (VM_FAULT_NOPAGE ?) for the higher level 1758 * to be able to retry. 1759 * 1760 * -- 1761 * 1762 * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped 1763 * from userspace. The fault handler takes care of binding the object to 1764 * the GTT (if needed), allocating and programming a fence register (again, 1765 * only if needed based on whether the old reg is still valid or the object 1766 * is tiled) and inserting a new PTE into the faulting process. 1767 * 1768 * Note that the faulting process may involve evicting existing objects 1769 * from the GTT and/or fence registers to make room. So performance may 1770 * suffer if the GTT working set is large or there are few fence registers 1771 * left. 1772 * 1773 * vm_obj is locked on entry and expected to be locked on return. The VM 1774 * pager has placed an anonymous memory page at (obj,offset) which we have 1775 * to replace. 1776 */ 1777 int i915_gem_fault(vm_object_t vm_obj, vm_ooffset_t offset, int prot, vm_page_t *mres) 1778 { 1779 struct drm_i915_gem_object *obj = to_intel_bo(vm_obj->handle); 1780 struct drm_device *dev = obj->base.dev; 1781 struct drm_i915_private *dev_priv = dev->dev_private; 1782 unsigned long page_offset; 1783 vm_page_t m, oldm = NULL; 1784 int ret = 0; 1785 int didpip = 0; 1786 bool write = !!(prot & VM_PROT_WRITE); 1787 1788 intel_runtime_pm_get(dev_priv); 1789 1790 /* We don't use vmf->pgoff since that has the fake offset */ 1791 page_offset = (unsigned long)offset; 1792 1793 retry: 1794 ret = i915_mutex_lock_interruptible(dev); 1795 if (ret) 1796 goto out; 1797 1798 trace_i915_gem_object_fault(obj, page_offset, true, write); 1799 1800 /* Try to flush the object off the GPU first without holding the lock. 1801 * Upon reacquiring the lock, we will perform our sanity checks and then 1802 * repeat the flush holding the lock in the normal manner to catch cases 1803 * where we are gazumped. 
1804 */ 1805 ret = i915_gem_object_wait_rendering__nonblocking(obj, NULL, !write); 1806 if (ret) 1807 goto unlock; 1808 1809 /* Access to snoopable pages through the GTT is incoherent. */ 1810 if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) { 1811 ret = -EFAULT; 1812 goto unlock; 1813 } 1814 1815 /* Now bind it into the GTT if needed */ 1816 ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE); 1817 if (ret) 1818 goto unlock; 1819 1820 ret = i915_gem_object_set_to_gtt_domain(obj, write); 1821 if (ret) 1822 goto unpin; 1823 1824 ret = i915_gem_object_get_fence(obj); 1825 if (ret) 1826 goto unpin; 1827 1828 /* 1829 * START FREEBSD MAGIC 1830 * 1831 * Add a pip count to avoid destruction and certain other 1832 * complex operations (such as collapses?) while unlocked. 1833 */ 1834 if (didpip == 0) { 1835 vm_object_pip_add(vm_obj, 1); 1836 didpip = 1; 1837 } 1838 1839 /* 1840 * XXX We must currently remove the placeholder page now to avoid 1841 * a deadlock against a concurrent i915_gem_release_mmap(). 1842 * Otherwise concurrent operation will block on the busy page 1843 * while holding locks which we need to obtain. 1844 */ 1845 if (*mres != NULL) { 1846 oldm = *mres; 1847 if ((oldm->flags & PG_BUSY) == 0) 1848 kprintf("i915_gem_fault: Page was not busy\n"); 1849 else 1850 vm_page_remove(oldm); 1851 *mres = NULL; 1852 } else { 1853 oldm = NULL; 1854 } 1855 1856 ret = 0; 1857 m = NULL; 1858 1859 /* 1860 * Since the object lock was dropped, another thread might have 1861 * faulted on the same GTT address and instantiated the mapping. 1862 * Recheck. 1863 */ 1864 m = vm_page_lookup(vm_obj, OFF_TO_IDX(offset)); 1865 if (m != NULL) { 1866 /* 1867 * Try to busy the page, retry on failure (non-zero ret). 1868 */ 1869 if (vm_page_busy_try(m, false)) { 1870 kprintf("i915_gem_fault: PG_BUSY\n"); 1871 ret = -EINTR; 1872 goto unlock; 1873 } 1874 goto have_page; 1875 } 1876 /* 1877 * END FREEBSD MAGIC 1878 */ 1879 1880 obj->fault_mappable = true; 1881 1882 m = vm_phys_fictitious_to_vm_page(dev_priv->gtt.mappable_base + 1883 i915_gem_obj_ggtt_offset(obj) + 1884 offset); 1885 if (m == NULL) { 1886 ret = -EFAULT; 1887 goto unpin; 1888 } 1889 KASSERT((m->flags & PG_FICTITIOUS) != 0, ("not fictitious %p", m)); 1890 KASSERT(m->wire_count == 1, ("wire_count not 1 %p", m)); 1891 1892 /* 1893 * Try to busy the page. Fails on non-zero return. 1894 */ 1895 if (vm_page_busy_try(m, false)) { 1896 kprintf("i915_gem_fault: PG_BUSY(2)\n"); 1897 ret = -EINTR; 1898 goto unpin; 1899 } 1900 m->valid = VM_PAGE_BITS_ALL; 1901 1902 /* 1903 * Finally, remap it using the new GTT offset. 1904 * 1905 * (object expected to be in a locked state) 1906 */ 1907 vm_page_insert(m, vm_obj, OFF_TO_IDX(offset)); 1908 have_page: 1909 *mres = m; 1910 1911 i915_gem_object_ggtt_unpin(obj); 1912 mutex_unlock(&dev->struct_mutex); 1913 ret = VM_PAGER_OK; 1914 goto done; 1915 1916 /* 1917 * ALTERNATIVE ERROR RETURN. 1918 * 1919 * OBJECT EXPECTED TO BE LOCKED. 1920 */ 1921 unpin: 1922 i915_gem_object_ggtt_unpin(obj); 1923 unlock: 1924 mutex_unlock(&dev->struct_mutex); 1925 out: 1926 switch (ret) { 1927 case -EIO: 1928 /* 1929 * We eat errors when the gpu is terminally wedged to avoid 1930 * userspace unduly crashing (gl has no provisions for mmaps to 1931 * fail). But any other -EIO isn't ours (e.g. swap in failure) 1932 * and so needs to be reported. 
1933 */ 1934 if (!i915_terminally_wedged(&dev_priv->gpu_error)) { 1935 // ret = VM_FAULT_SIGBUS; 1936 break; 1937 } 1938 /* fall through */ 1939 case -EAGAIN: 1940 /* 1941 * EAGAIN means the gpu is hung and we'll wait for the error 1942 * handler to reset everything when re-faulting in 1943 * i915_mutex_lock_interruptible. 1944 */ 1945 /* fall through */ 1946 case -ERESTARTSYS: 1947 case -EINTR: 1948 VM_OBJECT_UNLOCK(vm_obj); 1949 int dummy; 1950 tsleep(&dummy, 0, "delay", 1); /* XXX */ 1951 VM_OBJECT_LOCK(vm_obj); 1952 goto retry; 1953 default: 1954 WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret); 1955 ret = VM_PAGER_ERROR; 1956 break; 1957 } 1958 1959 done: 1960 if (oldm != NULL) 1961 vm_page_free(oldm); 1962 if (didpip) 1963 vm_object_pip_wakeup(vm_obj); 1964 1965 intel_runtime_pm_put(dev_priv); 1966 return ret; 1967 } 1968 1969 /** 1970 * i915_gem_release_mmap - remove physical page mappings 1971 * @obj: obj in question 1972 * 1973 * Preserve the reservation of the mmapping with the DRM core code, but 1974 * relinquish ownership of the pages back to the system. 1975 * 1976 * It is vital that we remove the page mapping if we have mapped a tiled 1977 * object through the GTT and then lose the fence register due to 1978 * resource pressure. Similarly if the object has been moved out of the 1979 * aperture, than pages mapped into userspace must be revoked. Removing the 1980 * mapping will then trigger a page fault on the next user access, allowing 1981 * fixup by i915_gem_fault(). 1982 */ 1983 void 1984 i915_gem_release_mmap(struct drm_i915_gem_object *obj) 1985 { 1986 vm_object_t devobj; 1987 vm_page_t m; 1988 int i, page_count; 1989 1990 if (!obj->fault_mappable) 1991 return; 1992 1993 devobj = cdev_pager_lookup(obj); 1994 if (devobj != NULL) { 1995 page_count = OFF_TO_IDX(obj->base.size); 1996 1997 VM_OBJECT_LOCK(devobj); 1998 for (i = 0; i < page_count; i++) { 1999 m = vm_page_lookup_busy_wait(devobj, i, TRUE, "915unm"); 2000 if (m == NULL) 2001 continue; 2002 cdev_pager_free_page(devobj, m); 2003 } 2004 VM_OBJECT_UNLOCK(devobj); 2005 vm_object_deallocate(devobj); 2006 } 2007 2008 obj->fault_mappable = false; 2009 } 2010 2011 void 2012 i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv) 2013 { 2014 struct drm_i915_gem_object *obj; 2015 2016 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) 2017 i915_gem_release_mmap(obj); 2018 } 2019 2020 uint32_t 2021 i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode) 2022 { 2023 uint32_t gtt_size; 2024 2025 if (INTEL_INFO(dev)->gen >= 4 || 2026 tiling_mode == I915_TILING_NONE) 2027 return size; 2028 2029 /* Previous chips need a power-of-two fence region when tiling */ 2030 if (INTEL_INFO(dev)->gen == 3) 2031 gtt_size = 1024*1024; 2032 else 2033 gtt_size = 512*1024; 2034 2035 while (gtt_size < size) 2036 gtt_size <<= 1; 2037 2038 return gtt_size; 2039 } 2040 2041 /** 2042 * i915_gem_get_gtt_alignment - return required GTT alignment for an object 2043 * @obj: object to check 2044 * 2045 * Return the required GTT alignment for an object, taking into account 2046 * potential fence register mapping. 2047 */ 2048 uint32_t 2049 i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size, 2050 int tiling_mode, bool fenced) 2051 { 2052 /* 2053 * Minimum alignment is 4k (GTT page size), but might be greater 2054 * if a fence register is needed for the object. 
2055 */ 2056 if (INTEL_INFO(dev)->gen >= 4 || (!fenced && IS_G33(dev)) || 2057 tiling_mode == I915_TILING_NONE) 2058 return 4096; 2059 2060 /* 2061 * Previous chips need to be aligned to the size of the smallest 2062 * fence register that can contain the object. 2063 */ 2064 return i915_gem_get_gtt_size(dev, size, tiling_mode); 2065 } 2066 2067 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj) 2068 { 2069 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2070 int ret; 2071 2072 #if 0 2073 if (drm_vma_node_has_offset(&obj->base.vma_node)) 2074 return 0; 2075 #endif 2076 2077 dev_priv->mm.shrinker_no_lock_stealing = true; 2078 2079 ret = drm_gem_create_mmap_offset(&obj->base); 2080 if (ret != -ENOSPC) 2081 goto out; 2082 2083 /* Badly fragmented mmap space? The only way we can recover 2084 * space is by destroying unwanted objects. We can't randomly release 2085 * mmap_offsets as userspace expects them to be persistent for the 2086 * lifetime of the objects. The closest we can is to release the 2087 * offsets on purgeable objects by truncating it and marking it purged, 2088 * which prevents userspace from ever using that object again. 2089 */ 2090 i915_gem_shrink(dev_priv, 2091 obj->base.size >> PAGE_SHIFT, 2092 I915_SHRINK_BOUND | 2093 I915_SHRINK_UNBOUND | 2094 I915_SHRINK_PURGEABLE); 2095 ret = drm_gem_create_mmap_offset(&obj->base); 2096 if (ret != -ENOSPC) 2097 goto out; 2098 2099 i915_gem_shrink_all(dev_priv); 2100 ret = drm_gem_create_mmap_offset(&obj->base); 2101 out: 2102 dev_priv->mm.shrinker_no_lock_stealing = false; 2103 2104 return ret; 2105 } 2106 2107 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj) 2108 { 2109 drm_gem_free_mmap_offset(&obj->base); 2110 } 2111 2112 int 2113 i915_gem_mmap_gtt(struct drm_file *file, 2114 struct drm_device *dev, 2115 uint32_t handle, 2116 uint64_t *offset) 2117 { 2118 struct drm_i915_gem_object *obj; 2119 int ret; 2120 2121 ret = i915_mutex_lock_interruptible(dev); 2122 if (ret) 2123 return ret; 2124 2125 obj = to_intel_bo(drm_gem_object_lookup(dev, file, handle)); 2126 if (&obj->base == NULL) { 2127 ret = -ENOENT; 2128 goto unlock; 2129 } 2130 2131 if (obj->madv != I915_MADV_WILLNEED) { 2132 DRM_DEBUG("Attempting to mmap a purgeable buffer\n"); 2133 ret = -EFAULT; 2134 goto out; 2135 } 2136 2137 ret = i915_gem_object_create_mmap_offset(obj); 2138 if (ret) 2139 goto out; 2140 2141 *offset = DRM_GEM_MAPPING_OFF(obj->base.map_list.key) | 2142 DRM_GEM_MAPPING_KEY; 2143 2144 out: 2145 drm_gem_object_unreference(&obj->base); 2146 unlock: 2147 mutex_unlock(&dev->struct_mutex); 2148 return ret; 2149 } 2150 2151 /** 2152 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing 2153 * @dev: DRM device 2154 * @data: GTT mapping ioctl data 2155 * @file: GEM object info 2156 * 2157 * Simply returns the fake offset to userspace so it can mmap it. 2158 * The mmap call will end up in drm_gem_mmap(), which will set things 2159 * up so we can get faults in the handler above. 2160 * 2161 * The fault handler will take care of binding the object into the GTT 2162 * (since it may have been evicted to make room for something), allocating 2163 * a fence register, and mapping the appropriate aperture address into 2164 * userspace. 
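 *
 * Illustrative userspace flow (sketch): the offset returned in
 * args->offset is passed straight to mmap(2) on the DRM fd, e.g.
 *
 *	ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, drm_fd, offset);
 *
 * and subsequent CPU accesses through ptr fault into the handler above.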
2165 */ 2166 int 2167 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, 2168 struct drm_file *file) 2169 { 2170 struct drm_i915_gem_mmap_gtt *args = data; 2171 2172 return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset); 2173 } 2174 2175 /* Immediately discard the backing storage */ 2176 static void 2177 i915_gem_object_truncate(struct drm_i915_gem_object *obj) 2178 { 2179 vm_object_t vm_obj; 2180 2181 vm_obj = obj->base.vm_obj; 2182 VM_OBJECT_LOCK(vm_obj); 2183 vm_object_page_remove(vm_obj, 0, 0, false); 2184 VM_OBJECT_UNLOCK(vm_obj); 2185 2186 obj->madv = __I915_MADV_PURGED; 2187 } 2188 2189 /* Try to discard unwanted pages */ 2190 static void 2191 i915_gem_object_invalidate(struct drm_i915_gem_object *obj) 2192 { 2193 #if 0 2194 struct address_space *mapping; 2195 #endif 2196 2197 switch (obj->madv) { 2198 case I915_MADV_DONTNEED: 2199 i915_gem_object_truncate(obj); 2200 case __I915_MADV_PURGED: 2201 return; 2202 } 2203 2204 #if 0 2205 if (obj->base.filp == NULL) 2206 return; 2207 2208 mapping = file_inode(obj->base.filp)->i_mapping, 2209 invalidate_mapping_pages(mapping, 0, (loff_t)-1); 2210 #endif 2211 } 2212 2213 static void 2214 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj) 2215 { 2216 int page_count = obj->base.size / PAGE_SIZE; 2217 int i, ret; 2218 2219 if (!obj->pages) 2220 return; 2221 2222 BUG_ON(obj->madv == __I915_MADV_PURGED); 2223 2224 ret = i915_gem_object_set_to_cpu_domain(obj, true); 2225 if (ret) { 2226 /* In the event of a disaster, abandon all caches and 2227 * hope for the best. 2228 */ 2229 WARN_ON(ret != -EIO); 2230 i915_gem_clflush_object(obj, true); 2231 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU; 2232 } 2233 2234 i915_gem_gtt_finish_object(obj); 2235 2236 if (i915_gem_object_needs_bit17_swizzle(obj)) 2237 i915_gem_object_save_bit_17_swizzle(obj); 2238 2239 if (obj->madv == I915_MADV_DONTNEED) 2240 obj->dirty = 0; 2241 2242 for (i = 0; i < page_count; i++) { 2243 struct vm_page *page = obj->pages[i]; 2244 2245 if (obj->dirty) 2246 set_page_dirty(page); 2247 2248 if (obj->madv == I915_MADV_WILLNEED) 2249 mark_page_accessed(page); 2250 2251 vm_page_busy_wait(obj->pages[i], FALSE, "i915gem"); 2252 vm_page_unwire(obj->pages[i], 1); 2253 vm_page_wakeup(obj->pages[i]); 2254 } 2255 obj->dirty = 0; 2256 2257 kfree(obj->pages); 2258 obj->pages = NULL; 2259 } 2260 2261 int 2262 i915_gem_object_put_pages(struct drm_i915_gem_object *obj) 2263 { 2264 const struct drm_i915_gem_object_ops *ops = obj->ops; 2265 2266 if (obj->pages == NULL) 2267 return 0; 2268 2269 if (obj->pages_pin_count) 2270 return -EBUSY; 2271 2272 BUG_ON(i915_gem_obj_bound_any(obj)); 2273 2274 /* ->put_pages might need to allocate memory for the bit17 swizzle 2275 * array, hence protect them from being reaped by removing them from gtt 2276 * lists early. */ 2277 list_del(&obj->global_list); 2278 2279 ops->put_pages(obj); 2280 obj->pages = NULL; 2281 2282 i915_gem_object_invalidate(obj); 2283 2284 return 0; 2285 } 2286 2287 static int 2288 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) 2289 { 2290 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2291 int page_count, i, j; 2292 vm_object_t vm_obj; 2293 struct vm_page *page; 2294 int ret = -EIO; 2295 2296 /* Assert that the object is not currently in any GPU domain. 
As it 2297 * wasn't in the GTT, there shouldn't be any way it could have been in 2298 * a GPU cache 2299 */ 2300 BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS); 2301 BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS); 2302 2303 page_count = obj->base.size / PAGE_SIZE; 2304 obj->pages = kmalloc(page_count * sizeof(vm_page_t), M_DRM, 2305 M_WAITOK); 2306 2307 /* Get the list of pages out of our struct file. They'll be pinned 2308 * at this point until we release them. 2309 * 2310 * Fail silently without starting the shrinker 2311 */ 2312 vm_obj = obj->base.vm_obj; 2313 VM_OBJECT_LOCK(vm_obj); 2314 for (i = 0; i < page_count; i++) { 2315 page = shmem_read_mapping_page(vm_obj, i); 2316 if (IS_ERR(page)) { 2317 i915_gem_shrink(dev_priv, 2318 page_count, 2319 I915_SHRINK_BOUND | 2320 I915_SHRINK_UNBOUND | 2321 I915_SHRINK_PURGEABLE); 2322 page = shmem_read_mapping_page(vm_obj, i); 2323 } 2324 if (IS_ERR(page)) { 2325 /* We've tried hard to allocate the memory by reaping 2326 * our own buffer, now let the real VM do its job and 2327 * go down in flames if truly OOM. 2328 */ 2329 2330 i915_gem_shrink_all(dev_priv); 2331 page = shmem_read_mapping_page(vm_obj, i); 2332 if (IS_ERR(page)) { 2333 ret = PTR_ERR(page); 2334 goto err_pages; 2335 } 2336 } 2337 #ifdef CONFIG_SWIOTLB 2338 if (swiotlb_nr_tbl()) { 2339 st->nents++; 2340 sg_set_page(sg, page, PAGE_SIZE, 0); 2341 sg = sg_next(sg); 2342 continue; 2343 } 2344 #endif 2345 obj->pages[i] = page; 2346 } 2347 #ifdef CONFIG_SWIOTLB 2348 if (!swiotlb_nr_tbl()) 2349 #endif 2350 VM_OBJECT_UNLOCK(vm_obj); 2351 2352 ret = i915_gem_gtt_prepare_object(obj); 2353 if (ret) 2354 goto err_pages; 2355 2356 if (i915_gem_object_needs_bit17_swizzle(obj)) 2357 i915_gem_object_do_bit_17_swizzle(obj); 2358 2359 if (obj->tiling_mode != I915_TILING_NONE && 2360 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) 2361 i915_gem_object_pin_pages(obj); 2362 2363 return 0; 2364 2365 err_pages: 2366 for (j = 0; j < i; j++) { 2367 page = obj->pages[j]; 2368 vm_page_busy_wait(page, FALSE, "i915gem"); 2369 vm_page_unwire(page, 0); 2370 vm_page_wakeup(page); 2371 } 2372 VM_OBJECT_UNLOCK(vm_obj); 2373 kfree(obj->pages); 2374 obj->pages = NULL; 2375 2376 /* shmemfs first checks if there is enough memory to allocate the page 2377 * and reports ENOSPC should there be insufficient, along with the usual 2378 * ENOMEM for a genuine allocation failure. 2379 * 2380 * We use ENOSPC in our driver to mean that we have run out of aperture 2381 * space and so want to translate the error from shmemfs back to our 2382 * usual understanding of ENOMEM. 2383 */ 2384 if (ret == -ENOSPC) 2385 ret = -ENOMEM; 2386 2387 return ret; 2388 } 2389 2390 /* Ensure that the associated pages are gathered from the backing storage 2391 * and pinned into our object. i915_gem_object_get_pages() may be called 2392 * multiple times before they are released by a single call to 2393 * i915_gem_object_put_pages() - once the pages are no longer referenced 2394 * either as a result of memory pressure (reaping pages under the shrinker) 2395 * or as the object is itself released. 
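 *
 * A typical caller therefore looks roughly like (sketch):
 *
 *	ret = i915_gem_object_get_pages(obj);
 *	if (ret == 0) {
 *		i915_gem_object_pin_pages(obj);
 *		... use obj->pages ...
 *		i915_gem_object_unpin_pages(obj);
 *	}
 *
 * where the pin count keeps i915_gem_object_put_pages() from releasing
 * the backing store (it returns -EBUSY) while the pages are still in use.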
2396 */ 2397 int 2398 i915_gem_object_get_pages(struct drm_i915_gem_object *obj) 2399 { 2400 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2401 const struct drm_i915_gem_object_ops *ops = obj->ops; 2402 int ret; 2403 2404 if (obj->pages) 2405 return 0; 2406 2407 if (obj->madv != I915_MADV_WILLNEED) { 2408 DRM_DEBUG("Attempting to obtain a purgeable object\n"); 2409 return -EFAULT; 2410 } 2411 2412 BUG_ON(obj->pages_pin_count); 2413 2414 ret = ops->get_pages(obj); 2415 if (ret) 2416 return ret; 2417 2418 list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list); 2419 return 0; 2420 } 2421 2422 void i915_vma_move_to_active(struct i915_vma *vma, 2423 struct intel_engine_cs *ring) 2424 { 2425 struct drm_i915_gem_object *obj = vma->obj; 2426 2427 /* Add a reference if we're newly entering the active list. */ 2428 if (obj->active == 0) 2429 drm_gem_object_reference(&obj->base); 2430 obj->active |= intel_ring_flag(ring); 2431 2432 list_move_tail(&obj->ring_list[ring->id], &ring->active_list); 2433 i915_gem_request_assign(&obj->last_read_req[ring->id], 2434 intel_ring_get_request(ring)); 2435 2436 list_move_tail(&vma->mm_list, &vma->vm->active_list); 2437 } 2438 2439 static void 2440 i915_gem_object_retire__write(struct drm_i915_gem_object *obj) 2441 { 2442 RQ_BUG_ON(obj->last_write_req == NULL); 2443 RQ_BUG_ON(!(obj->active & intel_ring_flag(obj->last_write_req->ring))); 2444 2445 i915_gem_request_assign(&obj->last_write_req, NULL); 2446 intel_fb_obj_flush(obj, true); 2447 } 2448 2449 static void 2450 i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring) 2451 { 2452 struct i915_vma *vma; 2453 2454 RQ_BUG_ON(obj->last_read_req[ring] == NULL); 2455 RQ_BUG_ON(!(obj->active & (1 << ring))); 2456 2457 list_del_init(&obj->ring_list[ring]); 2458 i915_gem_request_assign(&obj->last_read_req[ring], NULL); 2459 2460 if (obj->last_write_req && obj->last_write_req->ring->id == ring) 2461 i915_gem_object_retire__write(obj); 2462 2463 obj->active &= ~(1 << ring); 2464 if (obj->active) 2465 return; 2466 2467 list_for_each_entry(vma, &obj->vma_list, vma_link) { 2468 if (!list_empty(&vma->mm_list)) 2469 list_move_tail(&vma->mm_list, &vma->vm->inactive_list); 2470 } 2471 2472 i915_gem_request_assign(&obj->last_fenced_req, NULL); 2473 drm_gem_object_unreference(&obj->base); 2474 } 2475 2476 static int 2477 i915_gem_init_seqno(struct drm_device *dev, u32 seqno) 2478 { 2479 struct drm_i915_private *dev_priv = dev->dev_private; 2480 struct intel_engine_cs *ring; 2481 int ret, i, j; 2482 2483 /* Carefully retire all requests without writing to the rings */ 2484 for_each_ring(ring, dev_priv, i) { 2485 ret = intel_ring_idle(ring); 2486 if (ret) 2487 return ret; 2488 } 2489 i915_gem_retire_requests(dev); 2490 2491 /* Finally reset hw state */ 2492 for_each_ring(ring, dev_priv, i) { 2493 intel_ring_init_seqno(ring, seqno); 2494 2495 for (j = 0; j < ARRAY_SIZE(ring->semaphore.sync_seqno); j++) 2496 ring->semaphore.sync_seqno[j] = 0; 2497 } 2498 2499 return 0; 2500 } 2501 2502 int i915_gem_set_seqno(struct drm_device *dev, u32 seqno) 2503 { 2504 struct drm_i915_private *dev_priv = dev->dev_private; 2505 int ret; 2506 2507 if (seqno == 0) 2508 return -EINVAL; 2509 2510 /* HWS page needs to be set less than what we 2511 * will inject to ring 2512 */ 2513 ret = i915_gem_init_seqno(dev, seqno - 1); 2514 if (ret) 2515 return ret; 2516 2517 /* Carefully set the last_seqno value so that wrap 2518 * detection still works 2519 */ 2520 dev_priv->next_seqno = seqno; 2521 dev_priv->last_seqno = 
seqno - 1; 2522 if (dev_priv->last_seqno == 0) 2523 dev_priv->last_seqno--; 2524 2525 return 0; 2526 } 2527 2528 int 2529 i915_gem_get_seqno(struct drm_device *dev, u32 *seqno) 2530 { 2531 struct drm_i915_private *dev_priv = dev->dev_private; 2532 2533 /* reserve 0 for non-seqno */ 2534 if (dev_priv->next_seqno == 0) { 2535 int ret = i915_gem_init_seqno(dev, 0); 2536 if (ret) 2537 return ret; 2538 2539 dev_priv->next_seqno = 1; 2540 } 2541 2542 *seqno = dev_priv->last_seqno = dev_priv->next_seqno++; 2543 return 0; 2544 } 2545 2546 int __i915_add_request(struct intel_engine_cs *ring, 2547 struct drm_file *file, 2548 struct drm_i915_gem_object *obj) 2549 { 2550 struct drm_i915_private *dev_priv = ring->dev->dev_private; 2551 struct drm_i915_gem_request *request; 2552 struct intel_ringbuffer *ringbuf; 2553 u32 request_start; 2554 int ret; 2555 2556 request = ring->outstanding_lazy_request; 2557 if (WARN_ON(request == NULL)) 2558 return -ENOMEM; 2559 2560 if (i915.enable_execlists) { 2561 ringbuf = request->ctx->engine[ring->id].ringbuf; 2562 } else 2563 ringbuf = ring->buffer; 2564 2565 request_start = intel_ring_get_tail(ringbuf); 2566 /* 2567 * Emit any outstanding flushes - execbuf can fail to emit the flush 2568 * after having emitted the batchbuffer command. Hence we need to fix 2569 * things up similar to emitting the lazy request. The difference here 2570 * is that the flush _must_ happen before the next request, no matter 2571 * what. 2572 */ 2573 if (i915.enable_execlists) { 2574 ret = logical_ring_flush_all_caches(ringbuf, request->ctx); 2575 if (ret) 2576 return ret; 2577 } else { 2578 ret = intel_ring_flush_all_caches(ring); 2579 if (ret) 2580 return ret; 2581 } 2582 2583 /* Record the position of the start of the request so that 2584 * should we detect the updated seqno part-way through the 2585 * GPU processing the request, we never over-estimate the 2586 * position of the head. 2587 */ 2588 request->postfix = intel_ring_get_tail(ringbuf); 2589 2590 if (i915.enable_execlists) { 2591 ret = ring->emit_request(ringbuf, request); 2592 if (ret) 2593 return ret; 2594 } else { 2595 ret = ring->add_request(ring); 2596 if (ret) 2597 return ret; 2598 2599 request->tail = intel_ring_get_tail(ringbuf); 2600 } 2601 2602 request->head = request_start; 2603 2604 /* Whilst this request exists, batch_obj will be on the 2605 * active_list, and so will hold the active reference. Only when this 2606 * request is retired will the the batch_obj be moved onto the 2607 * inactive_list and lose its active reference. Hence we do not need 2608 * to explicitly hold another reference here. 2609 */ 2610 request->batch_obj = obj; 2611 2612 if (!i915.enable_execlists) { 2613 /* Hold a reference to the current context so that we can inspect 2614 * it later in case a hangcheck error event fires. 
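 * The reference taken here is dropped again in i915_gem_request_free()
 * once the request itself is released.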
2615 */ 2616 request->ctx = ring->last_context; 2617 if (request->ctx) 2618 i915_gem_context_reference(request->ctx); 2619 } 2620 2621 request->emitted_jiffies = jiffies; 2622 ring->last_submitted_seqno = request->seqno; 2623 list_add_tail(&request->list, &ring->request_list); 2624 request->file_priv = NULL; 2625 2626 if (file) { 2627 struct drm_i915_file_private *file_priv = file->driver_priv; 2628 2629 spin_lock(&file_priv->mm.lock); 2630 request->file_priv = file_priv; 2631 list_add_tail(&request->client_list, 2632 &file_priv->mm.request_list); 2633 spin_unlock(&file_priv->mm.lock); 2634 2635 request->pid = curproc->p_pid; 2636 } 2637 2638 trace_i915_gem_request_add(request); 2639 ring->outstanding_lazy_request = NULL; 2640 2641 i915_queue_hangcheck(ring->dev); 2642 2643 queue_delayed_work(dev_priv->wq, 2644 &dev_priv->mm.retire_work, 2645 round_jiffies_up_relative(HZ)); 2646 intel_mark_busy(dev_priv->dev); 2647 2648 return 0; 2649 } 2650 2651 static bool i915_context_is_banned(struct drm_i915_private *dev_priv, 2652 const struct intel_context *ctx) 2653 { 2654 unsigned long elapsed; 2655 2656 elapsed = get_seconds() - ctx->hang_stats.guilty_ts; 2657 2658 if (ctx->hang_stats.banned) 2659 return true; 2660 2661 if (ctx->hang_stats.ban_period_seconds && 2662 elapsed <= ctx->hang_stats.ban_period_seconds) { 2663 if (!i915_gem_context_is_default(ctx)) { 2664 DRM_DEBUG("context hanging too fast, banning!\n"); 2665 return true; 2666 } else if (i915_stop_ring_allow_ban(dev_priv)) { 2667 if (i915_stop_ring_allow_warn(dev_priv)) 2668 DRM_ERROR("gpu hanging too fast, banning!\n"); 2669 return true; 2670 } 2671 } 2672 2673 return false; 2674 } 2675 2676 static void i915_set_reset_status(struct drm_i915_private *dev_priv, 2677 struct intel_context *ctx, 2678 const bool guilty) 2679 { 2680 struct i915_ctx_hang_stats *hs; 2681 2682 if (WARN_ON(!ctx)) 2683 return; 2684 2685 hs = &ctx->hang_stats; 2686 2687 if (guilty) { 2688 hs->banned = i915_context_is_banned(dev_priv, ctx); 2689 hs->batch_active++; 2690 hs->guilty_ts = get_seconds(); 2691 } else { 2692 hs->batch_pending++; 2693 } 2694 } 2695 2696 void i915_gem_request_free(struct kref *req_ref) 2697 { 2698 struct drm_i915_gem_request *req = container_of(req_ref, 2699 typeof(*req), ref); 2700 struct intel_context *ctx = req->ctx; 2701 2702 if (ctx) { 2703 if (i915.enable_execlists) { 2704 struct intel_engine_cs *ring = req->ring; 2705 2706 if (ctx != ring->default_context) 2707 intel_lr_context_unpin(ring, ctx); 2708 } 2709 2710 i915_gem_context_unreference(ctx); 2711 } 2712 2713 kfree(req); 2714 } 2715 2716 int i915_gem_request_alloc(struct intel_engine_cs *ring, 2717 struct intel_context *ctx) 2718 { 2719 struct drm_i915_private *dev_priv = to_i915(ring->dev); 2720 struct drm_i915_gem_request *req; 2721 int ret; 2722 2723 if (ring->outstanding_lazy_request) 2724 return 0; 2725 2726 req = kzalloc(sizeof(*req), GFP_KERNEL); 2727 if (req == NULL) 2728 return -ENOMEM; 2729 2730 kref_init(&req->ref); 2731 req->i915 = dev_priv; 2732 2733 ret = i915_gem_get_seqno(ring->dev, &req->seqno); 2734 if (ret) 2735 goto err; 2736 2737 req->ring = ring; 2738 2739 if (i915.enable_execlists) 2740 ret = intel_logical_ring_alloc_request_extras(req, ctx); 2741 else 2742 ret = intel_ring_alloc_request_extras(req); 2743 if (ret) 2744 goto err; 2745 2746 ring->outstanding_lazy_request = req; 2747 return 0; 2748 2749 err: 2750 kfree(req); 2751 return ret; 2752 } 2753 2754 struct drm_i915_gem_request * 2755 i915_gem_find_active_request(struct intel_engine_cs *ring) 2756 { 
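	/*
	 * Scan the request list in submission order and return the first
	 * request that has not yet completed; after a GPU hang this is our
	 * best guess at the batch that was executing, which
	 * i915_gem_reset_ring_status() uses to assign blame.
	 */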
2757 struct drm_i915_gem_request *request; 2758 2759 list_for_each_entry(request, &ring->request_list, list) { 2760 if (i915_gem_request_completed(request, false)) 2761 continue; 2762 2763 return request; 2764 } 2765 2766 return NULL; 2767 } 2768 2769 static void i915_gem_reset_ring_status(struct drm_i915_private *dev_priv, 2770 struct intel_engine_cs *ring) 2771 { 2772 struct drm_i915_gem_request *request; 2773 bool ring_hung; 2774 2775 request = i915_gem_find_active_request(ring); 2776 2777 if (request == NULL) 2778 return; 2779 2780 ring_hung = ring->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG; 2781 2782 i915_set_reset_status(dev_priv, request->ctx, ring_hung); 2783 2784 list_for_each_entry_continue(request, &ring->request_list, list) 2785 i915_set_reset_status(dev_priv, request->ctx, false); 2786 } 2787 2788 static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv, 2789 struct intel_engine_cs *ring) 2790 { 2791 while (!list_empty(&ring->active_list)) { 2792 struct drm_i915_gem_object *obj; 2793 2794 obj = list_first_entry(&ring->active_list, 2795 struct drm_i915_gem_object, 2796 ring_list[ring->id]); 2797 2798 i915_gem_object_retire__read(obj, ring->id); 2799 } 2800 2801 /* 2802 * Clear the execlists queue up before freeing the requests, as those 2803 * are the ones that keep the context and ringbuffer backing objects 2804 * pinned in place. 2805 */ 2806 while (!list_empty(&ring->execlist_queue)) { 2807 struct drm_i915_gem_request *submit_req; 2808 2809 submit_req = list_first_entry(&ring->execlist_queue, 2810 struct drm_i915_gem_request, 2811 execlist_link); 2812 list_del(&submit_req->execlist_link); 2813 2814 if (submit_req->ctx != ring->default_context) 2815 intel_lr_context_unpin(ring, submit_req->ctx); 2816 2817 i915_gem_request_unreference(submit_req); 2818 } 2819 2820 /* 2821 * We must free the requests after all the corresponding objects have 2822 * been moved off active lists. Which is the same order as the normal 2823 * retire_requests function does. This is important if object hold 2824 * implicit references on things like e.g. ppgtt address spaces through 2825 * the request. 2826 */ 2827 while (!list_empty(&ring->request_list)) { 2828 struct drm_i915_gem_request *request; 2829 2830 request = list_first_entry(&ring->request_list, 2831 struct drm_i915_gem_request, 2832 list); 2833 2834 i915_gem_request_retire(request); 2835 } 2836 2837 /* This may not have been flushed before the reset, so clean it now */ 2838 i915_gem_request_assign(&ring->outstanding_lazy_request, NULL); 2839 } 2840 2841 void i915_gem_restore_fences(struct drm_device *dev) 2842 { 2843 struct drm_i915_private *dev_priv = dev->dev_private; 2844 int i; 2845 2846 for (i = 0; i < dev_priv->num_fence_regs; i++) { 2847 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i]; 2848 2849 /* 2850 * Commit delayed tiling changes if we have an object still 2851 * attached to the fence, otherwise just clear the fence. 2852 */ 2853 if (reg->obj) { 2854 i915_gem_object_update_fence(reg->obj, reg, 2855 reg->obj->tiling_mode); 2856 } else { 2857 i915_gem_write_fence(dev, i, NULL); 2858 } 2859 } 2860 } 2861 2862 void i915_gem_reset(struct drm_device *dev) 2863 { 2864 struct drm_i915_private *dev_priv = dev->dev_private; 2865 struct intel_engine_cs *ring; 2866 int i; 2867 2868 /* 2869 * Before we free the objects from the requests, we need to inspect 2870 * them for finding the guilty party. As the requests only borrow 2871 * their reference to the objects, the inspection must be done first. 
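 * Hence the two passes below: first i915_gem_reset_ring_status() to
 * record guilt, then i915_gem_reset_ring_cleanup() to retire everything.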
2872 */ 2873 for_each_ring(ring, dev_priv, i) 2874 i915_gem_reset_ring_status(dev_priv, ring); 2875 2876 for_each_ring(ring, dev_priv, i) 2877 i915_gem_reset_ring_cleanup(dev_priv, ring); 2878 2879 i915_gem_context_reset(dev); 2880 2881 i915_gem_restore_fences(dev); 2882 2883 WARN_ON(i915_verify_lists(dev)); 2884 } 2885 2886 /** 2887 * This function clears the request list as sequence numbers are passed. 2888 */ 2889 void 2890 i915_gem_retire_requests_ring(struct intel_engine_cs *ring) 2891 { 2892 WARN_ON(i915_verify_lists(ring->dev)); 2893 2894 /* Retire requests first as we use it above for the early return. 2895 * If we retire requests last, we may use a later seqno and so clear 2896 * the requests lists without clearing the active list, leading to 2897 * confusion. 2898 */ 2899 while (!list_empty(&ring->request_list)) { 2900 struct drm_i915_gem_request *request; 2901 2902 request = list_first_entry(&ring->request_list, 2903 struct drm_i915_gem_request, 2904 list); 2905 2906 if (!i915_gem_request_completed(request, true)) 2907 break; 2908 2909 i915_gem_request_retire(request); 2910 } 2911 2912 /* Move any buffers on the active list that are no longer referenced 2913 * by the ringbuffer to the flushing/inactive lists as appropriate, 2914 * before we free the context associated with the requests. 2915 */ 2916 while (!list_empty(&ring->active_list)) { 2917 struct drm_i915_gem_object *obj; 2918 2919 obj = list_first_entry(&ring->active_list, 2920 struct drm_i915_gem_object, 2921 ring_list[ring->id]); 2922 2923 if (!list_empty(&obj->last_read_req[ring->id]->list)) 2924 break; 2925 2926 i915_gem_object_retire__read(obj, ring->id); 2927 } 2928 2929 if (unlikely(ring->trace_irq_req && 2930 i915_gem_request_completed(ring->trace_irq_req, true))) { 2931 ring->irq_put(ring); 2932 i915_gem_request_assign(&ring->trace_irq_req, NULL); 2933 } 2934 2935 WARN_ON(i915_verify_lists(ring->dev)); 2936 } 2937 2938 bool 2939 i915_gem_retire_requests(struct drm_device *dev) 2940 { 2941 struct drm_i915_private *dev_priv = dev->dev_private; 2942 struct intel_engine_cs *ring; 2943 bool idle = true; 2944 int i; 2945 2946 for_each_ring(ring, dev_priv, i) { 2947 i915_gem_retire_requests_ring(ring); 2948 idle &= list_empty(&ring->request_list); 2949 if (i915.enable_execlists) { 2950 2951 lockmgr(&ring->execlist_lock, LK_EXCLUSIVE); 2952 idle &= list_empty(&ring->execlist_queue); 2953 lockmgr(&ring->execlist_lock, LK_RELEASE); 2954 2955 intel_execlists_retire_requests(ring); 2956 } 2957 } 2958 2959 if (idle) 2960 mod_delayed_work(dev_priv->wq, 2961 &dev_priv->mm.idle_work, 2962 msecs_to_jiffies(100)); 2963 2964 return idle; 2965 } 2966 2967 static void 2968 i915_gem_retire_work_handler(struct work_struct *work) 2969 { 2970 struct drm_i915_private *dev_priv = 2971 container_of(work, typeof(*dev_priv), mm.retire_work.work); 2972 struct drm_device *dev = dev_priv->dev; 2973 bool idle; 2974 2975 /* Come back later if the device is busy... 
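 * (either the lock is contended or requests are still outstanding); in
 * that case we simply requeue ourselves below after roughly a second
 * (round_jiffies_up_relative(HZ)).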
*/ 2976 idle = false; 2977 if (mutex_trylock(&dev->struct_mutex)) { 2978 idle = i915_gem_retire_requests(dev); 2979 mutex_unlock(&dev->struct_mutex); 2980 } 2981 if (!idle) 2982 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 2983 round_jiffies_up_relative(HZ)); 2984 } 2985 2986 static void 2987 i915_gem_idle_work_handler(struct work_struct *work) 2988 { 2989 struct drm_i915_private *dev_priv = 2990 container_of(work, typeof(*dev_priv), mm.idle_work.work); 2991 struct drm_device *dev = dev_priv->dev; 2992 struct intel_engine_cs *ring; 2993 int i; 2994 2995 for_each_ring(ring, dev_priv, i) 2996 if (!list_empty(&ring->request_list)) 2997 return; 2998 2999 intel_mark_idle(dev); 3000 3001 if (mutex_trylock(&dev->struct_mutex)) { 3002 struct intel_engine_cs *ring; 3003 int i; 3004 3005 for_each_ring(ring, dev_priv, i) 3006 i915_gem_batch_pool_fini(&ring->batch_pool); 3007 3008 mutex_unlock(&dev->struct_mutex); 3009 } 3010 } 3011 3012 /** 3013 * Ensures that an object will eventually get non-busy by flushing any required 3014 * write domains, emitting any outstanding lazy request and retiring 3015 * completed requests. 3016 */ 3017 static int 3018 i915_gem_object_flush_active(struct drm_i915_gem_object *obj) 3019 { 3020 int ret, i; 3021 3022 if (!obj->active) 3023 return 0; 3024 3025 for (i = 0; i < I915_NUM_RINGS; i++) { 3026 struct drm_i915_gem_request *req; 3027 3028 req = obj->last_read_req[i]; 3029 if (req == NULL) 3030 continue; 3031 3032 if (list_empty(&req->list)) 3033 goto retire; 3034 3035 ret = i915_gem_check_olr(req); 3036 if (ret) 3037 return ret; 3038 3039 if (i915_gem_request_completed(req, true)) { 3040 __i915_gem_request_retire__upto(req); 3041 retire: 3042 i915_gem_object_retire__read(obj, i); 3043 } 3044 } 3045 3046 return 0; 3047 } 3048 3049 /** 3050 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT 3051 * @DRM_IOCTL_ARGS: standard ioctl arguments 3052 * 3053 * Returns 0 if successful, else an error is returned with the remaining time in 3054 * the timeout parameter. 3055 * -ETIME: object is still busy after timeout 3056 * -ERESTARTSYS: signal interrupted the wait 3057 * -ENOENT: object doesn't exist 3058 * Also possible, but rare: 3059 * -EAGAIN: GPU wedged 3060 * -ENOMEM: damn 3061 * -ENODEV: Internal IRQ fail 3062 * -E?: The add request failed 3063 * 3064 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any 3065 * non-zero timeout parameter the wait ioctl will wait for the given number of 3066 * nanoseconds on an object becoming unbusy. Since the wait itself does so 3067 * without holding struct_mutex the object may become re-busied before this 3068 * function completes. A similar but shorter race condition exists in the busy 3069 * ioctl. 3070 */ 3071 int 3072 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) 3073 { 3074 struct drm_i915_private *dev_priv = dev->dev_private; 3075 struct drm_i915_gem_wait *args = data; 3076 struct drm_i915_gem_object *obj; 3077 struct drm_i915_gem_request *req[I915_NUM_RINGS]; 3078 unsigned reset_counter; 3079 int i, n = 0; 3080 int ret; 3081 3082 if (args->flags != 0) 3083 return -EINVAL; 3084 3085 ret = i915_mutex_lock_interruptible(dev); 3086 if (ret) 3087 return ret; 3088 3089 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->bo_handle)); 3090 if (&obj->base == NULL) { 3091 mutex_unlock(&dev->struct_mutex); 3092 return -ENOENT; 3093 } 3094 3095 /* Need to make sure the object gets inactive eventually.
*/ 3096 ret = i915_gem_object_flush_active(obj); 3097 if (ret) 3098 goto out; 3099 3100 if (!obj->active) 3101 goto out; 3102 3103 /* Do this after OLR check to make sure we make forward progress polling 3104 * on this IOCTL with a timeout == 0 (like busy ioctl) 3105 */ 3106 if (args->timeout_ns == 0) { 3107 ret = -ETIME; 3108 goto out; 3109 } 3110 3111 drm_gem_object_unreference(&obj->base); 3112 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter); 3113 3114 for (i = 0; i < I915_NUM_RINGS; i++) { 3115 if (obj->last_read_req[i] == NULL) 3116 continue; 3117 3118 req[n++] = i915_gem_request_reference(obj->last_read_req[i]); 3119 } 3120 3121 mutex_unlock(&dev->struct_mutex); 3122 3123 for (i = 0; i < n; i++) { 3124 if (ret == 0) 3125 ret = __i915_wait_request(req[i], reset_counter, true, 3126 args->timeout_ns > 0 ? &args->timeout_ns : NULL, 3127 file->driver_priv); 3128 i915_gem_request_unreference__unlocked(req[i]); 3129 } 3130 return ret; 3131 3132 out: 3133 drm_gem_object_unreference(&obj->base); 3134 mutex_unlock(&dev->struct_mutex); 3135 return ret; 3136 } 3137 3138 static int 3139 __i915_gem_object_sync(struct drm_i915_gem_object *obj, 3140 struct intel_engine_cs *to, 3141 struct drm_i915_gem_request *req) 3142 { 3143 struct intel_engine_cs *from; 3144 int ret; 3145 3146 from = i915_gem_request_get_ring(req); 3147 if (to == from) 3148 return 0; 3149 3150 if (i915_gem_request_completed(req, true)) 3151 return 0; 3152 3153 ret = i915_gem_check_olr(req); 3154 if (ret) 3155 return ret; 3156 3157 if (!i915_semaphore_is_enabled(obj->base.dev)) { 3158 struct drm_i915_private *i915 = to_i915(obj->base.dev); 3159 ret = __i915_wait_request(req, 3160 atomic_read(&i915->gpu_error.reset_counter), 3161 i915->mm.interruptible, 3162 NULL, 3163 &i915->rps.semaphores); 3164 if (ret) 3165 return ret; 3166 3167 i915_gem_object_retire_request(obj, req); 3168 } else { 3169 int idx = intel_ring_sync_index(from, to); 3170 u32 seqno = i915_gem_request_get_seqno(req); 3171 3172 if (seqno <= from->semaphore.sync_seqno[idx]) 3173 return 0; 3174 3175 trace_i915_gem_ring_sync_to(from, to, req); 3176 ret = to->semaphore.sync_to(to, from, seqno); 3177 if (ret) 3178 return ret; 3179 3180 /* We use last_read_req because sync_to() 3181 * might have just caused seqno wrap under 3182 * the radar. 3183 */ 3184 from->semaphore.sync_seqno[idx] = 3185 i915_gem_request_get_seqno(obj->last_read_req[from->id]); 3186 } 3187 3188 return 0; 3189 } 3190 3191 /** 3192 * i915_gem_object_sync - sync an object to a ring. 3193 * 3194 * @obj: object which may be in use on another ring. 3195 * @to: ring we wish to use the object on. May be NULL. 3196 * 3197 * This code is meant to abstract object synchronization with the GPU. 3198 * Calling with NULL implies synchronizing the object with the CPU 3199 * rather than a particular GPU ring. Conceptually we serialise writes 3200 * between engines inside the GPU. We only allow on engine to write 3201 * into a buffer at any time, but multiple readers. To ensure each has 3202 * a coherent view of memory, we must: 3203 * 3204 * - If there is an outstanding write request to the object, the new 3205 * request must wait for it to complete (either CPU or in hw, requests 3206 * on the same ring will be naturally ordered). 3207 * 3208 * - If we are a write request (pending_write_domain is set), the new 3209 * request must wait for outstanding read requests to complete. 3210 * 3211 * Returns 0 if successful, else propagates up the lower layer error. 
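 *
 * For example: if the render ring last wrote to @obj and the blitter now
 * wants to read it, the new request must wait for that write, either via
 * a hardware semaphore (sync_to) or, with semaphores disabled, by
 * blocking on the CPU in __i915_wait_request(); reads on different rings
 * may otherwise proceed in parallel.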
3212 */ 3213 int 3214 i915_gem_object_sync(struct drm_i915_gem_object *obj, 3215 struct intel_engine_cs *to) 3216 { 3217 const bool readonly = obj->base.pending_write_domain == 0; 3218 struct drm_i915_gem_request *req[I915_NUM_RINGS]; 3219 int ret, i, n; 3220 3221 if (!obj->active) 3222 return 0; 3223 3224 if (to == NULL) 3225 return i915_gem_object_wait_rendering(obj, readonly); 3226 3227 n = 0; 3228 if (readonly) { 3229 if (obj->last_write_req) 3230 req[n++] = obj->last_write_req; 3231 } else { 3232 for (i = 0; i < I915_NUM_RINGS; i++) 3233 if (obj->last_read_req[i]) 3234 req[n++] = obj->last_read_req[i]; 3235 } 3236 for (i = 0; i < n; i++) { 3237 ret = __i915_gem_object_sync(obj, to, req[i]); 3238 if (ret) 3239 return ret; 3240 } 3241 3242 return 0; 3243 } 3244 3245 static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj) 3246 { 3247 u32 old_write_domain, old_read_domains; 3248 3249 /* Force a pagefault for domain tracking on next user access */ 3250 i915_gem_release_mmap(obj); 3251 3252 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) 3253 return; 3254 3255 /* Wait for any direct GTT access to complete */ 3256 mb(); 3257 3258 old_read_domains = obj->base.read_domains; 3259 old_write_domain = obj->base.write_domain; 3260 3261 obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT; 3262 obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT; 3263 3264 trace_i915_gem_object_change_domain(obj, 3265 old_read_domains, 3266 old_write_domain); 3267 } 3268 3269 int i915_vma_unbind(struct i915_vma *vma) 3270 { 3271 struct drm_i915_gem_object *obj = vma->obj; 3272 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 3273 int ret; 3274 3275 if (list_empty(&vma->vma_link)) 3276 return 0; 3277 3278 if (!drm_mm_node_allocated(&vma->node)) { 3279 i915_gem_vma_destroy(vma); 3280 return 0; 3281 } 3282 3283 if (vma->pin_count) 3284 return -EBUSY; 3285 3286 BUG_ON(obj->pages == NULL); 3287 3288 ret = i915_gem_object_wait_rendering(obj, false); 3289 if (ret) 3290 return ret; 3291 /* Continue on if we fail due to EIO, the GPU is hung so we 3292 * should be safe and we need to cleanup or else we might 3293 * cause memory corruption through use-after-free. 3294 */ 3295 3296 if (i915_is_ggtt(vma->vm) && 3297 vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) { 3298 i915_gem_object_finish_gtt(obj); 3299 3300 /* release the fence reg _after_ flushing */ 3301 ret = i915_gem_object_put_fence(obj); 3302 if (ret) 3303 return ret; 3304 } 3305 3306 trace_i915_vma_unbind(vma); 3307 3308 vma->vm->unbind_vma(vma); 3309 vma->bound = 0; 3310 3311 list_del_init(&vma->mm_list); 3312 if (i915_is_ggtt(vma->vm)) { 3313 if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) { 3314 obj->map_and_fenceable = false; 3315 } else if (vma->ggtt_view.pages) { 3316 kfree(vma->ggtt_view.pages); 3317 } 3318 vma->ggtt_view.pages = NULL; 3319 } 3320 3321 drm_mm_remove_node(&vma->node); 3322 i915_gem_vma_destroy(vma); 3323 3324 /* Since the unbound list is global, only move to that list if 3325 * no more VMAs exist. */ 3326 if (list_empty(&obj->vma_list)) 3327 list_move_tail(&obj->global_list, &dev_priv->mm.unbound_list); 3328 3329 /* And finally now the object is completely decoupled from this vma, 3330 * we can drop its hold on the backing storage and allow it to be 3331 * reaped by the shrinker. 
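 * The unpin below pairs with the i915_gem_object_pin_pages() call made
 * when the VMA was bound in i915_gem_object_bind_to_vm().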
3332 */ 3333 i915_gem_object_unpin_pages(obj); 3334 3335 return 0; 3336 } 3337 3338 int i915_gpu_idle(struct drm_device *dev) 3339 { 3340 struct drm_i915_private *dev_priv = dev->dev_private; 3341 struct intel_engine_cs *ring; 3342 int ret, i; 3343 3344 /* Flush everything onto the inactive list. */ 3345 for_each_ring(ring, dev_priv, i) { 3346 if (!i915.enable_execlists) { 3347 ret = i915_switch_context(ring, ring->default_context); 3348 if (ret) 3349 return ret; 3350 } 3351 3352 ret = intel_ring_idle(ring); 3353 if (ret) 3354 return ret; 3355 } 3356 3357 WARN_ON(i915_verify_lists(dev)); 3358 return 0; 3359 } 3360 3361 static void i965_write_fence_reg(struct drm_device *dev, int reg, 3362 struct drm_i915_gem_object *obj) 3363 { 3364 struct drm_i915_private *dev_priv = dev->dev_private; 3365 int fence_reg; 3366 int fence_pitch_shift; 3367 3368 if (INTEL_INFO(dev)->gen >= 6) { 3369 fence_reg = FENCE_REG_SANDYBRIDGE_0; 3370 fence_pitch_shift = SANDYBRIDGE_FENCE_PITCH_SHIFT; 3371 } else { 3372 fence_reg = FENCE_REG_965_0; 3373 fence_pitch_shift = I965_FENCE_PITCH_SHIFT; 3374 } 3375 3376 fence_reg += reg * 8; 3377 3378 /* To w/a incoherency with non-atomic 64-bit register updates, 3379 * we split the 64-bit update into two 32-bit writes. In order 3380 * for a partial fence not to be evaluated between writes, we 3381 * precede the update with write to turn off the fence register, 3382 * and only enable the fence as the last step. 3383 * 3384 * For extra levels of paranoia, we make sure each step lands 3385 * before applying the next step. 3386 */ 3387 I915_WRITE(fence_reg, 0); 3388 POSTING_READ(fence_reg); 3389 3390 if (obj) { 3391 u32 size = i915_gem_obj_ggtt_size(obj); 3392 uint64_t val; 3393 3394 val = (uint64_t)((i915_gem_obj_ggtt_offset(obj) + size - 4096) & 3395 0xfffff000) << 32; 3396 val |= i915_gem_obj_ggtt_offset(obj) & 0xfffff000; 3397 val |= (uint64_t)((obj->stride / 128) - 1) << fence_pitch_shift; 3398 if (obj->tiling_mode == I915_TILING_Y) 3399 val |= 1 << I965_FENCE_TILING_Y_SHIFT; 3400 val |= I965_FENCE_REG_VALID; 3401 3402 I915_WRITE(fence_reg + 4, val >> 32); 3403 POSTING_READ(fence_reg + 4); 3404 3405 I915_WRITE(fence_reg + 0, val); 3406 POSTING_READ(fence_reg); 3407 } else { 3408 I915_WRITE(fence_reg + 4, 0); 3409 POSTING_READ(fence_reg + 4); 3410 } 3411 } 3412 3413 static void i915_write_fence_reg(struct drm_device *dev, int reg, 3414 struct drm_i915_gem_object *obj) 3415 { 3416 struct drm_i915_private *dev_priv = dev->dev_private; 3417 u32 val; 3418 3419 if (obj) { 3420 u32 size = i915_gem_obj_ggtt_size(obj); 3421 int pitch_val; 3422 int tile_width; 3423 3424 WARN((i915_gem_obj_ggtt_offset(obj) & ~I915_FENCE_START_MASK) || 3425 (size & -size) != size || 3426 (i915_gem_obj_ggtt_offset(obj) & (size - 1)), 3427 "object 0x%08lx [fenceable? 
%d] not 1M or pot-size (0x%08x) aligned\n", 3428 i915_gem_obj_ggtt_offset(obj), obj->map_and_fenceable, size); 3429 3430 if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev)) 3431 tile_width = 128; 3432 else 3433 tile_width = 512; 3434 3435 /* Note: pitch better be a power of two tile widths */ 3436 pitch_val = obj->stride / tile_width; 3437 pitch_val = ffs(pitch_val) - 1; 3438 3439 val = i915_gem_obj_ggtt_offset(obj); 3440 if (obj->tiling_mode == I915_TILING_Y) 3441 val |= 1 << I830_FENCE_TILING_Y_SHIFT; 3442 val |= I915_FENCE_SIZE_BITS(size); 3443 val |= pitch_val << I830_FENCE_PITCH_SHIFT; 3444 val |= I830_FENCE_REG_VALID; 3445 } else 3446 val = 0; 3447 3448 if (reg < 8) 3449 reg = FENCE_REG_830_0 + reg * 4; 3450 else 3451 reg = FENCE_REG_945_8 + (reg - 8) * 4; 3452 3453 I915_WRITE(reg, val); 3454 POSTING_READ(reg); 3455 } 3456 3457 static void i830_write_fence_reg(struct drm_device *dev, int reg, 3458 struct drm_i915_gem_object *obj) 3459 { 3460 struct drm_i915_private *dev_priv = dev->dev_private; 3461 uint32_t val; 3462 3463 if (obj) { 3464 u32 size = i915_gem_obj_ggtt_size(obj); 3465 uint32_t pitch_val; 3466 3467 WARN((i915_gem_obj_ggtt_offset(obj) & ~I830_FENCE_START_MASK) || 3468 (size & -size) != size || 3469 (i915_gem_obj_ggtt_offset(obj) & (size - 1)), 3470 "object 0x%08lx not 512K or pot-size 0x%08x aligned\n", 3471 i915_gem_obj_ggtt_offset(obj), size); 3472 3473 pitch_val = obj->stride / 128; 3474 pitch_val = ffs(pitch_val) - 1; 3475 3476 val = i915_gem_obj_ggtt_offset(obj); 3477 if (obj->tiling_mode == I915_TILING_Y) 3478 val |= 1 << I830_FENCE_TILING_Y_SHIFT; 3479 val |= I830_FENCE_SIZE_BITS(size); 3480 val |= pitch_val << I830_FENCE_PITCH_SHIFT; 3481 val |= I830_FENCE_REG_VALID; 3482 } else 3483 val = 0; 3484 3485 I915_WRITE(FENCE_REG_830_0 + reg * 4, val); 3486 POSTING_READ(FENCE_REG_830_0 + reg * 4); 3487 } 3488 3489 inline static bool i915_gem_object_needs_mb(struct drm_i915_gem_object *obj) 3490 { 3491 return obj && obj->base.read_domains & I915_GEM_DOMAIN_GTT; 3492 } 3493 3494 static void i915_gem_write_fence(struct drm_device *dev, int reg, 3495 struct drm_i915_gem_object *obj) 3496 { 3497 struct drm_i915_private *dev_priv = dev->dev_private; 3498 3499 /* Ensure that all CPU reads are completed before installing a fence 3500 * and all writes before removing the fence. 3501 */ 3502 if (i915_gem_object_needs_mb(dev_priv->fence_regs[reg].obj)) 3503 mb(); 3504 3505 WARN(obj && (!obj->stride || !obj->tiling_mode), 3506 "bogus fence setup with stride: 0x%x, tiling mode: %i\n", 3507 obj->stride, obj->tiling_mode); 3508 3509 if (IS_GEN2(dev)) 3510 i830_write_fence_reg(dev, reg, obj); 3511 else if (IS_GEN3(dev)) 3512 i915_write_fence_reg(dev, reg, obj); 3513 else if (INTEL_INFO(dev)->gen >= 4) 3514 i965_write_fence_reg(dev, reg, obj); 3515 3516 /* And similarly be paranoid that no direct access to this region 3517 * is reordered to before the fence is installed. 3518 */ 3519 if (i915_gem_object_needs_mb(obj)) 3520 mb(); 3521 } 3522 3523 static inline int fence_number(struct drm_i915_private *dev_priv, 3524 struct drm_i915_fence_reg *fence) 3525 { 3526 return fence - dev_priv->fence_regs; 3527 } 3528 3529 static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj, 3530 struct drm_i915_fence_reg *fence, 3531 bool enable) 3532 { 3533 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 3534 int reg = fence_number(dev_priv, fence); 3535 3536 i915_gem_write_fence(obj->base.dev, reg, enable ? 
obj : NULL); 3537 3538 if (enable) { 3539 obj->fence_reg = reg; 3540 fence->obj = obj; 3541 list_move_tail(&fence->lru_list, &dev_priv->mm.fence_list); 3542 } else { 3543 obj->fence_reg = I915_FENCE_REG_NONE; 3544 fence->obj = NULL; 3545 list_del_init(&fence->lru_list); 3546 } 3547 obj->fence_dirty = false; 3548 } 3549 3550 static int 3551 i915_gem_object_wait_fence(struct drm_i915_gem_object *obj) 3552 { 3553 if (obj->last_fenced_req) { 3554 int ret = i915_wait_request(obj->last_fenced_req); 3555 if (ret) 3556 return ret; 3557 3558 i915_gem_request_assign(&obj->last_fenced_req, NULL); 3559 } 3560 3561 return 0; 3562 } 3563 3564 int 3565 i915_gem_object_put_fence(struct drm_i915_gem_object *obj) 3566 { 3567 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 3568 struct drm_i915_fence_reg *fence; 3569 int ret; 3570 3571 ret = i915_gem_object_wait_fence(obj); 3572 if (ret) 3573 return ret; 3574 3575 if (obj->fence_reg == I915_FENCE_REG_NONE) 3576 return 0; 3577 3578 fence = &dev_priv->fence_regs[obj->fence_reg]; 3579 3580 if (WARN_ON(fence->pin_count)) 3581 return -EBUSY; 3582 3583 i915_gem_object_fence_lost(obj); 3584 i915_gem_object_update_fence(obj, fence, false); 3585 3586 return 0; 3587 } 3588 3589 static struct drm_i915_fence_reg * 3590 i915_find_fence_reg(struct drm_device *dev) 3591 { 3592 struct drm_i915_private *dev_priv = dev->dev_private; 3593 struct drm_i915_fence_reg *reg, *avail; 3594 int i; 3595 3596 /* First try to find a free reg */ 3597 avail = NULL; 3598 for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) { 3599 reg = &dev_priv->fence_regs[i]; 3600 if (!reg->obj) 3601 return reg; 3602 3603 if (!reg->pin_count) 3604 avail = reg; 3605 } 3606 3607 if (avail == NULL) 3608 goto deadlock; 3609 3610 /* None available, try to steal one or wait for a user to finish */ 3611 list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) { 3612 if (reg->pin_count) 3613 continue; 3614 3615 return reg; 3616 } 3617 3618 deadlock: 3619 /* Wait for completion of pending flips which consume fences */ 3620 if (intel_has_pending_fb_unpin(dev)) 3621 return ERR_PTR(-EAGAIN); 3622 3623 return ERR_PTR(-EDEADLK); 3624 } 3625 3626 /** 3627 * i915_gem_object_get_fence - set up fencing for an object 3628 * @obj: object to map through a fence reg 3629 * 3630 * When mapping objects through the GTT, userspace wants to be able to write 3631 * to them without having to worry about swizzling if the object is tiled. 3632 * This function walks the fence regs looking for a free one for @obj, 3633 * stealing one if it can't find any. 3634 * 3635 * It then sets up the reg based on the object's properties: address, pitch 3636 * and tiling format. 3637 * 3638 * For an untiled surface, this removes any existing fence. 3639 */ 3640 int 3641 i915_gem_object_get_fence(struct drm_i915_gem_object *obj) 3642 { 3643 struct drm_device *dev = obj->base.dev; 3644 struct drm_i915_private *dev_priv = dev->dev_private; 3645 bool enable = obj->tiling_mode != I915_TILING_NONE; 3646 struct drm_i915_fence_reg *reg; 3647 int ret; 3648 3649 /* Have we updated the tiling parameters upon the object and so 3650 * will need to serialise the write to the associated fence register? 3651 */ 3652 if (obj->fence_dirty) { 3653 ret = i915_gem_object_wait_fence(obj); 3654 if (ret) 3655 return ret; 3656 } 3657 3658 /* Just update our place in the LRU if our fence is getting reused. 
*/ 3659 if (obj->fence_reg != I915_FENCE_REG_NONE) { 3660 reg = &dev_priv->fence_regs[obj->fence_reg]; 3661 if (!obj->fence_dirty) { 3662 list_move_tail(&reg->lru_list, 3663 &dev_priv->mm.fence_list); 3664 return 0; 3665 } 3666 } else if (enable) { 3667 if (WARN_ON(!obj->map_and_fenceable)) 3668 return -EINVAL; 3669 3670 reg = i915_find_fence_reg(dev); 3671 if (IS_ERR(reg)) 3672 return PTR_ERR(reg); 3673 3674 if (reg->obj) { 3675 struct drm_i915_gem_object *old = reg->obj; 3676 3677 ret = i915_gem_object_wait_fence(old); 3678 if (ret) 3679 return ret; 3680 3681 i915_gem_object_fence_lost(old); 3682 } 3683 } else 3684 return 0; 3685 3686 i915_gem_object_update_fence(obj, reg, enable); 3687 3688 return 0; 3689 } 3690 3691 static bool i915_gem_valid_gtt_space(struct i915_vma *vma, 3692 unsigned long cache_level) 3693 { 3694 struct drm_mm_node *gtt_space = &vma->node; 3695 struct drm_mm_node *other; 3696 3697 /* 3698 * On some machines we have to be careful when putting differing types 3699 * of snoopable memory together to avoid the prefetcher crossing memory 3700 * domains and dying. During vm initialisation, we decide whether or not 3701 * these constraints apply and set the drm_mm.color_adjust 3702 * appropriately. 3703 */ 3704 if (vma->vm->mm.color_adjust == NULL) 3705 return true; 3706 3707 if (!drm_mm_node_allocated(gtt_space)) 3708 return true; 3709 3710 if (list_empty(&gtt_space->node_list)) 3711 return true; 3712 3713 other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list); 3714 if (other->allocated && !other->hole_follows && other->color != cache_level) 3715 return false; 3716 3717 other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list); 3718 if (other->allocated && !gtt_space->hole_follows && other->color != cache_level) 3719 return false; 3720 3721 return true; 3722 } 3723 3724 /** 3725 * Finds free space in the GTT aperture and binds the object or a view of it 3726 * there. 3727 */ 3728 static struct i915_vma * 3729 i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, 3730 struct i915_address_space *vm, 3731 const struct i915_ggtt_view *ggtt_view, 3732 unsigned alignment, 3733 uint64_t flags) 3734 { 3735 struct drm_device *dev = obj->base.dev; 3736 struct drm_i915_private *dev_priv = dev->dev_private; 3737 u32 size, fence_size, fence_alignment, unfenced_alignment; 3738 unsigned long start = 3739 flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0; 3740 unsigned long end = 3741 flags & PIN_MAPPABLE ? dev_priv->gtt.mappable_end : vm->total; 3742 struct i915_vma *vma; 3743 int ret; 3744 3745 if (i915_is_ggtt(vm)) { 3746 u32 view_size; 3747 3748 if (WARN_ON(!ggtt_view)) 3749 return ERR_PTR(-EINVAL); 3750 3751 view_size = i915_ggtt_view_size(obj, ggtt_view); 3752 3753 fence_size = i915_gem_get_gtt_size(dev, 3754 view_size, 3755 obj->tiling_mode); 3756 fence_alignment = i915_gem_get_gtt_alignment(dev, 3757 view_size, 3758 obj->tiling_mode, 3759 true); 3760 unfenced_alignment = i915_gem_get_gtt_alignment(dev, 3761 view_size, 3762 obj->tiling_mode, 3763 false); 3764 size = flags & PIN_MAPPABLE ? fence_size : view_size; 3765 } else { 3766 fence_size = i915_gem_get_gtt_size(dev, 3767 obj->base.size, 3768 obj->tiling_mode); 3769 fence_alignment = i915_gem_get_gtt_alignment(dev, 3770 obj->base.size, 3771 obj->tiling_mode, 3772 true); 3773 unfenced_alignment = 3774 i915_gem_get_gtt_alignment(dev, 3775 obj->base.size, 3776 obj->tiling_mode, 3777 false); 3778 size = flags & PIN_MAPPABLE ?
fence_size : obj->base.size; 3779 } 3780 3781 if (alignment == 0) 3782 alignment = flags & PIN_MAPPABLE ? fence_alignment : 3783 unfenced_alignment; 3784 if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) { 3785 DRM_DEBUG("Invalid object (view type=%u) alignment requested %u\n", 3786 ggtt_view ? ggtt_view->type : 0, 3787 alignment); 3788 return ERR_PTR(-EINVAL); 3789 } 3790 3791 /* If binding the object/GGTT view requires more space than the entire 3792 * aperture has, reject it early before evicting everything in a vain 3793 * attempt to find space. 3794 */ 3795 if (size > end) { 3796 DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: size=%u > %s aperture=%lu\n", 3797 ggtt_view ? ggtt_view->type : 0, 3798 size, 3799 flags & PIN_MAPPABLE ? "mappable" : "total", 3800 end); 3801 return ERR_PTR(-E2BIG); 3802 } 3803 3804 ret = i915_gem_object_get_pages(obj); 3805 if (ret) 3806 return ERR_PTR(ret); 3807 3808 i915_gem_object_pin_pages(obj); 3809 3810 vma = ggtt_view ? i915_gem_obj_lookup_or_create_ggtt_vma(obj, ggtt_view) : 3811 i915_gem_obj_lookup_or_create_vma(obj, vm); 3812 3813 if (IS_ERR(vma)) 3814 goto err_unpin; 3815 3816 search_free: 3817 ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node, 3818 size, alignment, 3819 obj->cache_level, 3820 start, end, 3821 DRM_MM_SEARCH_DEFAULT, 3822 DRM_MM_CREATE_DEFAULT); 3823 if (ret) { 3824 ret = i915_gem_evict_something(dev, vm, size, alignment, 3825 obj->cache_level, 3826 start, end, 3827 flags); 3828 if (ret == 0) 3829 goto search_free; 3830 3831 goto err_free_vma; 3832 } 3833 if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) { 3834 ret = -EINVAL; 3835 goto err_remove_node; 3836 } 3837 3838 trace_i915_vma_bind(vma, flags); 3839 ret = i915_vma_bind(vma, obj->cache_level, flags); 3840 if (ret) 3841 goto err_remove_node; 3842 3843 list_move_tail(&obj->global_list, &dev_priv->mm.bound_list); 3844 list_add_tail(&vma->mm_list, &vm->inactive_list); 3845 3846 return vma; 3847 3848 err_remove_node: 3849 drm_mm_remove_node(&vma->node); 3850 err_free_vma: 3851 i915_gem_vma_destroy(vma); 3852 vma = ERR_PTR(ret); 3853 err_unpin: 3854 i915_gem_object_unpin_pages(obj); 3855 return vma; 3856 } 3857 3858 bool 3859 i915_gem_clflush_object(struct drm_i915_gem_object *obj, 3860 bool force) 3861 { 3862 /* If we don't have a page list set up, then we're not pinned 3863 * to GPU, and we can ignore the cache flush because it'll happen 3864 * again at bind time. 3865 */ 3866 if (obj->pages == NULL) 3867 return false; 3868 3869 /* 3870 * Stolen memory is always coherent with the GPU as it is explicitly 3871 * marked as wc by the system, or the system is cache-coherent. 3872 */ 3873 if (obj->stolen) 3874 return false; 3875 3876 /* If the GPU is snooping the contents of the CPU cache, 3877 * we do not need to manually clear the CPU cache lines. However, 3878 * the caches are only snooped when the render cache is 3879 * flushed/invalidated. As we always have to emit invalidations 3880 * and flushes when moving into and out of the RENDER domain, correct 3881 * snooping behaviour occurs naturally as the result of our domain 3882 * tracking. 
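 *
 * In that coherent case the code below just marks obj->cache_dirty and
 * skips the clflush; the dirty cachelines get flushed later if the
 * object is ever moved to an uncached domain (see
 * i915_gem_object_set_cache_level()).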
3883 */ 3884 if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) { 3885 obj->cache_dirty = true; 3886 return false; 3887 } 3888 3889 trace_i915_gem_object_clflush(obj); 3890 drm_clflush_pages(obj->pages, obj->base.size / PAGE_SIZE); 3891 obj->cache_dirty = false; 3892 3893 return true; 3894 } 3895 3896 /** Flushes the GTT write domain for the object if it's dirty. */ 3897 static void 3898 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) 3899 { 3900 uint32_t old_write_domain; 3901 3902 if (obj->base.write_domain != I915_GEM_DOMAIN_GTT) 3903 return; 3904 3905 /* No actual flushing is required for the GTT write domain. Writes 3906 * to it immediately go to main memory as far as we know, so there's 3907 * no chipset flush. It also doesn't land in render cache. 3908 * 3909 * However, we do have to enforce the order so that all writes through 3910 * the GTT land before any writes to the device, such as updates to 3911 * the GATT itself. 3912 */ 3913 wmb(); 3914 3915 old_write_domain = obj->base.write_domain; 3916 obj->base.write_domain = 0; 3917 3918 intel_fb_obj_flush(obj, false); 3919 3922 trace_i915_gem_object_change_domain(obj, 3923 obj->base.read_domains, 3924 old_write_domain); 3925 } 3926 3927 /** Flushes the CPU write domain for the object if it's dirty. */ 3928 static void 3929 i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) 3930 { 3931 uint32_t old_write_domain; 3932 3933 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) 3934 return; 3935 3936 if (i915_gem_clflush_object(obj, obj->pin_display)) 3937 i915_gem_chipset_flush(obj->base.dev); 3938 3939 old_write_domain = obj->base.write_domain; 3940 obj->base.write_domain = 0; 3941 3942 trace_i915_gem_object_change_domain(obj, 3943 obj->base.read_domains, 3944 old_write_domain); 3945 } 3946 3947 /** 3948 * Moves a single object to the GTT read, and possibly write domain. 3949 * 3950 * This function returns when the move is complete, including waiting on 3951 * flushes to occur. 3952 */ 3953 int 3954 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) 3955 { 3956 uint32_t old_write_domain, old_read_domains; 3957 struct i915_vma *vma; 3958 int ret; 3959 3960 if (obj->base.write_domain == I915_GEM_DOMAIN_GTT) 3961 return 0; 3962 3963 ret = i915_gem_object_wait_rendering(obj, !write); 3964 if (ret) 3965 return ret; 3966 3967 /* Flush and acquire obj->pages so that we are coherent through 3968 * direct access in memory with previous cached writes through 3969 * shmemfs and that our cache domain tracking remains valid. 3970 * For example, if the obj->filp was moved to swap without us 3971 * being notified and releasing the pages, we would mistakenly 3972 * continue to assume that the obj remained out of the CPU cached 3973 * domain. 3974 */ 3975 ret = i915_gem_object_get_pages(obj); 3976 if (ret) 3977 return ret; 3978 3979 i915_gem_object_flush_cpu_write_domain(obj); 3980 3981 /* Serialise direct access to this object with the barriers for 3982 * coherent writes from the GPU, by effectively invalidating the 3983 * GTT domain upon first access. 3984 */ 3985 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) 3986 mb(); 3987 3988 old_write_domain = obj->base.write_domain; 3989 old_read_domains = obj->base.read_domains; 3990 3991 /* It should now be out of any other write domains, and we can update 3992 * the domain values for our changes.
3993 */ 3994 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0); 3995 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 3996 if (write) { 3997 obj->base.read_domains = I915_GEM_DOMAIN_GTT; 3998 obj->base.write_domain = I915_GEM_DOMAIN_GTT; 3999 obj->dirty = 1; 4000 } 4001 4002 if (write) 4003 intel_fb_obj_invalidate(obj, NULL, ORIGIN_GTT); 4004 4005 trace_i915_gem_object_change_domain(obj, 4006 old_read_domains, 4007 old_write_domain); 4008 4009 /* And bump the LRU for this access */ 4010 vma = i915_gem_obj_to_ggtt(obj); 4011 if (vma && drm_mm_node_allocated(&vma->node) && !obj->active) 4012 list_move_tail(&vma->mm_list, 4013 &to_i915(obj->base.dev)->gtt.base.inactive_list); 4014 4015 return 0; 4016 } 4017 4018 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, 4019 enum i915_cache_level cache_level) 4020 { 4021 struct drm_device *dev = obj->base.dev; 4022 struct i915_vma *vma, *next; 4023 int ret; 4024 4025 if (obj->cache_level == cache_level) 4026 return 0; 4027 4028 if (i915_gem_obj_is_pinned(obj)) { 4029 DRM_DEBUG("can not change the cache level of pinned objects\n"); 4030 return -EBUSY; 4031 } 4032 4033 list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) { 4034 if (!i915_gem_valid_gtt_space(vma, cache_level)) { 4035 ret = i915_vma_unbind(vma); 4036 if (ret) 4037 return ret; 4038 } 4039 } 4040 4041 if (i915_gem_obj_bound_any(obj)) { 4042 ret = i915_gem_object_wait_rendering(obj, false); 4043 if (ret) 4044 return ret; 4045 4046 i915_gem_object_finish_gtt(obj); 4047 4048 /* Before SandyBridge, you could not use tiling or fence 4049 * registers with snooped memory, so relinquish any fences 4050 * currently pointing to our region in the aperture. 4051 */ 4052 if (INTEL_INFO(dev)->gen < 6) { 4053 ret = i915_gem_object_put_fence(obj); 4054 if (ret) 4055 return ret; 4056 } 4057 4058 list_for_each_entry(vma, &obj->vma_list, vma_link) 4059 if (drm_mm_node_allocated(&vma->node)) { 4060 ret = i915_vma_bind(vma, cache_level, 4061 PIN_UPDATE); 4062 if (ret) 4063 return ret; 4064 } 4065 } 4066 4067 list_for_each_entry(vma, &obj->vma_list, vma_link) 4068 vma->node.color = cache_level; 4069 obj->cache_level = cache_level; 4070 4071 if (obj->cache_dirty && 4072 obj->base.write_domain != I915_GEM_DOMAIN_CPU && 4073 cpu_write_needs_clflush(obj)) { 4074 if (i915_gem_clflush_object(obj, true)) 4075 i915_gem_chipset_flush(obj->base.dev); 4076 } 4077 4078 return 0; 4079 } 4080 4081 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, 4082 struct drm_file *file) 4083 { 4084 struct drm_i915_gem_caching *args = data; 4085 struct drm_i915_gem_object *obj; 4086 4087 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 4088 if (&obj->base == NULL) 4089 return -ENOENT; 4090 4091 switch (obj->cache_level) { 4092 case I915_CACHE_LLC: 4093 case I915_CACHE_L3_LLC: 4094 args->caching = I915_CACHING_CACHED; 4095 break; 4096 4097 case I915_CACHE_WT: 4098 args->caching = I915_CACHING_DISPLAY; 4099 break; 4100 4101 default: 4102 args->caching = I915_CACHING_NONE; 4103 break; 4104 } 4105 4106 drm_gem_object_unreference_unlocked(&obj->base); 4107 return 0; 4108 } 4109 4110 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, 4111 struct drm_file *file) 4112 { 4113 struct drm_i915_gem_caching *args = data; 4114 struct drm_i915_gem_object *obj; 4115 enum i915_cache_level level; 4116 int ret; 4117 4118 switch (args->caching) { 4119 case I915_CACHING_NONE: 4120 level = I915_CACHE_NONE; 4121 break; 4122 case I915_CACHING_CACHED: 4123 level = 
I915_CACHE_LLC; 4124 break; 4125 case I915_CACHING_DISPLAY: 4126 level = HAS_WT(dev) ? I915_CACHE_WT : I915_CACHE_NONE; 4127 break; 4128 default: 4129 return -EINVAL; 4130 } 4131 4132 ret = i915_mutex_lock_interruptible(dev); 4133 if (ret) 4134 return ret; 4135 4136 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 4137 if (&obj->base == NULL) { 4138 ret = -ENOENT; 4139 goto unlock; 4140 } 4141 4142 ret = i915_gem_object_set_cache_level(obj, level); 4143 4144 drm_gem_object_unreference(&obj->base); 4145 unlock: 4146 mutex_unlock(&dev->struct_mutex); 4147 return ret; 4148 } 4149 4150 /* 4151 * Prepare buffer for display plane (scanout, cursors, etc). 4152 * Can be called from an uninterruptible phase (modesetting) and allows 4153 * any flushes to be pipelined (for pageflips). 4154 */ 4155 int 4156 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, 4157 u32 alignment, 4158 struct intel_engine_cs *pipelined, 4159 const struct i915_ggtt_view *view) 4160 { 4161 u32 old_read_domains, old_write_domain; 4162 int ret; 4163 4164 ret = i915_gem_object_sync(obj, pipelined); 4165 if (ret) 4166 return ret; 4167 4168 /* Mark the pin_display early so that we account for the 4169 * display coherency whilst setting up the cache domains. 4170 */ 4171 obj->pin_display++; 4172 4173 /* The display engine is not coherent with the LLC cache on gen6. As 4174 * a result, we make sure that the pinning that is about to occur is 4175 * done with uncached PTEs. This is lowest common denominator for all 4176 * chipsets. 4177 * 4178 * However for gen6+, we could do better by using the GFDT bit instead 4179 * of uncaching, which would allow us to flush all the LLC-cached data 4180 * with that bit in the PTE to main memory with just one PIPE_CONTROL. 4181 */ 4182 ret = i915_gem_object_set_cache_level(obj, 4183 HAS_WT(obj->base.dev) ? I915_CACHE_WT : I915_CACHE_NONE); 4184 if (ret) 4185 goto err_unpin_display; 4186 4187 /* As the user may map the buffer once pinned in the display plane 4188 * (e.g. libkms for the bootup splash), we have to ensure that we 4189 * always use map_and_fenceable for all scanout buffers. 4190 */ 4191 ret = i915_gem_object_ggtt_pin(obj, view, alignment, 4192 view->type == I915_GGTT_VIEW_NORMAL ? 4193 PIN_MAPPABLE : 0); 4194 if (ret) 4195 goto err_unpin_display; 4196 4197 i915_gem_object_flush_cpu_write_domain(obj); 4198 4199 old_write_domain = obj->base.write_domain; 4200 old_read_domains = obj->base.read_domains; 4201 4202 /* It should now be out of any other write domains, and we can update 4203 * the domain values for our changes. 4204 */ 4205 obj->base.write_domain = 0; 4206 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 4207 4208 trace_i915_gem_object_change_domain(obj, 4209 old_read_domains, 4210 old_write_domain); 4211 4212 return 0; 4213 4214 err_unpin_display: 4215 obj->pin_display--; 4216 return ret; 4217 } 4218 4219 void 4220 i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj, 4221 const struct i915_ggtt_view *view) 4222 { 4223 if (WARN_ON(obj->pin_display == 0)) 4224 return; 4225 4226 i915_gem_object_ggtt_unpin_view(obj, view); 4227 4228 obj->pin_display--; 4229 } 4230 4231 /** 4232 * Moves a single object to the CPU read, and possibly write domain. 4233 * 4234 * This function returns when the move is complete, including waiting on 4235 * flushes to occur. 
4236 */ 4237 int 4238 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) 4239 { 4240 uint32_t old_write_domain, old_read_domains; 4241 int ret; 4242 4243 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) 4244 return 0; 4245 4246 ret = i915_gem_object_wait_rendering(obj, !write); 4247 if (ret) 4248 return ret; 4249 4250 i915_gem_object_flush_gtt_write_domain(obj); 4251 4252 old_write_domain = obj->base.write_domain; 4253 old_read_domains = obj->base.read_domains; 4254 4255 /* Flush the CPU cache if it's still invalid. */ 4256 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) { 4257 i915_gem_clflush_object(obj, false); 4258 4259 obj->base.read_domains |= I915_GEM_DOMAIN_CPU; 4260 } 4261 4262 /* It should now be out of any other write domains, and we can update 4263 * the domain values for our changes. 4264 */ 4265 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0); 4266 4267 /* If we're writing through the CPU, then the GPU read domains will 4268 * need to be invalidated at next use. 4269 */ 4270 if (write) { 4271 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 4272 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 4273 } 4274 4275 if (write) 4276 intel_fb_obj_invalidate(obj, NULL, ORIGIN_CPU); 4277 4278 trace_i915_gem_object_change_domain(obj, 4279 old_read_domains, 4280 old_write_domain); 4281 4282 return 0; 4283 } 4284 4285 /* Throttle our rendering by waiting until the ring has completed our requests 4286 * emitted over 20 msec ago. 4287 * 4288 * Note that if we were to use the current jiffies each time around the loop, 4289 * we wouldn't escape the function with any frames outstanding if the time to 4290 * render a frame was over 20ms. 4291 * 4292 * This should get us reasonable parallelism between CPU and GPU but also 4293 * relatively low latency when blocking on a particular request to finish. 
4294 */ 4295 static int 4296 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) 4297 { 4298 struct drm_i915_private *dev_priv = dev->dev_private; 4299 struct drm_i915_file_private *file_priv = file->driver_priv; 4300 unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES; 4301 struct drm_i915_gem_request *request, *target = NULL; 4302 unsigned reset_counter; 4303 int ret; 4304 4305 ret = i915_gem_wait_for_error(&dev_priv->gpu_error); 4306 if (ret) 4307 return ret; 4308 4309 ret = i915_gem_check_wedge(&dev_priv->gpu_error, false); 4310 if (ret) 4311 return ret; 4312 4313 spin_lock(&file_priv->mm.lock); 4314 list_for_each_entry(request, &file_priv->mm.request_list, client_list) { 4315 if (time_after_eq(request->emitted_jiffies, recent_enough)) 4316 break; 4317 4318 target = request; 4319 } 4320 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter); 4321 if (target) 4322 i915_gem_request_reference(target); 4323 spin_unlock(&file_priv->mm.lock); 4324 4325 if (target == NULL) 4326 return 0; 4327 4328 ret = __i915_wait_request(target, reset_counter, true, NULL, NULL); 4329 if (ret == 0) 4330 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0); 4331 4332 i915_gem_request_unreference__unlocked(target); 4333 4334 return ret; 4335 } 4336 4337 static bool 4338 i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags) 4339 { 4340 struct drm_i915_gem_object *obj = vma->obj; 4341 4342 if (alignment && 4343 vma->node.start & (alignment - 1)) 4344 return true; 4345 4346 if (flags & PIN_MAPPABLE && !obj->map_and_fenceable) 4347 return true; 4348 4349 if (flags & PIN_OFFSET_BIAS && 4350 vma->node.start < (flags & PIN_OFFSET_MASK)) 4351 return true; 4352 4353 return false; 4354 } 4355 4356 static int 4357 i915_gem_object_do_pin(struct drm_i915_gem_object *obj, 4358 struct i915_address_space *vm, 4359 const struct i915_ggtt_view *ggtt_view, 4360 uint32_t alignment, 4361 uint64_t flags) 4362 { 4363 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 4364 struct i915_vma *vma; 4365 unsigned bound; 4366 int ret; 4367 4368 if (WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base)) 4369 return -ENODEV; 4370 4371 if (WARN_ON(flags & (PIN_GLOBAL | PIN_MAPPABLE) && !i915_is_ggtt(vm))) 4372 return -EINVAL; 4373 4374 if (WARN_ON((flags & (PIN_MAPPABLE | PIN_GLOBAL)) == PIN_MAPPABLE)) 4375 return -EINVAL; 4376 4377 if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view)) 4378 return -EINVAL; 4379 4380 vma = ggtt_view ? i915_gem_obj_to_ggtt_view(obj, ggtt_view) : 4381 i915_gem_obj_to_vma(obj, vm); 4382 4383 if (IS_ERR(vma)) 4384 return PTR_ERR(vma); 4385 4386 if (vma) { 4387 if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT)) 4388 return -EBUSY; 4389 4390 if (i915_vma_misplaced(vma, alignment, flags)) { 4391 unsigned long offset; 4392 offset = ggtt_view ? i915_gem_obj_ggtt_offset_view(obj, ggtt_view) : 4393 i915_gem_obj_offset(obj, vm); 4394 WARN(vma->pin_count, 4395 "bo is already pinned in %s with incorrect alignment:" 4396 " offset=%lx, req.alignment=%x, req.map_and_fenceable=%d," 4397 " obj->map_and_fenceable=%d\n", 4398 ggtt_view ? "ggtt" : "ppgtt", 4399 offset, 4400 alignment, 4401 !!(flags & PIN_MAPPABLE), 4402 obj->map_and_fenceable); 4403 ret = i915_vma_unbind(vma); 4404 if (ret) 4405 return ret; 4406 4407 vma = NULL; 4408 } 4409 } 4410 4411 bound = vma ? 
vma->bound : 0; 4412 if (vma == NULL || !drm_mm_node_allocated(&vma->node)) { 4413 vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view, alignment, 4414 flags); 4415 if (IS_ERR(vma)) 4416 return PTR_ERR(vma); 4417 } else { 4418 ret = i915_vma_bind(vma, obj->cache_level, flags); 4419 if (ret) 4420 return ret; 4421 } 4422 4423 if (ggtt_view && ggtt_view->type == I915_GGTT_VIEW_NORMAL && 4424 (bound ^ vma->bound) & GLOBAL_BIND) { 4425 bool mappable, fenceable; 4426 u32 fence_size, fence_alignment; 4427 4428 fence_size = i915_gem_get_gtt_size(obj->base.dev, 4429 obj->base.size, 4430 obj->tiling_mode); 4431 fence_alignment = i915_gem_get_gtt_alignment(obj->base.dev, 4432 obj->base.size, 4433 obj->tiling_mode, 4434 true); 4435 4436 fenceable = (vma->node.size == fence_size && 4437 (vma->node.start & (fence_alignment - 1)) == 0); 4438 4439 mappable = (vma->node.start + fence_size <= 4440 dev_priv->gtt.mappable_end); 4441 4442 obj->map_and_fenceable = mappable && fenceable; 4443 4444 WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable); 4445 } 4446 4447 vma->pin_count++; 4448 return 0; 4449 } 4450 4451 int 4452 i915_gem_object_pin(struct drm_i915_gem_object *obj, 4453 struct i915_address_space *vm, 4454 uint32_t alignment, 4455 uint64_t flags) 4456 { 4457 return i915_gem_object_do_pin(obj, vm, 4458 i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL, 4459 alignment, flags); 4460 } 4461 4462 int 4463 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, 4464 const struct i915_ggtt_view *view, 4465 uint32_t alignment, 4466 uint64_t flags) 4467 { 4468 if (WARN_ONCE(!view, "no view specified")) 4469 return -EINVAL; 4470 4471 return i915_gem_object_do_pin(obj, i915_obj_to_ggtt(obj), view, 4472 alignment, flags | PIN_GLOBAL); 4473 } 4474 4475 void 4476 i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj, 4477 const struct i915_ggtt_view *view) 4478 { 4479 struct i915_vma *vma = i915_gem_obj_to_ggtt_view(obj, view); 4480 4481 BUG_ON(!vma); 4482 WARN_ON(vma->pin_count == 0); 4483 WARN_ON(!i915_gem_obj_ggtt_bound_view(obj, view)); 4484 4485 --vma->pin_count; 4486 } 4487 4488 bool 4489 i915_gem_object_pin_fence(struct drm_i915_gem_object *obj) 4490 { 4491 if (obj->fence_reg != I915_FENCE_REG_NONE) { 4492 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 4493 struct i915_vma *ggtt_vma = i915_gem_obj_to_ggtt(obj); 4494 4495 WARN_ON(!ggtt_vma || 4496 dev_priv->fence_regs[obj->fence_reg].pin_count > 4497 ggtt_vma->pin_count); 4498 dev_priv->fence_regs[obj->fence_reg].pin_count++; 4499 return true; 4500 } else 4501 return false; 4502 } 4503 4504 void 4505 i915_gem_object_unpin_fence(struct drm_i915_gem_object *obj) 4506 { 4507 if (obj->fence_reg != I915_FENCE_REG_NONE) { 4508 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 4509 WARN_ON(dev_priv->fence_regs[obj->fence_reg].pin_count <= 0); 4510 dev_priv->fence_regs[obj->fence_reg].pin_count--; 4511 } 4512 } 4513 4514 int 4515 i915_gem_busy_ioctl(struct drm_device *dev, void *data, 4516 struct drm_file *file) 4517 { 4518 struct drm_i915_gem_busy *args = data; 4519 struct drm_i915_gem_object *obj; 4520 int ret; 4521 4522 ret = i915_mutex_lock_interruptible(dev); 4523 if (ret) 4524 return ret; 4525 4526 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 4527 if (&obj->base == NULL) { 4528 ret = -ENOENT; 4529 goto unlock; 4530 } 4531 4532 /* Count all active objects as busy, even if they are currently not used 4533 * by the gpu. 
Users of this interface expect objects to eventually 4534 * become non-busy without any further actions, therefore emit any 4535 * necessary flushes here. 4536 */ 4537 ret = i915_gem_object_flush_active(obj); 4538 if (ret) 4539 goto unref; 4540 4541 BUILD_BUG_ON(I915_NUM_RINGS > 16); 4542 args->busy = obj->active << 16; 4543 if (obj->last_write_req) 4544 args->busy |= obj->last_write_req->ring->id; 4545 4546 unref: 4547 drm_gem_object_unreference(&obj->base); 4548 unlock: 4549 mutex_unlock(&dev->struct_mutex); 4550 return ret; 4551 } 4552 4553 int 4554 i915_gem_throttle_ioctl(struct drm_device *dev, void *data, 4555 struct drm_file *file_priv) 4556 { 4557 return i915_gem_ring_throttle(dev, file_priv); 4558 } 4559 4560 int 4561 i915_gem_madvise_ioctl(struct drm_device *dev, void *data, 4562 struct drm_file *file_priv) 4563 { 4564 struct drm_i915_private *dev_priv = dev->dev_private; 4565 struct drm_i915_gem_madvise *args = data; 4566 struct drm_i915_gem_object *obj; 4567 int ret; 4568 4569 switch (args->madv) { 4570 case I915_MADV_DONTNEED: 4571 case I915_MADV_WILLNEED: 4572 break; 4573 default: 4574 return -EINVAL; 4575 } 4576 4577 ret = i915_mutex_lock_interruptible(dev); 4578 if (ret) 4579 return ret; 4580 4581 obj = to_intel_bo(drm_gem_object_lookup(dev, file_priv, args->handle)); 4582 if (&obj->base == NULL) { 4583 ret = -ENOENT; 4584 goto unlock; 4585 } 4586 4587 if (i915_gem_obj_is_pinned(obj)) { 4588 ret = -EINVAL; 4589 goto out; 4590 } 4591 4592 if (obj->pages && 4593 obj->tiling_mode != I915_TILING_NONE && 4594 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) { 4595 if (obj->madv == I915_MADV_WILLNEED) 4596 i915_gem_object_unpin_pages(obj); 4597 if (args->madv == I915_MADV_WILLNEED) 4598 i915_gem_object_pin_pages(obj); 4599 } 4600 4601 if (obj->madv != __I915_MADV_PURGED) 4602 obj->madv = args->madv; 4603 4604 /* if the object is no longer attached, discard its backing storage */ 4605 if (obj->madv == I915_MADV_DONTNEED && obj->pages == NULL) 4606 i915_gem_object_truncate(obj); 4607 4608 args->retained = obj->madv != __I915_MADV_PURGED; 4609 4610 out: 4611 drm_gem_object_unreference(&obj->base); 4612 unlock: 4613 mutex_unlock(&dev->struct_mutex); 4614 return ret; 4615 } 4616 4617 void i915_gem_object_init(struct drm_i915_gem_object *obj, 4618 const struct drm_i915_gem_object_ops *ops) 4619 { 4620 int i; 4621 4622 INIT_LIST_HEAD(&obj->global_list); 4623 for (i = 0; i < I915_NUM_RINGS; i++) 4624 INIT_LIST_HEAD(&obj->ring_list[i]); 4625 INIT_LIST_HEAD(&obj->obj_exec_link); 4626 INIT_LIST_HEAD(&obj->vma_list); 4627 INIT_LIST_HEAD(&obj->batch_pool_link); 4628 4629 obj->ops = ops; 4630 4631 obj->fence_reg = I915_FENCE_REG_NONE; 4632 obj->madv = I915_MADV_WILLNEED; 4633 4634 i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size); 4635 } 4636 4637 static const struct drm_i915_gem_object_ops i915_gem_object_ops = { 4638 .get_pages = i915_gem_object_get_pages_gtt, 4639 .put_pages = i915_gem_object_put_pages_gtt, 4640 }; 4641 4642 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev, 4643 size_t size) 4644 { 4645 struct drm_i915_gem_object *obj; 4646 #if 0 4647 struct address_space *mapping; 4648 gfp_t mask; 4649 #endif 4650 4651 obj = i915_gem_object_alloc(dev); 4652 if (obj == NULL) 4653 return NULL; 4654 4655 if (drm_gem_object_init(dev, &obj->base, size) != 0) { 4656 i915_gem_object_free(obj); 4657 return NULL; 4658 } 4659 4660 #if 0 4661 mask = GFP_HIGHUSER | __GFP_RECLAIMABLE; 4662 if (IS_CRESTLINE(dev) || IS_BROADWATER(dev)) { 4663 /* 965gm cannot relocate 
objects above 4GiB. */ 4664 mask &= ~__GFP_HIGHMEM; 4665 mask |= __GFP_DMA32; 4666 } 4667 4668 mapping = file_inode(obj->base.filp)->i_mapping; 4669 mapping_set_gfp_mask(mapping, mask); 4670 #endif 4671 4672 i915_gem_object_init(obj, &i915_gem_object_ops); 4673 4674 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 4675 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 4676 4677 if (HAS_LLC(dev)) { 4678 /* On some devices, we can have the GPU use the LLC (the CPU 4679 * cache) for about a 10% performance improvement 4680 * compared to uncached. Graphics requests other than 4681 * display scanout are coherent with the CPU in 4682 * accessing this cache. This means in this mode we 4683 * don't need to clflush on the CPU side, and on the 4684 * GPU side we only need to flush internal caches to 4685 * get data visible to the CPU. 4686 * 4687 * However, we maintain the display planes as UC, and so 4688 * need to rebind when first used as such. 4689 */ 4690 obj->cache_level = I915_CACHE_LLC; 4691 } else 4692 obj->cache_level = I915_CACHE_NONE; 4693 4694 trace_i915_gem_object_create(obj); 4695 4696 return obj; 4697 } 4698 4699 static bool discard_backing_storage(struct drm_i915_gem_object *obj) 4700 { 4701 /* If we are the last user of the backing storage (be it shmemfs 4702 * pages or stolen etc), we know that the pages are going to be 4703 * immediately released. In this case, we can then skip copying 4704 * back the contents from the GPU. 4705 */ 4706 4707 if (obj->madv != I915_MADV_WILLNEED) 4708 return false; 4709 4710 if (obj->base.vm_obj == NULL) 4711 return true; 4712 4713 /* At first glance, this looks racy, but then again so would be 4714 * userspace racing mmap against close. However, the first external 4715 * reference to the filp can only be obtained through the 4716 * i915_gem_mmap_ioctl() which safeguards us against the user 4717 * acquiring such a reference whilst we are in the middle of 4718 * freeing the object. 4719 */ 4720 #if 0 4721 return atomic_long_read(&obj->base.filp->f_count) == 1; 4722 #else 4723 return false; 4724 #endif 4725 } 4726 4727 void i915_gem_free_object(struct drm_gem_object *gem_obj) 4728 { 4729 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); 4730 struct drm_device *dev = obj->base.dev; 4731 struct drm_i915_private *dev_priv = dev->dev_private; 4732 struct i915_vma *vma, *next; 4733 4734 intel_runtime_pm_get(dev_priv); 4735 4736 trace_i915_gem_object_destroy(obj); 4737 4738 list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) { 4739 int ret; 4740 4741 vma->pin_count = 0; 4742 ret = i915_vma_unbind(vma); 4743 if (WARN_ON(ret == -ERESTARTSYS)) { 4744 bool was_interruptible; 4745 4746 was_interruptible = dev_priv->mm.interruptible; 4747 dev_priv->mm.interruptible = false; 4748 4749 WARN_ON(i915_vma_unbind(vma)); 4750 4751 dev_priv->mm.interruptible = was_interruptible; 4752 } 4753 } 4754 4755 /* Stolen objects don't hold a ref, but do hold pin count. Fix that up 4756 * before progressing. 
*/ 4757 if (obj->stolen) 4758 i915_gem_object_unpin_pages(obj); 4759 4760 WARN_ON(obj->frontbuffer_bits); 4761 4762 if (obj->pages && obj->madv == I915_MADV_WILLNEED && 4763 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES && 4764 obj->tiling_mode != I915_TILING_NONE) 4765 i915_gem_object_unpin_pages(obj); 4766 4767 if (WARN_ON(obj->pages_pin_count)) 4768 obj->pages_pin_count = 0; 4769 if (discard_backing_storage(obj)) 4770 obj->madv = I915_MADV_DONTNEED; 4771 i915_gem_object_put_pages(obj); 4772 i915_gem_object_free_mmap_offset(obj); 4773 4774 BUG_ON(obj->pages); 4775 4776 #if 0 4777 if (obj->base.import_attach) 4778 drm_prime_gem_destroy(&obj->base, NULL); 4779 #endif 4780 4781 if (obj->ops->release) 4782 obj->ops->release(obj); 4783 4784 drm_gem_object_release(&obj->base); 4785 i915_gem_info_remove_obj(dev_priv, obj->base.size); 4786 4787 kfree(obj->bit_17); 4788 i915_gem_object_free(obj); 4789 4790 intel_runtime_pm_put(dev_priv); 4791 } 4792 4793 struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj, 4794 struct i915_address_space *vm) 4795 { 4796 struct i915_vma *vma; 4797 list_for_each_entry(vma, &obj->vma_list, vma_link) { 4798 if (i915_is_ggtt(vma->vm) && 4799 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) 4800 continue; 4801 if (vma->vm == vm) 4802 return vma; 4803 } 4804 return NULL; 4805 } 4806 4807 struct i915_vma *i915_gem_obj_to_ggtt_view(struct drm_i915_gem_object *obj, 4808 const struct i915_ggtt_view *view) 4809 { 4810 struct i915_address_space *ggtt = i915_obj_to_ggtt(obj); 4811 struct i915_vma *vma; 4812 4813 if (WARN_ONCE(!view, "no view specified")) 4814 return ERR_PTR(-EINVAL); 4815 4816 list_for_each_entry(vma, &obj->vma_list, vma_link) 4817 if (vma->vm == ggtt && 4818 i915_ggtt_view_equal(&vma->ggtt_view, view)) 4819 return vma; 4820 return NULL; 4821 } 4822 4823 void i915_gem_vma_destroy(struct i915_vma *vma) 4824 { 4825 struct i915_address_space *vm = NULL; 4826 WARN_ON(vma->node.allocated); 4827 4828 /* Keep the vma as a placeholder in the execbuffer reservation lists */ 4829 if (!list_empty(&vma->exec_list)) 4830 return; 4831 4832 vm = vma->vm; 4833 4834 if (!i915_is_ggtt(vm)) 4835 i915_ppgtt_put(i915_vm_to_ppgtt(vm)); 4836 4837 list_del(&vma->vma_link); 4838 4839 kfree(vma); 4840 } 4841 4842 static void 4843 i915_gem_stop_ringbuffers(struct drm_device *dev) 4844 { 4845 struct drm_i915_private *dev_priv = dev->dev_private; 4846 struct intel_engine_cs *ring; 4847 int i; 4848 4849 for_each_ring(ring, dev_priv, i) 4850 dev_priv->gt.stop_ring(ring); 4851 } 4852 4853 int 4854 i915_gem_suspend(struct drm_device *dev) 4855 { 4856 struct drm_i915_private *dev_priv = dev->dev_private; 4857 int ret = 0; 4858 4859 mutex_lock(&dev->struct_mutex); 4860 ret = i915_gpu_idle(dev); 4861 if (ret) 4862 goto err; 4863 4864 i915_gem_retire_requests(dev); 4865 4866 i915_gem_stop_ringbuffers(dev); 4867 mutex_unlock(&dev->struct_mutex); 4868 4869 cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work); 4870 cancel_delayed_work_sync(&dev_priv->mm.retire_work); 4871 #if 0 4872 flush_delayed_work(&dev_priv->mm.idle_work); 4873 #endif 4874 4875 /* Assert that we successfully flushed all the work and 4876 * reset the GPU back to its idle, low power state.
4877 */ 4878 WARN_ON(dev_priv->mm.busy); 4879 4880 return 0; 4881 4882 err: 4883 mutex_unlock(&dev->struct_mutex); 4884 return ret; 4885 } 4886 4887 int i915_gem_l3_remap(struct intel_engine_cs *ring, int slice) 4888 { 4889 struct drm_device *dev = ring->dev; 4890 struct drm_i915_private *dev_priv = dev->dev_private; 4891 u32 reg_base = GEN7_L3LOG_BASE + (slice * 0x200); 4892 u32 *remap_info = dev_priv->l3_parity.remap_info[slice]; 4893 int i, ret; 4894 4895 if (!HAS_L3_DPF(dev) || !remap_info) 4896 return 0; 4897 4898 ret = intel_ring_begin(ring, GEN7_L3LOG_SIZE / 4 * 3); 4899 if (ret) 4900 return ret; 4901 4902 /* 4903 * Note: We do not worry about the concurrent register cacheline hang 4904 * here because no other code should access these registers other than 4905 * at initialization time. 4906 */ 4907 for (i = 0; i < GEN7_L3LOG_SIZE; i += 4) { 4908 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); 4909 intel_ring_emit(ring, reg_base + i); 4910 intel_ring_emit(ring, remap_info[i/4]); 4911 } 4912 4913 intel_ring_advance(ring); 4914 4915 return ret; 4916 } 4917 4918 void i915_gem_init_swizzling(struct drm_device *dev) 4919 { 4920 struct drm_i915_private *dev_priv = dev->dev_private; 4921 4922 if (INTEL_INFO(dev)->gen < 5 || 4923 dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE) 4924 return; 4925 4926 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) | 4927 DISP_TILE_SURFACE_SWIZZLING); 4928 4929 if (IS_GEN5(dev)) 4930 return; 4931 4932 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL); 4933 if (IS_GEN6(dev)) 4934 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB)); 4935 else if (IS_GEN7(dev)) 4936 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB)); 4937 else if (IS_GEN8(dev)) 4938 I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW)); 4939 else 4940 BUG(); 4941 } 4942 4943 static bool 4944 intel_enable_blt(struct drm_device *dev) 4945 { 4946 if (!HAS_BLT(dev)) 4947 return false; 4948 4949 /* The blitter was dysfunctional on early prototypes */ 4950 if (IS_GEN6(dev) && dev->pdev->revision < 8) { 4951 DRM_INFO("BLT not supported on this pre-production hardware;" 4952 " graphics performance will be degraded.\n"); 4953 return false; 4954 } 4955 4956 return true; 4957 } 4958 4959 static void init_unused_ring(struct drm_device *dev, u32 base) 4960 { 4961 struct drm_i915_private *dev_priv = dev->dev_private; 4962 4963 I915_WRITE(RING_CTL(base), 0); 4964 I915_WRITE(RING_HEAD(base), 0); 4965 I915_WRITE(RING_TAIL(base), 0); 4966 I915_WRITE(RING_START(base), 0); 4967 } 4968 4969 static void init_unused_rings(struct drm_device *dev) 4970 { 4971 if (IS_I830(dev)) { 4972 init_unused_ring(dev, PRB1_BASE); 4973 init_unused_ring(dev, SRB0_BASE); 4974 init_unused_ring(dev, SRB1_BASE); 4975 init_unused_ring(dev, SRB2_BASE); 4976 init_unused_ring(dev, SRB3_BASE); 4977 } else if (IS_GEN2(dev)) { 4978 init_unused_ring(dev, SRB0_BASE); 4979 init_unused_ring(dev, SRB1_BASE); 4980 } else if (IS_GEN3(dev)) { 4981 init_unused_ring(dev, PRB1_BASE); 4982 init_unused_ring(dev, PRB2_BASE); 4983 } 4984 } 4985 4986 int i915_gem_init_rings(struct drm_device *dev) 4987 { 4988 struct drm_i915_private *dev_priv = dev->dev_private; 4989 int ret; 4990 4991 ret = intel_init_render_ring_buffer(dev); 4992 if (ret) 4993 return ret; 4994 4995 if (HAS_BSD(dev)) { 4996 ret = intel_init_bsd_ring_buffer(dev); 4997 if (ret) 4998 goto cleanup_render_ring; 4999 } 5000 5001 if (intel_enable_blt(dev)) { 5002 ret = intel_init_blt_ring_buffer(dev); 5003 if (ret) 5004 goto cleanup_bsd_ring; 5005 } 
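	/* Later parts expose additional engines beyond render/BSD/blitter:
	 * a video enhancement engine (VECS) and a second video decode
	 * engine (VCS2). Initialise each only where the hardware provides it.
	 */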
5006 5007 if (HAS_VEBOX(dev)) { 5008 ret = intel_init_vebox_ring_buffer(dev); 5009 if (ret) 5010 goto cleanup_blt_ring; 5011 } 5012 5013 if (HAS_BSD2(dev)) { 5014 ret = intel_init_bsd2_ring_buffer(dev); 5015 if (ret) 5016 goto cleanup_vebox_ring; 5017 } 5018 5019 ret = i915_gem_set_seqno(dev, ((u32)~0 - 0x1000)); 5020 if (ret) 5021 goto cleanup_bsd2_ring; 5022 5023 return 0; 5024 5025 cleanup_bsd2_ring: 5026 intel_cleanup_ring_buffer(&dev_priv->ring[VCS2]); 5027 cleanup_vebox_ring: 5028 intel_cleanup_ring_buffer(&dev_priv->ring[VECS]); 5029 cleanup_blt_ring: 5030 intel_cleanup_ring_buffer(&dev_priv->ring[BCS]); 5031 cleanup_bsd_ring: 5032 intel_cleanup_ring_buffer(&dev_priv->ring[VCS]); 5033 cleanup_render_ring: 5034 intel_cleanup_ring_buffer(&dev_priv->ring[RCS]); 5035 5036 return ret; 5037 } 5038 5039 int 5040 i915_gem_init_hw(struct drm_device *dev) 5041 { 5042 struct drm_i915_private *dev_priv = dev->dev_private; 5043 struct intel_engine_cs *ring; 5044 int ret, i; 5045 5046 #if 0 5047 if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt()) 5048 return -EIO; 5049 #endif 5050 5051 /* Double layer security blanket, see i915_gem_init() */ 5052 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 5053 5054 if (dev_priv->ellc_size) 5055 I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf)); 5056 5057 if (IS_HASWELL(dev)) 5058 I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev) ? 5059 LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED); 5060 5061 if (HAS_PCH_NOP(dev)) { 5062 if (IS_IVYBRIDGE(dev)) { 5063 u32 temp = I915_READ(GEN7_MSG_CTL); 5064 temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK); 5065 I915_WRITE(GEN7_MSG_CTL, temp); 5066 } else if (INTEL_INFO(dev)->gen >= 7) { 5067 u32 temp = I915_READ(HSW_NDE_RSTWRN_OPT); 5068 temp &= ~RESET_PCH_HANDSHAKE_ENABLE; 5069 I915_WRITE(HSW_NDE_RSTWRN_OPT, temp); 5070 } 5071 } 5072 5073 i915_gem_init_swizzling(dev); 5074 5075 /* 5076 * At least 830 can leave some of the unused rings 5077 * "active" (ie. head != tail) after resume which 5078 * will prevent c3 entry. Makes sure all unused rings 5079 * are totally idle. 
5080 */ 5081 init_unused_rings(dev); 5082 5083 for_each_ring(ring, dev_priv, i) { 5084 ret = ring->init_hw(ring); 5085 if (ret) 5086 goto out; 5087 } 5088 5089 for (i = 0; i < NUM_L3_SLICES(dev); i++) 5090 i915_gem_l3_remap(&dev_priv->ring[RCS], i); 5091 5092 ret = i915_ppgtt_init_hw(dev); 5093 if (ret && ret != -EIO) { 5094 DRM_ERROR("PPGTT enable failed %d\n", ret); 5095 i915_gem_cleanup_ringbuffer(dev); 5096 } 5097 5098 ret = i915_gem_context_enable(dev_priv); 5099 if (ret && ret != -EIO) { 5100 DRM_ERROR("Context enable failed %d\n", ret); 5101 i915_gem_cleanup_ringbuffer(dev); 5102 5103 goto out; 5104 } 5105 5106 out: 5107 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5108 return ret; 5109 } 5110 5111 int i915_gem_init(struct drm_device *dev) 5112 { 5113 struct drm_i915_private *dev_priv = dev->dev_private; 5114 int ret; 5115 5116 i915.enable_execlists = intel_sanitize_enable_execlists(dev, 5117 i915.enable_execlists); 5118 5119 mutex_lock(&dev->struct_mutex); 5120 5121 if (IS_VALLEYVIEW(dev)) { 5122 /* VLVA0 (potential hack), BIOS isn't actually waking us */ 5123 I915_WRITE(VLV_GTLC_WAKE_CTRL, VLV_GTLC_ALLOWWAKEREQ); 5124 if (wait_for((I915_READ(VLV_GTLC_PW_STATUS) & 5125 VLV_GTLC_ALLOWWAKEACK), 10)) 5126 DRM_DEBUG_DRIVER("allow wake ack timed out\n"); 5127 } 5128 5129 if (!i915.enable_execlists) { 5130 dev_priv->gt.execbuf_submit = i915_gem_ringbuffer_submission; 5131 dev_priv->gt.init_rings = i915_gem_init_rings; 5132 dev_priv->gt.cleanup_ring = intel_cleanup_ring_buffer; 5133 dev_priv->gt.stop_ring = intel_stop_ring_buffer; 5134 } else { 5135 dev_priv->gt.execbuf_submit = intel_execlists_submission; 5136 dev_priv->gt.init_rings = intel_logical_rings_init; 5137 dev_priv->gt.cleanup_ring = intel_logical_ring_cleanup; 5138 dev_priv->gt.stop_ring = intel_logical_ring_stop; 5139 } 5140 5141 /* This is just a security blanket to placate dragons. 5142 * On some systems, we very sporadically observe that the first TLBs 5143 * used by the CS may be stale, despite us poking the TLB reset. If 5144 * we hold the forcewake during initialisation these problems 5145 * just magically go away. 5146 */ 5147 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 5148 5149 ret = i915_gem_init_userptr(dev); 5150 if (ret) 5151 goto out_unlock; 5152 5153 i915_gem_init_global_gtt(dev); 5154 5155 ret = i915_gem_context_init(dev); 5156 if (ret) 5157 goto out_unlock; 5158 5159 ret = dev_priv->gt.init_rings(dev); 5160 if (ret) 5161 goto out_unlock; 5162 5163 ret = i915_gem_init_hw(dev); 5164 if (ret == -EIO) { 5165 /* Allow ring initialisation to fail by marking the GPU as 5166 * wedged. But we only want to do this where the GPU is angry, 5167 * for all other failure, such as an allocation failure, bail. 
5168 */ 5169 DRM_ERROR("Failed to initialize GPU, declaring it wedged\n"); 5170 atomic_set_mask(I915_WEDGED, &dev_priv->gpu_error.reset_counter); 5171 ret = 0; 5172 } 5173 5174 out_unlock: 5175 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5176 mutex_unlock(&dev->struct_mutex); 5177 5178 return ret; 5179 } 5180 5181 void 5182 i915_gem_cleanup_ringbuffer(struct drm_device *dev) 5183 { 5184 struct drm_i915_private *dev_priv = dev->dev_private; 5185 struct intel_engine_cs *ring; 5186 int i; 5187 5188 for_each_ring(ring, dev_priv, i) 5189 dev_priv->gt.cleanup_ring(ring); 5190 } 5191 5192 static void 5193 init_ring_lists(struct intel_engine_cs *ring) 5194 { 5195 INIT_LIST_HEAD(&ring->active_list); 5196 INIT_LIST_HEAD(&ring->request_list); 5197 } 5198 5199 void i915_init_vm(struct drm_i915_private *dev_priv, 5200 struct i915_address_space *vm) 5201 { 5202 if (!i915_is_ggtt(vm)) 5203 drm_mm_init(&vm->mm, vm->start, vm->total); 5204 vm->dev = dev_priv->dev; 5205 INIT_LIST_HEAD(&vm->active_list); 5206 INIT_LIST_HEAD(&vm->inactive_list); 5207 INIT_LIST_HEAD(&vm->global_link); 5208 list_add_tail(&vm->global_link, &dev_priv->vm_list); 5209 } 5210 5211 void 5212 i915_gem_load(struct drm_device *dev) 5213 { 5214 struct drm_i915_private *dev_priv = dev->dev_private; 5215 int i; 5216 5217 INIT_LIST_HEAD(&dev_priv->vm_list); 5218 i915_init_vm(dev_priv, &dev_priv->gtt.base); 5219 5220 INIT_LIST_HEAD(&dev_priv->context_list); 5221 INIT_LIST_HEAD(&dev_priv->mm.unbound_list); 5222 INIT_LIST_HEAD(&dev_priv->mm.bound_list); 5223 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 5224 for (i = 0; i < I915_NUM_RINGS; i++) 5225 init_ring_lists(&dev_priv->ring[i]); 5226 for (i = 0; i < I915_MAX_NUM_FENCES; i++) 5227 INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list); 5228 INIT_DELAYED_WORK(&dev_priv->mm.retire_work, 5229 i915_gem_retire_work_handler); 5230 INIT_DELAYED_WORK(&dev_priv->mm.idle_work, 5231 i915_gem_idle_work_handler); 5232 init_waitqueue_head(&dev_priv->gpu_error.reset_queue); 5233 5234 dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL; 5235 5236 if (INTEL_INFO(dev)->gen >= 7 && !IS_VALLEYVIEW(dev)) 5237 dev_priv->num_fence_regs = 32; 5238 else if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev)) 5239 dev_priv->num_fence_regs = 16; 5240 else 5241 dev_priv->num_fence_regs = 8; 5242 5243 if (intel_vgpu_active(dev)) 5244 dev_priv->num_fence_regs = 5245 I915_READ(vgtif_reg(avail_rs.fence_num)); 5246 5247 /* Initialize fence registers to zero */ 5248 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 5249 i915_gem_restore_fences(dev); 5250 5251 i915_gem_detect_bit_6_swizzle(dev); 5252 init_waitqueue_head(&dev_priv->pending_flip_queue); 5253 5254 dev_priv->mm.interruptible = true; 5255 5256 i915_gem_shrinker_init(dev_priv); 5257 5258 lockinit(&dev_priv->fb_tracking.lock, "drmftl", 0, LK_CANRECURSE); 5259 } 5260 5261 void i915_gem_release(struct drm_device *dev, struct drm_file *file) 5262 { 5263 struct drm_i915_file_private *file_priv = file->driver_priv; 5264 5265 /* Clean up our request list when the client is going away, so that 5266 * later retire_requests won't dereference our soon-to-be-gone 5267 * file_priv. 
5268 */ 5269 spin_lock(&file_priv->mm.lock); 5270 while (!list_empty(&file_priv->mm.request_list)) { 5271 struct drm_i915_gem_request *request; 5272 5273 request = list_first_entry(&file_priv->mm.request_list, 5274 struct drm_i915_gem_request, 5275 client_list); 5276 list_del(&request->client_list); 5277 request->file_priv = NULL; 5278 } 5279 spin_unlock(&file_priv->mm.lock); 5280 5281 if (!list_empty(&file_priv->rps.link)) { 5282 spin_lock(&to_i915(dev)->rps.client_lock); 5283 list_del(&file_priv->rps.link); 5284 spin_unlock(&to_i915(dev)->rps.client_lock); 5285 } 5286 } 5287 5288 int 5289 i915_gem_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot, 5290 vm_ooffset_t foff, struct ucred *cred, u_short *color) 5291 { 5292 *color = 0; /* XXXKIB */ 5293 return (0); 5294 } 5295 5296 void 5297 i915_gem_pager_dtor(void *handle) 5298 { 5299 struct drm_gem_object *obj; 5300 struct drm_device *dev; 5301 5302 obj = handle; 5303 dev = obj->dev; 5304 5305 mutex_lock(&dev->struct_mutex); 5306 drm_gem_free_mmap_offset(obj); 5307 i915_gem_release_mmap(to_intel_bo(obj)); 5308 drm_gem_object_unreference(obj); 5309 mutex_unlock(&dev->struct_mutex); 5310 } 5311 5312 int i915_gem_open(struct drm_device *dev, struct drm_file *file) 5313 { 5314 struct drm_i915_file_private *file_priv; 5315 int ret; 5316 5317 DRM_DEBUG_DRIVER("\n"); 5318 5319 file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL); 5320 if (!file_priv) 5321 return -ENOMEM; 5322 5323 file->driver_priv = file_priv; 5324 file_priv->dev_priv = dev->dev_private; 5325 file_priv->file = file; 5326 INIT_LIST_HEAD(&file_priv->rps.link); 5327 5328 spin_init(&file_priv->mm.lock, "i915_priv"); 5329 INIT_LIST_HEAD(&file_priv->mm.request_list); 5330 5331 ret = i915_gem_context_open(dev, file); 5332 if (ret) 5333 kfree(file_priv); 5334 5335 return ret; 5336 } 5337 5338 /** 5339 * i915_gem_track_fb - update frontbuffer tracking 5340 * old: current GEM buffer for the frontbuffer slots 5341 * new: new GEM buffer for the frontbuffer slots 5342 * frontbuffer_bits: bitmask of frontbuffer slots 5343 * 5344 * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them 5345 * from @old and setting them in @new. Both @old and @new can be NULL. 5346 */ 5347 void i915_gem_track_fb(struct drm_i915_gem_object *old, 5348 struct drm_i915_gem_object *new, 5349 unsigned frontbuffer_bits) 5350 { 5351 if (old) { 5352 WARN_ON(!mutex_is_locked(&old->base.dev->struct_mutex)); 5353 WARN_ON(!(old->frontbuffer_bits & frontbuffer_bits)); 5354 old->frontbuffer_bits &= ~frontbuffer_bits; 5355 } 5356 5357 if (new) { 5358 WARN_ON(!mutex_is_locked(&new->base.dev->struct_mutex)); 5359 WARN_ON(new->frontbuffer_bits & frontbuffer_bits); 5360 new->frontbuffer_bits |= frontbuffer_bits; 5361 } 5362 } 5363 5364 /* All the new VM stuff */ 5365 unsigned long 5366 i915_gem_obj_offset(struct drm_i915_gem_object *o, 5367 struct i915_address_space *vm) 5368 { 5369 struct drm_i915_private *dev_priv = o->base.dev->dev_private; 5370 struct i915_vma *vma; 5371 5372 WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base); 5373 5374 list_for_each_entry(vma, &o->vma_list, vma_link) { 5375 if (i915_is_ggtt(vma->vm) && 5376 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) 5377 continue; 5378 if (vma->vm == vm) 5379 return vma->node.start; 5380 } 5381 5382 WARN(1, "%s vma for this object not found.\n", 5383 i915_is_ggtt(vm) ? 
"global" : "ppgtt"); 5384 return -1; 5385 } 5386 5387 unsigned long 5388 i915_gem_obj_ggtt_offset_view(struct drm_i915_gem_object *o, 5389 const struct i915_ggtt_view *view) 5390 { 5391 struct i915_address_space *ggtt = i915_obj_to_ggtt(o); 5392 struct i915_vma *vma; 5393 5394 list_for_each_entry(vma, &o->vma_list, vma_link) 5395 if (vma->vm == ggtt && 5396 i915_ggtt_view_equal(&vma->ggtt_view, view)) 5397 return vma->node.start; 5398 5399 WARN(1, "global vma for this object not found. (view=%u)\n", view->type); 5400 return -1; 5401 } 5402 5403 bool i915_gem_obj_bound(struct drm_i915_gem_object *o, 5404 struct i915_address_space *vm) 5405 { 5406 struct i915_vma *vma; 5407 5408 list_for_each_entry(vma, &o->vma_list, vma_link) { 5409 if (i915_is_ggtt(vma->vm) && 5410 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) 5411 continue; 5412 if (vma->vm == vm && drm_mm_node_allocated(&vma->node)) 5413 return true; 5414 } 5415 5416 return false; 5417 } 5418 5419 bool i915_gem_obj_ggtt_bound_view(struct drm_i915_gem_object *o, 5420 const struct i915_ggtt_view *view) 5421 { 5422 struct i915_address_space *ggtt = i915_obj_to_ggtt(o); 5423 struct i915_vma *vma; 5424 5425 list_for_each_entry(vma, &o->vma_list, vma_link) 5426 if (vma->vm == ggtt && 5427 i915_ggtt_view_equal(&vma->ggtt_view, view) && 5428 drm_mm_node_allocated(&vma->node)) 5429 return true; 5430 5431 return false; 5432 } 5433 5434 bool i915_gem_obj_bound_any(struct drm_i915_gem_object *o) 5435 { 5436 struct i915_vma *vma; 5437 5438 list_for_each_entry(vma, &o->vma_list, vma_link) 5439 if (drm_mm_node_allocated(&vma->node)) 5440 return true; 5441 5442 return false; 5443 } 5444 5445 unsigned long i915_gem_obj_size(struct drm_i915_gem_object *o, 5446 struct i915_address_space *vm) 5447 { 5448 struct drm_i915_private *dev_priv = o->base.dev->dev_private; 5449 struct i915_vma *vma; 5450 5451 WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base); 5452 5453 BUG_ON(list_empty(&o->vma_list)); 5454 5455 list_for_each_entry(vma, &o->vma_list, vma_link) { 5456 if (i915_is_ggtt(vma->vm) && 5457 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) 5458 continue; 5459 if (vma->vm == vm) 5460 return vma->node.size; 5461 } 5462 return 0; 5463 } 5464 5465 bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj) 5466 { 5467 struct i915_vma *vma; 5468 list_for_each_entry(vma, &obj->vma_list, vma_link) 5469 if (vma->pin_count > 0) 5470 return true; 5471 5472 return false; 5473 } 5474 5475