/*
 * Copyright © 2008-2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */

#include <drm/drmP.h>
#include <drm/drm_vma_manager.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "i915_vgpu.h"
#include "i915_trace.h"
#include "intel_drv.h"
#include <linux/shmem_fs.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/pci.h>

#define RQ_BUG_ON(expr)

static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
static void
i915_gem_object_retire__write(struct drm_i915_gem_object *obj);
static void
i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring);

static bool cpu_cache_is_coherent(struct drm_device *dev,
				  enum i915_cache_level level)
{
	return HAS_LLC(dev) || level != I915_CACHE_NONE;
}

static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
	if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
		return true;

	return obj->pin_display;
}

/* some bookkeeping */
static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
				  size_t size)
{
	spin_lock(&dev_priv->mm.object_stat_lock);
	dev_priv->mm.object_count++;
	dev_priv->mm.object_memory += size;
	spin_unlock(&dev_priv->mm.object_stat_lock);
}

static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
				     size_t size)
{
	spin_lock(&dev_priv->mm.object_stat_lock);
	dev_priv->mm.object_count--;
	dev_priv->mm.object_memory -= size;
	spin_unlock(&dev_priv->mm.object_stat_lock);
}

static int
i915_gem_wait_for_error(struct i915_gpu_error *error)
{
	int ret;

#define EXIT_COND (!i915_reset_in_progress(error) || \
		   i915_terminally_wedged(error))
	if (EXIT_COND)
		return 0;

	/*
	 * Only wait 10 seconds for the gpu reset to complete to avoid hanging
	 * userspace. If it takes that long something really bad is going on and
	 * we should simply try to bail out and fail as gracefully as possible.
	 */
	ret = wait_event_interruptible_timeout(error->reset_queue,
					       EXIT_COND,
					       10*HZ);
	if (ret == 0) {
		DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
		return -EIO;
	} else if (ret < 0) {
		return ret;
	}
#undef EXIT_COND

	return 0;
}

int i915_mutex_lock_interruptible(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret;

	ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
	if (ret)
		return ret;

	ret = mutex_lock_interruptible(&dev->struct_mutex);
	if (ret)
		return ret;

	WARN_ON(i915_verify_lists(dev));
	return 0;
}

int
i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
			    struct drm_file *file)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_get_aperture *args = data;
	struct i915_gtt *ggtt = &dev_priv->gtt;
	struct i915_vma *vma;
	size_t pinned;

	pinned = 0;
	mutex_lock(&dev->struct_mutex);
	list_for_each_entry(vma, &ggtt->base.active_list, mm_list)
		if (vma->pin_count)
			pinned += vma->node.size;
	list_for_each_entry(vma, &ggtt->base.inactive_list, mm_list)
		if (vma->pin_count)
			pinned += vma->node.size;
	mutex_unlock(&dev->struct_mutex);

	args->aper_size = dev_priv->gtt.base.total;
	args->aper_available_size = args->aper_size - pinned;

	return 0;
}

#if 0
static int
i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
{
	struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
	char *vaddr = obj->phys_handle->vaddr;
	struct sg_table *st;
	struct scatterlist *sg;
	int i;

	if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
		return -EINVAL;

	for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
		struct page *page;
		char *src;

		page = shmem_read_mapping_page(mapping, i);
		if (IS_ERR(page))
			return PTR_ERR(page);

		src = kmap_atomic(page);
		memcpy(vaddr, src, PAGE_SIZE);
		drm_clflush_virt_range(vaddr, PAGE_SIZE);
		kunmap_atomic(src);

		page_cache_release(page);
		vaddr += PAGE_SIZE;
	}

	i915_gem_chipset_flush(obj->base.dev);

	st = kmalloc(sizeof(*st), GFP_KERNEL);
	if (st == NULL)
		return -ENOMEM;

	if (sg_alloc_table(st, 1, GFP_KERNEL)) {
		kfree(st);
		return -ENOMEM;
	}

	sg = st->sgl;
	sg->offset = 0;
	sg->length = obj->base.size;

	sg_dma_address(sg) = obj->phys_handle->busaddr;
	sg_dma_len(sg) = obj->base.size;

	obj->pages = st;
	return 0;
}

static void
i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj)
{
	int ret;

	BUG_ON(obj->madv == __I915_MADV_PURGED);

	ret = i915_gem_object_set_to_cpu_domain(obj, true);
	if (ret) {
		/* In the event of a disaster, abandon all caches and
		 * hope for the best.
		 */
		WARN_ON(ret != -EIO);
		obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
	}

	if (obj->madv == I915_MADV_DONTNEED)
		obj->dirty = 0;

	if (obj->dirty) {
		struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
		char *vaddr = obj->phys_handle->vaddr;
		int i;

		for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
			struct page *page;
			char *dst;

			page = shmem_read_mapping_page(mapping, i);
			if (IS_ERR(page))
				continue;

			dst = kmap_atomic(page);
			drm_clflush_virt_range(vaddr, PAGE_SIZE);
			memcpy(dst, vaddr, PAGE_SIZE);
			kunmap_atomic(dst);

			set_page_dirty(page);
			if (obj->madv == I915_MADV_WILLNEED)
				mark_page_accessed(page);
			page_cache_release(page);
			vaddr += PAGE_SIZE;
		}
		obj->dirty = 0;
	}

	sg_free_table(obj->pages);
	kfree(obj->pages);
}

static void
i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
{
	drm_pci_free(obj->base.dev, obj->phys_handle);
}

static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
	.get_pages = i915_gem_object_get_pages_phys,
	.put_pages = i915_gem_object_put_pages_phys,
	.release = i915_gem_object_release_phys,
};
#endif

static int
drop_pages(struct drm_i915_gem_object *obj)
{
	struct i915_vma *vma, *next;
	int ret;

	drm_gem_object_reference(&obj->base);
	list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link)
		if (i915_vma_unbind(vma))
			break;

	ret = i915_gem_object_put_pages(obj);
	drm_gem_object_unreference(&obj->base);

	return ret;
}

int
i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
			    int align)
{
	drm_dma_handle_t *phys;
	int ret;

	if (obj->phys_handle) {
		if ((unsigned long)obj->phys_handle->vaddr & (align - 1))
			return -EBUSY;

		return 0;
	}

	if (obj->madv != I915_MADV_WILLNEED)
		return -EFAULT;

#if 0
	if (obj->base.filp == NULL)
		return -EINVAL;
#endif

	ret = drop_pages(obj);
	if (ret)
		return ret;

	/* create a new object */
	phys = drm_pci_alloc(obj->base.dev, obj->base.size, align);
	if (!phys)
		return -ENOMEM;

	obj->phys_handle = phys;
#if 0
	obj->ops = &i915_gem_phys_ops;
#endif

	return i915_gem_object_get_pages(obj);
}
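
#if 0
/*
 * Illustrative sketch only, not driver code: how a caller might use
 * i915_gem_object_attach_phys(). The 64KiB alignment is an assumed
 * value for illustration; the function only requires a power of two,
 * since it tests the vaddr against an (align - 1) mask above.
 */
static int example_attach_phys(struct drm_i915_gem_object *obj)
{
	/* align = 0x10000, so (align - 1) masks the low 16 bits of vaddr */
	return i915_gem_object_attach_phys(obj, 64 * 1024);
}
#endif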

static int
i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pwrite *args,
		     struct drm_file *file_priv)
{
	struct drm_device *dev = obj->base.dev;
	void *vaddr = (char *)obj->phys_handle->vaddr + args->offset;
	char __user *user_data = to_user_ptr(args->data_ptr);
	int ret = 0;

	/* We manually control the domain here and pretend that it
	 * remains coherent i.e. in the GTT domain, like shmem_pwrite.
	 */
	ret = i915_gem_object_wait_rendering(obj, false);
	if (ret)
		return ret;

	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
	if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) {
		unsigned long unwritten;

		/* The physical object once assigned is fixed for the lifetime
		 * of the obj, so we can safely drop the lock and continue
		 * to access vaddr.
		 */
		mutex_unlock(&dev->struct_mutex);
		unwritten = copy_from_user(vaddr, user_data, args->size);
		mutex_lock(&dev->struct_mutex);
		if (unwritten) {
			ret = -EFAULT;
			goto out;
		}
	}

	drm_clflush_virt_range(vaddr, args->size);
	i915_gem_chipset_flush(dev);

out:
	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
	return ret;
}

void *i915_gem_object_alloc(struct drm_device *dev)
{
	return kmalloc(sizeof(struct drm_i915_gem_object),
		       M_DRM, M_WAITOK | M_ZERO);
}

void i915_gem_object_free(struct drm_i915_gem_object *obj)
{
	kfree(obj);
}

static int
i915_gem_create(struct drm_file *file,
		struct drm_device *dev,
		uint64_t size,
		uint32_t *handle_p)
{
	struct drm_i915_gem_object *obj;
	int ret;
	u32 handle;

	size = roundup(size, PAGE_SIZE);
	if (size == 0)
		return -EINVAL;

	/* Allocate the new object */
	obj = i915_gem_alloc_object(dev, size);
	if (obj == NULL)
		return -ENOMEM;

	ret = drm_gem_handle_create(file, &obj->base, &handle);
	/* drop reference from allocate - handle holds it now */
	drm_gem_object_unreference_unlocked(&obj->base);
	if (ret)
		return ret;

	*handle_p = handle;
	return 0;
}

int
i915_gem_dumb_create(struct drm_file *file,
		     struct drm_device *dev,
		     struct drm_mode_create_dumb *args)
{
	/* have to work out size/pitch and return them */
	args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64);
	args->size = args->pitch * args->height;
	return i915_gem_create(file, dev,
			       args->size, &args->handle);
}

/**
 * Creates a new mm object and returns a handle to it.
 */
int
i915_gem_create_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_gem_create *args = data;

	return i915_gem_create(file, dev,
			       args->size, &args->handle);
}

static inline int
__copy_to_user_swizzled(char __user *cpu_vaddr,
			const char *gpu_vaddr, int gpu_offset,
			int length)
{
	int ret, cpu_offset = 0;

	while (length > 0) {
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
		int this_length = min(cacheline_end - gpu_offset, length);
		int swizzled_gpu_offset = gpu_offset ^ 64;

		ret = __copy_to_user(cpu_vaddr + cpu_offset,
				     gpu_vaddr + swizzled_gpu_offset,
				     this_length);
		if (ret)
			return ret + length;

		cpu_offset += this_length;
		gpu_offset += this_length;
		length -= this_length;
	}

	return 0;
}

static inline int
__copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
			  const char __user *cpu_vaddr,
			  int length)
{
	int ret, cpu_offset = 0;

	while (length > 0) {
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
		int this_length = min(cacheline_end - gpu_offset, length);
		int swizzled_gpu_offset = gpu_offset ^ 64;

		ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
				       cpu_vaddr + cpu_offset,
				       this_length);
		if (ret)
			return ret + length;

		cpu_offset += this_length;
		gpu_offset += this_length;
		length -= this_length;
	}

	return 0;
}
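
#if 0
/*
 * Illustrative sketch only: the effect of the "^ 64" in the swizzled
 * copy helpers above. On a bit-17-swizzled page each 64-byte cacheline
 * is swapped with its partner in the same 128-byte pair, so offset 0
 * maps to 64, 64 to 0, 128 to 192, 192 to 128, and so on. The helper
 * name is made up for this example.
 */
static void example_swizzle_map(void)
{
	int gpu_offset;

	for (gpu_offset = 0; gpu_offset < 256; gpu_offset += 64)
		kprintf("logical offset %3d <-> swizzled offset %3d\n",
			gpu_offset, gpu_offset ^ 64);
}
#endif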

/*
 * Pins the specified object's pages and synchronizes the object with
 * GPU accesses. Sets needs_clflush to non-zero if the caller should
 * flush the object from the CPU cache.
 */
int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
				    int *needs_clflush)
{
	int ret;

	*needs_clflush = 0;

#if 0
	if (!obj->base.filp)
		return -EINVAL;
#endif

	if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) {
		/* If we're not in the cpu read domain, set ourself into the gtt
		 * read domain and manually flush cachelines (if required). This
		 * optimizes for the case when the gpu will dirty the data
		 * anyway again before the next pread happens. */
		*needs_clflush = !cpu_cache_is_coherent(obj->base.dev,
							obj->cache_level);
		ret = i915_gem_object_wait_rendering(obj, true);
		if (ret)
			return ret;
	}

	ret = i915_gem_object_get_pages(obj);
	if (ret)
		return ret;

	i915_gem_object_pin_pages(obj);

	return ret;
}

/* Per-page copy function for the shmem pread fastpath.
 * Flushes invalid cachelines before reading the target if
 * needs_clflush is set. */
static int
shmem_pread_fast(struct vm_page *page, int shmem_page_offset, int page_length,
		 char __user *user_data,
		 bool page_do_bit17_swizzling, bool needs_clflush)
{
	char *vaddr;
	int ret;

	if (unlikely(page_do_bit17_swizzling))
		return -EINVAL;

	vaddr = kmap_atomic(page);
	if (needs_clflush)
		drm_clflush_virt_range(vaddr + shmem_page_offset,
				       page_length);
	ret = __copy_to_user_inatomic(user_data,
				      vaddr + shmem_page_offset,
				      page_length);
	kunmap_atomic(vaddr);

	return ret ? -EFAULT : 0;
}

static void
shmem_clflush_swizzled_range(char *addr, unsigned long length,
			     bool swizzled)
{
	if (unlikely(swizzled)) {
		unsigned long start = (unsigned long) addr;
		unsigned long end = (unsigned long) addr + length;

		/* For swizzling simply ensure that we always flush both
		 * channels. Lame, but simple and it works. Swizzled
		 * pwrite/pread is far from a hotpath - current userspace
		 * doesn't use it at all. */
		start = round_down(start, 128);
		end = round_up(end, 128);

		drm_clflush_virt_range((void *)start, end - start);
	} else {
		drm_clflush_virt_range(addr, length);
	}
}

/* Only difference to the fast-path function is that this can handle bit17
 * and uses non-atomic copy and kmap functions. */
static int
shmem_pread_slow(struct vm_page *page, int shmem_page_offset, int page_length,
		 char __user *user_data,
		 bool page_do_bit17_swizzling, bool needs_clflush)
{
	char *vaddr;
	int ret;

	vaddr = kmap(page);
	if (needs_clflush)
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
					     page_length,
					     page_do_bit17_swizzling);

	if (page_do_bit17_swizzling)
		ret = __copy_to_user_swizzled(user_data,
					      vaddr, shmem_page_offset,
					      page_length);
	else
		ret = __copy_to_user(user_data,
				     vaddr + shmem_page_offset,
				     page_length);
	kunmap(page);

	return ret ? -EFAULT : 0;
}
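
#if 0
/*
 * Illustrative sketch only: why shmem_clflush_swizzled_range() rounds
 * to 128 bytes. A swizzled access may really live in the partner
 * cacheline, so a flush of [70, 90) is widened to [0, 128), covering
 * both 64-byte lines of the pair. The numbers are made up.
 */
static void example_swizzled_flush_bounds(void)
{
	unsigned long start = round_down(70, 128);	/* -> 0 */
	unsigned long end = round_up(70 + 20, 128);	/* -> 128 */

	kprintf("flush [%lu, %lu)\n", start, end);
}
#endif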

static int
i915_gem_shmem_pread(struct drm_device *dev,
		     struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pread *args,
		     struct drm_file *file)
{
	char __user *user_data;
	ssize_t remain;
	loff_t offset;
	int shmem_page_offset, page_length, ret = 0;
	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
	int prefaulted = 0;
	int needs_clflush = 0;
	struct sg_page_iter sg_iter;

	user_data = to_user_ptr(args->data_ptr);
	remain = args->size;

	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);

	ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
	if (ret)
		return ret;

	offset = args->offset;

	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
			 offset >> PAGE_SHIFT) {
		struct vm_page *page = sg_page_iter_page(&sg_iter);

		if (remain <= 0)
			break;

		/* Operation in this page
		 *
		 * shmem_page_offset = offset within page in shmem file
		 * page_length = bytes to copy for this page
		 */
		shmem_page_offset = offset_in_page(offset);
		page_length = remain;
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - shmem_page_offset;

		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
			(page_to_phys(page) & (1 << 17)) != 0;

		ret = shmem_pread_fast(page, shmem_page_offset, page_length,
				       user_data, page_do_bit17_swizzling,
				       needs_clflush);
		if (ret == 0)
			goto next_page;

		mutex_unlock(&dev->struct_mutex);

		if (likely(!i915.prefault_disable) && !prefaulted) {
			ret = fault_in_multipages_writeable(user_data, remain);
			/* Userspace is tricking us, but we've already clobbered
			 * its pages with the prefault and promised to write the
			 * data up to the first fault. Hence ignore any errors
			 * and just continue. */
			(void)ret;
			prefaulted = 1;
		}

		ret = shmem_pread_slow(page, shmem_page_offset, page_length,
				       user_data, page_do_bit17_swizzling,
				       needs_clflush);

		mutex_lock(&dev->struct_mutex);

		if (ret)
			goto out;

next_page:
		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

out:
	i915_gem_object_unpin_pages(obj);

	return ret;
}
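
#if 0
/*
 * Illustrative sketch only: the per-page span arithmetic shared by the
 * pread loop above and the pwrite loop below. With an assumed 4KiB
 * page, a copy of 0x40 bytes at offset 0x1ff0 is split into 0x10 bytes
 * up to the page boundary and 0x30 bytes in the next page.
 */
static void example_page_span(void)
{
	loff_t offset = 0x1ff0;
	ssize_t remain = 0x40;

	while (remain > 0) {
		int page_offset = offset_in_page(offset);
		int page_length = remain;

		if ((page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - page_offset;

		kprintf("copy %#x bytes at offset %#lx\n",
			page_length, (unsigned long)offset);
		remain -= page_length;
		offset += page_length;
	}
}
#endif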

/**
 * Reads data from the object referenced by handle.
 *
 * On error, the contents of *data are undefined.
 */
int
i915_gem_pread_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file)
{
	struct drm_i915_gem_pread *args = data;
	struct drm_i915_gem_object *obj;
	int ret = 0;

	if (args->size == 0)
		return 0;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Bounds check source. */
	if (args->offset > obj->base.size ||
	    args->size > obj->base.size - args->offset) {
		ret = -EINVAL;
		goto out;
	}

	/* prime objects have no backing filp to GEM pread/pwrite
	 * pages from.
	 */

	trace_i915_gem_object_pread(obj, args->offset, args->size);

	ret = i915_gem_shmem_pread(dev, obj, args, file);

out:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/* This is the fast write path which cannot handle
 * page faults in the source data
 */

static inline int
fast_user_write(struct io_mapping *mapping,
		loff_t page_base, int page_offset,
		char __user *user_data,
		int length)
{
	void __iomem *vaddr_atomic;
	void *vaddr;
	unsigned long unwritten;

	vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
	/* We can use the cpu mem copy function because this is X86. */
	vaddr = (char __force*)vaddr_atomic + page_offset;
	unwritten = __copy_from_user_inatomic_nocache(vaddr,
						      user_data, length);
	io_mapping_unmap_atomic(vaddr_atomic);
	return unwritten;
}

/**
 * This is the fast pwrite path, where we copy the data directly from the
 * user into the GTT, uncached.
 */
static int
i915_gem_gtt_pwrite_fast(struct drm_device *dev,
			 struct drm_i915_gem_object *obj,
			 struct drm_i915_gem_pwrite *args,
			 struct drm_file *file)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	ssize_t remain;
	loff_t offset, page_base;
	char __user *user_data;
	int page_offset, page_length, ret;

	ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK);
	if (ret)
		goto out;

	ret = i915_gem_object_set_to_gtt_domain(obj, true);
	if (ret)
		goto out_unpin;

	ret = i915_gem_object_put_fence(obj);
	if (ret)
		goto out_unpin;

	user_data = to_user_ptr(args->data_ptr);
	remain = args->size;

	offset = i915_gem_obj_ggtt_offset(obj) + args->offset;

	intel_fb_obj_invalidate(obj, ORIGIN_GTT);

	while (remain > 0) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		page_base = offset & ~PAGE_MASK;
		page_offset = offset_in_page(offset);
		page_length = remain;
		if ((page_offset + remain) > PAGE_SIZE)
			page_length = PAGE_SIZE - page_offset;

		/* If we get a fault while copying data, then (presumably) our
		 * source page isn't available. Return the error and we'll
		 * retry in the slow path.
		 */
		if (fast_user_write(dev_priv->gtt.mappable, page_base,
				    page_offset, user_data, page_length)) {
			ret = -EFAULT;
			goto out_flush;
		}

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

out_flush:
	intel_fb_obj_flush(obj, false, ORIGIN_GTT);
out_unpin:
	i915_gem_object_ggtt_unpin(obj);
out:
	return ret;
}

/* Per-page copy function for the shmem pwrite fastpath.
 * Flushes invalid cachelines before writing to the target if
 * needs_clflush_before is set and flushes out any written cachelines after
 * writing if needs_clflush is set.
 */
static int
shmem_pwrite_fast(struct vm_page *page, int shmem_page_offset, int page_length,
		  char __user *user_data,
		  bool page_do_bit17_swizzling,
		  bool needs_clflush_before,
		  bool needs_clflush_after)
{
	char *vaddr;
	int ret;

	if (unlikely(page_do_bit17_swizzling))
		return -EINVAL;

	vaddr = kmap_atomic(page);
	if (needs_clflush_before)
		drm_clflush_virt_range(vaddr + shmem_page_offset,
				       page_length);
	ret = __copy_from_user_inatomic(vaddr + shmem_page_offset,
					user_data, page_length);
	if (needs_clflush_after)
		drm_clflush_virt_range(vaddr + shmem_page_offset,
				       page_length);
	kunmap_atomic(vaddr);

	return ret ? -EFAULT : 0;
}

/* Only difference to the fast-path function is that this can handle bit17
 * and uses non-atomic copy and kmap functions. */
static int
shmem_pwrite_slow(struct vm_page *page, int shmem_page_offset, int page_length,
		  char __user *user_data,
		  bool page_do_bit17_swizzling,
		  bool needs_clflush_before,
		  bool needs_clflush_after)
{
	char *vaddr;
	int ret;

	vaddr = kmap(page);
	if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
					     page_length,
					     page_do_bit17_swizzling);
	if (page_do_bit17_swizzling)
		ret = __copy_from_user_swizzled(vaddr, shmem_page_offset,
						user_data,
						page_length);
	else
		ret = __copy_from_user(vaddr + shmem_page_offset,
				       user_data,
				       page_length);
	if (needs_clflush_after)
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
					     page_length,
					     page_do_bit17_swizzling);
	kunmap(page);

	return ret ? -EFAULT : 0;
}

static int
i915_gem_shmem_pwrite(struct drm_device *dev,
		      struct drm_i915_gem_object *obj,
		      struct drm_i915_gem_pwrite *args,
		      struct drm_file *file)
{
	ssize_t remain;
	loff_t offset;
	char __user *user_data;
	int shmem_page_offset, page_length, ret = 0;
	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
	int hit_slowpath = 0;
	int needs_clflush_after = 0;
	int needs_clflush_before = 0;
	struct sg_page_iter sg_iter;

	user_data = to_user_ptr(args->data_ptr);
	remain = args->size;

	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);

	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
		/* If we're not in the cpu write domain, set ourself into the gtt
		 * write domain and manually flush cachelines (if required). This
		 * optimizes for the case when the gpu will use the data
		 * right away and we therefore have to clflush anyway. */
		needs_clflush_after = cpu_write_needs_clflush(obj);
		ret = i915_gem_object_wait_rendering(obj, false);
		if (ret)
			return ret;
	}
	/* Same trick applies to invalidate partially written cachelines read
	 * before writing.
	 */
	if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0)
		needs_clflush_before =
			!cpu_cache_is_coherent(dev, obj->cache_level);

	ret = i915_gem_object_get_pages(obj);
	if (ret)
		return ret;

	intel_fb_obj_invalidate(obj, ORIGIN_CPU);

	i915_gem_object_pin_pages(obj);

	offset = args->offset;
	obj->dirty = 1;

	VM_OBJECT_LOCK(obj->base.vm_obj);
	vm_object_pip_add(obj->base.vm_obj, 1);

	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
			 offset >> PAGE_SHIFT) {
		struct vm_page *page = sg_page_iter_page(&sg_iter);
		int partial_cacheline_write;

		if (remain <= 0)
			break;

		/* Operation in this page
		 *
		 * shmem_page_offset = offset within page in shmem file
		 * page_length = bytes to copy for this page
		 */
		shmem_page_offset = offset_in_page(offset);

		page_length = remain;
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - shmem_page_offset;

		/* If we don't overwrite a cacheline completely we need to be
		 * careful to have up-to-date data by first clflushing. Don't
		 * overcomplicate things and flush the entire page. */
		partial_cacheline_write = needs_clflush_before &&
			((shmem_page_offset | page_length)
				& (cpu_clflush_line_size - 1));

		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
			(page_to_phys(page) & (1 << 17)) != 0;

		ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
					user_data, page_do_bit17_swizzling,
					partial_cacheline_write,
					needs_clflush_after);
		if (ret == 0)
			goto next_page;

		hit_slowpath = 1;
		mutex_unlock(&dev->struct_mutex);
		ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
					user_data, page_do_bit17_swizzling,
					partial_cacheline_write,
					needs_clflush_after);

		mutex_lock(&dev->struct_mutex);

		if (ret)
			goto out;

next_page:
		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}
	vm_object_pip_wakeup(obj->base.vm_obj);
	VM_OBJECT_UNLOCK(obj->base.vm_obj);

out:
	i915_gem_object_unpin_pages(obj);

	if (hit_slowpath) {
		/*
		 * Fixup: Flush cpu caches in case we didn't flush the dirty
		 * cachelines in-line while writing and the object moved
		 * out of the cpu write domain while we've dropped the lock.
		 */
		if (!needs_clflush_after &&
		    obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
			if (i915_gem_clflush_object(obj, obj->pin_display))
				needs_clflush_after = true;
		}
	}

	if (needs_clflush_after)
		i915_gem_chipset_flush(dev);
	else
		obj->cache_dirty = true;

	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
	return ret;
}
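
#if 0
/*
 * Illustrative sketch only: the partial_cacheline_write test used
 * above. Assuming cpu_clflush_line_size == 64, a 64-byte write at
 * offset 0 leaves no partially written line (mask result 0, no
 * pre-flush needed), while a 60-byte write at offset 2 straddles two
 * partial lines (non-zero result, pre-flush needed).
 */
static void example_partial_cacheline(void)
{
	const int mask = 64 - 1;

	kprintf("aligned: %d\n", (0 | 64) & mask);	/* 0 */
	kprintf("partial: %d\n", (2 | 60) & mask);	/* 62 */
}
#endif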

/**
 * Writes data to the object referenced by handle.
 *
 * On error, the contents of the buffer that were to be modified are undefined.
 */
int
i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_pwrite *args = data;
	struct drm_i915_gem_object *obj;
	int ret;

	if (args->size == 0)
		return 0;

	if (likely(!i915.prefault_disable)) {
		ret = fault_in_multipages_readable(to_user_ptr(args->data_ptr),
						   args->size);
		if (ret)
			return -EFAULT;
	}

	intel_runtime_pm_get(dev_priv);

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		goto put_rpm;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Bounds check destination. */
	if (args->offset > obj->base.size ||
	    args->size > obj->base.size - args->offset) {
		ret = -EINVAL;
		goto out;
	}

	/* prime objects have no backing filp to GEM pread/pwrite
	 * pages from.
	 */

	trace_i915_gem_object_pwrite(obj, args->offset, args->size);

	ret = -EFAULT;
	/* We can only do the GTT pwrite on untiled buffers, as otherwise
	 * it would end up going through the fenced access, and we'll get
	 * different detiling behavior between reading and writing.
	 * pread/pwrite currently are reading and writing from the CPU
	 * perspective, requiring manual detiling by the client.
	 */
	if (obj->tiling_mode == I915_TILING_NONE &&
	    obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
	    cpu_write_needs_clflush(obj)) {
		ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
		/* Note that the gtt paths might fail with non-page-backed user
		 * pointers (e.g. gtt mappings when moving data between
		 * textures). Fallback to the shmem path in that case. */
	}

	if (ret == -EFAULT || ret == -ENOSPC) {
		if (obj->phys_handle)
			ret = i915_gem_phys_pwrite(obj, args, file);
		else
			ret = i915_gem_shmem_pwrite(dev, obj, args, file);
	}

out:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
put_rpm:
	intel_runtime_pm_put(dev_priv);

	return ret;
}

int
i915_gem_check_wedge(struct i915_gpu_error *error,
		     bool interruptible)
{
	if (i915_reset_in_progress(error)) {
		/* Non-interruptible callers can't handle -EAGAIN, hence return
		 * -EIO unconditionally for these. */
		if (!interruptible)
			return -EIO;

		/* Recovery complete, but the reset failed ... */
		if (i915_terminally_wedged(error))
			return -EIO;

		/*
		 * Check if GPU Reset is in progress - we need intel_ring_begin
		 * to work properly to reinit the hw state while the gpu is
		 * still marked as reset-in-progress. Handle this with a flag.
		 */
		if (!error->reload_in_reset)
			return -EAGAIN;
	}

	return 0;
}

static void fake_irq(unsigned long data)
{
	wakeup_one((void *)data);
}

static bool missed_irq(struct drm_i915_private *dev_priv,
		       struct intel_engine_cs *ring)
{
	return test_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings);
}

#if 0
static int __i915_spin_request(struct drm_i915_gem_request *req, int state)
{
	unsigned long timeout;
	unsigned cpu;

	/* When waiting for high frequency requests, e.g. during synchronous
	 * rendering split between the CPU and GPU, the finite amount of time
	 * required to set up the irq and wait upon it limits the response
	 * rate. By busywaiting on the request completion for a short while we
	 * can service the high frequency waits as quickly as possible.
	 * However, if it is a slow request, we want to sleep as quickly as
	 * possible. The tradeoff between waiting and sleeping is roughly the
	 * time it takes to sleep on a request, on the order of a microsecond.
	 */

	if (req->ring->irq_refcount)
		return -EBUSY;

	/* Only spin if we know the GPU is processing this request */
	if (!i915_gem_request_started(req, true))
		return -EAGAIN;

	timeout = local_clock_us(&cpu) + 5;
	while (!need_resched()) {
		if (i915_gem_request_completed(req, true))
			return 0;

		if (signal_pending_state(state, current))
			break;

		if (busywait_stop(timeout, cpu))
			break;

		cpu_relax_lowlatency();
	}

	if (i915_gem_request_completed(req, false))
		return 0;

	return -EAGAIN;
}
#endif

/**
 * __i915_wait_request - wait until execution of request has finished
 * @req: duh!
 * @reset_counter: reset sequence associated with the given request
 * @interruptible: do an interruptible wait (normally yes)
 * @timeout: in - how long to wait (NULL forever); out - how much time remaining
 *
 * Note: It is of utmost importance that the passed in seqno and reset_counter
 * values have been read by the caller in an smp safe manner. Where read-side
 * locks are involved, it is sufficient to read the reset_counter before
 * unlocking the lock that protects the seqno. For lockless tricks, the
 * reset_counter _must_ be read before, and an appropriate smp_rmb must be
 * inserted.
 *
 * Returns 0 if the request was found within the allotted time. Else returns the
 * errno with remaining time filled in timeout argument.
 */
int __i915_wait_request(struct drm_i915_gem_request *req,
			unsigned reset_counter,
			bool interruptible,
			s64 *timeout,
			struct intel_rps_client *rps)
{
	struct intel_engine_cs *ring = i915_gem_request_get_ring(req);
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	const bool irq_test_in_progress =
		ACCESS_ONCE(dev_priv->gpu_error.test_irq_rings) & intel_ring_flag(ring);
	unsigned long timeout_expire;
	s64 before, now;
	int ret, sl_timeout = 1;

	WARN(!intel_irqs_enabled(dev_priv), "IRQs disabled");

	if (list_empty(&req->list))
		return 0;

	if (i915_gem_request_completed(req, true))
		return 0;

	timeout_expire = 0;
	if (timeout) {
		if (WARN_ON(*timeout < 0))
			return -EINVAL;

		if (*timeout == 0)
			return -ETIME;

		timeout_expire = jiffies + nsecs_to_jiffies_timeout(*timeout);
	}

	if (INTEL_INFO(dev_priv)->gen >= 6)
		gen6_rps_boost(dev_priv, rps, req->emitted_jiffies);

	/* Record current time in case interrupted by signal, or wedged */
	trace_i915_gem_request_wait_begin(req);
	before = ktime_get_raw_ns();

	/* Optimistic spin for the next jiffie before touching IRQs */
#if 0
	ret = __i915_spin_request(req);
	if (ret == 0)
		goto out;
#endif

	if (!irq_test_in_progress && WARN_ON(!ring->irq_get(ring))) {
		ret = -ENODEV;
		goto out;
	}

	lockmgr(&ring->irq_queue.lock, LK_EXCLUSIVE);
	for (;;) {
		struct timer_list timer;

		/* We need to check whether any gpu reset happened in between
		 * the caller grabbing the seqno and now ... */
		if (reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter)) {
			/* ... but upgrade the -EAGAIN to an -EIO if the gpu
			 * is truly gone. */
			ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
			if (ret == 0)
				ret = -EAGAIN;
			break;
		}

		if (i915_gem_request_completed(req, false)) {
			ret = 0;
			break;
		}

		if (interruptible && signal_pending(curthread->td_lwp)) {
			ret = -ERESTARTSYS;
			break;
		}

		if (timeout && time_after_eq(jiffies, timeout_expire)) {
			ret = -ETIME;
			break;
		}

		timer.function = NULL;
		if (timeout || missed_irq(dev_priv, ring)) {
			unsigned long expire;

			setup_timer_on_stack(&timer, fake_irq, (unsigned long)&ring->irq_queue);
			expire = missed_irq(dev_priv, ring) ? jiffies + 1 : timeout_expire;
			sl_timeout = expire - jiffies;
			if (sl_timeout < 1)
				sl_timeout = 1;
			mod_timer(&timer, expire);
		}

#if 0
		io_schedule();
#endif

		if (timer.function) {
			del_singleshot_timer_sync(&timer);
			destroy_timer_on_stack(&timer);
		}

		lksleep(&ring->irq_queue, &ring->irq_queue.lock,
			interruptible ? PCATCH : 0, "lwe", sl_timeout);
	}
	lockmgr(&ring->irq_queue.lock, LK_RELEASE);
	if (!irq_test_in_progress)
		ring->irq_put(ring);

out:
	now = ktime_get_raw_ns();
	trace_i915_gem_request_wait_end(req);

	if (timeout) {
		s64 tres = *timeout - (now - before);

		*timeout = tres < 0 ? 0 : tres;

		/*
		 * Apparently ktime isn't accurate enough and occasionally has a
		 * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
		 * things up to make the test happy. We allow up to 1 jiffy.
		 *
		 * This is a regression from the timespec->ktime conversion.
		 */
		if (ret == -ETIME && *timeout < jiffies_to_usecs(1)*1000)
			*timeout = 0;
	}

	return ret;
}
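
#if 0
/*
 * Illustrative sketch only: the *timeout bookkeeping at the end of
 * __i915_wait_request(). A caller that asked for 2ms and waited 0.5ms
 * gets 1.5ms written back; one that overshot the deadline gets 0
 * rather than a negative remainder. Values are made up.
 */
static void example_timeout_remaining(void)
{
	s64 timeout = 2000000;		/* 2ms, in nanoseconds */
	s64 before = 0, now = 500000;	/* pretend 0.5ms elapsed */
	s64 tres = timeout - (now - before);

	timeout = tres < 0 ? 0 : tres;	/* -> 1500000ns remaining */
}
#endif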

int i915_gem_request_add_to_client(struct drm_i915_gem_request *req,
				   struct drm_file *file)
{
	struct drm_i915_private *dev_private;
	struct drm_i915_file_private *file_priv;

	WARN_ON(!req || !file || req->file_priv);

	if (!req || !file)
		return -EINVAL;

	if (req->file_priv)
		return -EINVAL;

	dev_private = req->ring->dev->dev_private;
	file_priv = file->driver_priv;

	spin_lock(&file_priv->mm.lock);
	req->file_priv = file_priv;
	list_add_tail(&req->client_list, &file_priv->mm.request_list);
	spin_unlock(&file_priv->mm.lock);

	req->pid = curproc->p_pid;

	return 0;
}

static inline void
i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
{
	struct drm_i915_file_private *file_priv = request->file_priv;

	if (!file_priv)
		return;

	spin_lock(&file_priv->mm.lock);
	list_del(&request->client_list);
	request->file_priv = NULL;
	spin_unlock(&file_priv->mm.lock);

#if 0
	put_pid(request->pid);
	request->pid = NULL;
#endif
}

static void i915_gem_request_retire(struct drm_i915_gem_request *request)
{
	trace_i915_gem_request_retire(request);

	/* We know the GPU must have read the request to have
	 * sent us the seqno + interrupt, so use the position
	 * of tail of the request to update the last known position
	 * of the GPU head.
	 *
	 * Note this requires that we are always called in request
	 * completion order.
	 */
	request->ringbuf->last_retired_head = request->postfix;

	list_del_init(&request->list);
	i915_gem_request_remove_from_client(request);

	i915_gem_request_unreference(request);
}

static void
__i915_gem_request_retire__upto(struct drm_i915_gem_request *req)
{
	struct intel_engine_cs *engine = req->ring;
	struct drm_i915_gem_request *tmp;

	lockdep_assert_held(&engine->dev->struct_mutex);

	if (list_empty(&req->list))
		return;

	do {
		tmp = list_first_entry(&engine->request_list,
				       typeof(*tmp), list);

		i915_gem_request_retire(tmp);
	} while (tmp != req);

	WARN_ON(i915_verify_lists(engine->dev));
}

/**
 * Waits for a request to be signaled, and cleans up the
 * request and object lists appropriately for that event.
 */
int
i915_wait_request(struct drm_i915_gem_request *req)
{
	struct drm_device *dev;
	struct drm_i915_private *dev_priv;
	bool interruptible;
	int ret;

	BUG_ON(req == NULL);

	dev = req->ring->dev;
	dev_priv = dev->dev_private;
	interruptible = dev_priv->mm.interruptible;

	BUG_ON(!mutex_is_locked(&dev->struct_mutex));

	ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
	if (ret)
		return ret;

	ret = __i915_wait_request(req,
				  atomic_read(&dev_priv->gpu_error.reset_counter),
				  interruptible, NULL, NULL);
	if (ret)
		return ret;

	__i915_gem_request_retire__upto(req);
	return 0;
}

/**
 * Ensures that all rendering to the object has completed and the object is
 * safe to unbind from the GTT or access from the CPU.
 */
int
i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
			       bool readonly)
{
	int ret, i;

	if (!obj->active)
		return 0;

	if (readonly) {
		if (obj->last_write_req != NULL) {
			ret = i915_wait_request(obj->last_write_req);
			if (ret)
				return ret;

			i = obj->last_write_req->ring->id;
			if (obj->last_read_req[i] == obj->last_write_req)
				i915_gem_object_retire__read(obj, i);
			else
				i915_gem_object_retire__write(obj);
		}
	} else {
		for (i = 0; i < I915_NUM_RINGS; i++) {
			if (obj->last_read_req[i] == NULL)
				continue;

			ret = i915_wait_request(obj->last_read_req[i]);
			if (ret)
				return ret;

			i915_gem_object_retire__read(obj, i);
		}
		RQ_BUG_ON(obj->active);
	}

	return 0;
}

static void
i915_gem_object_retire_request(struct drm_i915_gem_object *obj,
			       struct drm_i915_gem_request *req)
{
	int ring = req->ring->id;

	if (obj->last_read_req[ring] == req)
		i915_gem_object_retire__read(obj, ring);
	else if (obj->last_write_req == req)
		i915_gem_object_retire__write(obj);

	__i915_gem_request_retire__upto(req);
}

/* A nonblocking variant of the above wait. This is a highly dangerous routine
 * as the object state may change during this call.
 */
static __must_check int
i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
					    struct intel_rps_client *rps,
					    bool readonly)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_request *requests[I915_NUM_RINGS];
	unsigned reset_counter;
	int ret, i, n = 0;

	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
	BUG_ON(!dev_priv->mm.interruptible);

	if (!obj->active)
		return 0;

	ret = i915_gem_check_wedge(&dev_priv->gpu_error, true);
	if (ret)
		return ret;

	reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);

	if (readonly) {
		struct drm_i915_gem_request *req;

		req = obj->last_write_req;
		if (req == NULL)
			return 0;

		requests[n++] = i915_gem_request_reference(req);
	} else {
		for (i = 0; i < I915_NUM_RINGS; i++) {
			struct drm_i915_gem_request *req;

			req = obj->last_read_req[i];
			if (req == NULL)
				continue;

			requests[n++] = i915_gem_request_reference(req);
		}
	}

	mutex_unlock(&dev->struct_mutex);
	for (i = 0; ret == 0 && i < n; i++)
		ret = __i915_wait_request(requests[i], reset_counter, true,
					  NULL, rps);
	mutex_lock(&dev->struct_mutex);

	for (i = 0; i < n; i++) {
		if (ret == 0)
			i915_gem_object_retire_request(obj, requests[i]);
		i915_gem_request_unreference(requests[i]);
	}

	return ret;
}

static struct intel_rps_client *to_rps_client(struct drm_file *file)
{
	struct drm_i915_file_private *fpriv = file->driver_priv;
	return &fpriv->rps;
}

/**
 * Called when user space prepares to use an object with the CPU, either
 * through the mmap ioctl's mapping or a GTT mapping.
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file)
{
	struct drm_i915_gem_set_domain *args = data;
	struct drm_i915_gem_object *obj;
	uint32_t read_domains = args->read_domains;
	uint32_t write_domain = args->write_domain;
	int ret;

	/* Only handle setting domains to types used by the CPU. */
	if (write_domain & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	if (read_domains & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	/* Having something in the write domain implies it's in the read
	 * domain, and only that read domain. Enforce that in the request.
	 */
	if (write_domain != 0 && read_domains != write_domain)
		return -EINVAL;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Try to flush the object off the GPU without holding the lock.
	 * We will repeat the flush holding the lock in the normal manner
	 * to catch cases where we are gazumped.
	 */
	ret = i915_gem_object_wait_rendering__nonblocking(obj,
							  to_rps_client(file),
							  !write_domain);
	if (ret)
		goto unref;

	if (read_domains & I915_GEM_DOMAIN_GTT)
		ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
	else
		ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);

	if (write_domain != 0)
		intel_fb_obj_invalidate(obj,
					write_domain == I915_GEM_DOMAIN_GTT ?
					ORIGIN_GTT : ORIGIN_CPU);

unref:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/**
 * Called when user space has done writes to this buffer
 */
int
i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file)
{
	struct drm_i915_gem_sw_finish *args = data;
	struct drm_i915_gem_object *obj;
	int ret = 0;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Pinned buffers may be scanout, so flush the cache */
	if (obj->pin_display)
		i915_gem_object_flush_cpu_write_domain(obj);

	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}
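
#if 0
/*
 * Illustrative sketch only, written from the userspace side (it would
 * live in a libdrm client, not in this file): the set_domain/sw_finish
 * pair that brackets CPU access to a mapped object. The ioctl numbers
 * and structs are the i915 uapi names; "fd" is an open DRM device and
 * error handling is omitted.
 */
static void example_cpu_access(int fd, uint32_t handle)
{
	struct drm_i915_gem_set_domain sd = {
		.handle = handle,
		.read_domains = I915_GEM_DOMAIN_CPU,
		.write_domain = I915_GEM_DOMAIN_CPU,
	};
	struct drm_i915_gem_sw_finish sf = { .handle = handle };

	drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &sd);
	/* ... CPU reads/writes through a mapping go here ... */
	drmIoctl(fd, DRM_IOCTL_I915_GEM_SW_FINISH, &sf);
}
#endif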

/**
 * Maps the contents of an object, returning the address it is mapped
 * into.
 *
 * While the mapping holds a reference on the contents of the object, it doesn't
 * imply a ref on the object itself.
 *
 * IMPORTANT:
 *
 * DRM driver writers who look at this function as an example for how to do GEM
 * mmap support, please don't implement mmap support like here. The modern way
 * to implement DRM mmap support is with an mmap offset ioctl (like
 * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly.
 * That way debug tooling like valgrind will understand what's going on, hiding
 * the mmap call in a driver private ioctl will break that. The i915 driver only
 * does cpu mmaps this way because we didn't know better.
 */
int
i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_gem_mmap *args = data;
	struct drm_gem_object *obj;
	unsigned long addr;

	struct proc *p = curproc;
	vm_map_t map = &p->p_vmspace->vm_map;
	vm_size_t size;
	int error = 0, rv;

	if (args->flags & ~(I915_MMAP_WC))
		return -EINVAL;

	obj = drm_gem_object_lookup(dev, file, args->handle);
	if (obj == NULL)
		return -ENOENT;

	if (args->size == 0)
		goto out;

	size = round_page(args->size);
	if (map->size + size > p->p_rlimit[RLIMIT_VMEM].rlim_cur) {
		error = -ENOMEM;
		goto out;
	}

	/* prime objects have no backing filp to GEM mmap
	 * pages from.
	 */

	/*
	 * Call hint to ensure that NULL is not returned as a valid address
	 * and to reduce vm_map traversals. XXX causes instability, use a
	 * fixed low address as the start point instead to avoid the NULL
	 * return issue.
	 */
	addr = PAGE_SIZE;

	/*
	 * Use 256KB alignment. It is unclear why this matters for a
	 * virtual address but it appears to fix a number of application/X
	 * crashes and kms console switching is much faster.
	 */
	vm_object_hold(obj->vm_obj);
	vm_object_reference_locked(obj->vm_obj);
	vm_object_drop(obj->vm_obj);

	rv = vm_map_find(map, obj->vm_obj, NULL,
			 args->offset, &addr, args->size,
			 256 * 1024, /* align */
			 TRUE, /* fitit */
			 VM_MAPTYPE_NORMAL, VM_SUBSYS_DRM_GEM,
			 VM_PROT_READ | VM_PROT_WRITE, /* prot */
			 VM_PROT_READ | VM_PROT_WRITE, /* max */
			 MAP_SHARED /* cow */);
	if (rv != KERN_SUCCESS) {
		vm_object_deallocate(obj->vm_obj);
		error = -vm_mmap_to_errno(rv);
	} else {
		args->addr_ptr = (uint64_t)addr;
	}
out:
	drm_gem_object_unreference(obj);
	return (error);
}
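
#if 0
/*
 * Illustrative sketch only, userspace side (belongs in a libdrm
 * client, not this file): consuming the CPU mmap ioctl above. The
 * struct and ioctl number are the i915 uapi names; error handling is
 * omitted.
 */
static void *example_cpu_mmap(int fd, uint32_t handle, uint64_t size)
{
	struct drm_i915_gem_mmap arg = {
		.handle = handle,
		.offset = 0,
		.size = size,
	};

	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP, &arg))
		return NULL;
	return (void *)(uintptr_t)arg.addr_ptr;
}
#endif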

/**
 * i915_gem_fault - fault a page into the GTT
 *
 * vm_obj is locked on entry and expected to be locked on return.
 *
 * The vm_pager has placemarked the object with an anonymous memory page
 * which we must replace atomically to avoid races against concurrent faults
 * on the same page. XXX we currently are unable to do this atomically.
 *
 * If we are to return an error we should not touch the anonymous page,
 * the caller will deallocate it.
 *
 * XXX Most GEM calls appear to be interruptible, but we can't hard loop
 * in that case. Release all resources and wait 1 tick before retrying.
 * This is a huge problem which needs to be fixed by getting rid of most
 * of the interruptibility. The linux code does not retry but does appear
 * to have some sort of mechanism (VM_FAULT_NOPAGE ?) for the higher level
 * to be able to retry.
 *
 * --
 * @vma: VMA in question
 * @vmf: fault info
 *
 * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped
 * from userspace. The fault handler takes care of binding the object to
 * the GTT (if needed), allocating and programming a fence register (again,
 * only if needed based on whether the old reg is still valid or the object
 * is tiled) and inserting a new PTE into the faulting process.
 *
 * Note that the faulting process may involve evicting existing objects
 * from the GTT and/or fence registers to make room. So performance may
 * suffer if the GTT working set is large or there are few fence registers
 * left.
 *
 * vm_obj is locked on entry and expected to be locked on return. The VM
 * pager has placed an anonymous memory page at (obj,offset) which we have
 * to replace.
 */
int i915_gem_fault(vm_object_t vm_obj, vm_ooffset_t offset, int prot, vm_page_t *mres)
{
	struct drm_i915_gem_object *obj = to_intel_bo(vm_obj->handle);
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct i915_ggtt_view view = i915_ggtt_view_normal;
	unsigned long page_offset;
	vm_page_t m, oldm = NULL;
	int ret = 0;
	bool write = !!(prot & VM_PROT_WRITE);

	intel_runtime_pm_get(dev_priv);

	/* We don't use vmf->pgoff since that has the fake offset */
	page_offset = (unsigned long)offset;

retry:
	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		goto out;

	trace_i915_gem_object_fault(obj, page_offset, true, write);

	/* Try to flush the object off the GPU first without holding the lock.
	 * Upon reacquiring the lock, we will perform our sanity checks and then
	 * repeat the flush holding the lock in the normal manner to catch cases
	 * where we are gazumped.
	 */
	ret = i915_gem_object_wait_rendering__nonblocking(obj, NULL, !write);
	if (ret)
		goto unlock;

	/* Access to snoopable pages through the GTT is incoherent. */
	if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) {
		ret = -EFAULT;
		goto unlock;
	}

	/* Use a partial view if the object is bigger than the aperture. */
	if (obj->base.size >= dev_priv->gtt.mappable_end &&
	    obj->tiling_mode == I915_TILING_NONE) {
#if 0
		static const unsigned int chunk_size = 256; // 1 MiB

		memset(&view, 0, sizeof(view));
		view.type = I915_GGTT_VIEW_PARTIAL;
		view.params.partial.offset = rounddown(page_offset, chunk_size);
		view.params.partial.size =
			min_t(unsigned int,
			      chunk_size,
			      (vma->vm_end - vma->vm_start)/PAGE_SIZE -
			      view.params.partial.offset);
#endif
	}

	/* Now pin it into the GTT if needed */
	ret = i915_gem_object_ggtt_pin(obj, &view, 0, PIN_MAPPABLE);
	if (ret)
		goto unlock;

	ret = i915_gem_object_set_to_gtt_domain(obj, write);
	if (ret)
		goto unpin;

	ret = i915_gem_object_get_fence(obj);
	if (ret)
		goto unpin;

	/*
	 * START FREEBSD MAGIC
	 *
	 * Add a pip count to avoid destruction and certain other
	 * complex operations (such as collapses?) while unlocked.
	 */
	vm_object_pip_add(vm_obj, 1);

	/*
	 * XXX We must currently remove the placeholder page now to avoid
	 * a deadlock against a concurrent i915_gem_release_mmap().
	 * Otherwise concurrent operation will block on the busy page
	 * while holding locks which we need to obtain.
	 */
	if (*mres != NULL) {
		oldm = *mres;
		if ((oldm->flags & PG_BUSY) == 0)
			kprintf("i915_gem_fault: Page was not busy\n");
		else
			vm_page_remove(oldm);
		*mres = NULL;
	} else {
		oldm = NULL;
	}

	ret = 0;
	m = NULL;

	/*
	 * Since the object lock was dropped, another thread might have
	 * faulted on the same GTT address and instantiated the mapping.
	 * Recheck.
	 */
	m = vm_page_lookup(vm_obj, OFF_TO_IDX(offset));
	if (m != NULL) {
		/*
		 * Try to busy the page, retry on failure (non-zero ret).
		 */
		if (vm_page_busy_try(m, false)) {
			kprintf("i915_gem_fault: PG_BUSY\n");
			ret = -EINTR;
			goto unlock;
		}
		goto have_page;
	}
	/*
	 * END FREEBSD MAGIC
	 */

	obj->fault_mappable = true;

	/* Finally, remap it using the new GTT offset */
	m = vm_phys_fictitious_to_vm_page(dev_priv->gtt.mappable_base +
					  i915_gem_obj_ggtt_offset_view(obj, &view) + offset);
	if (m == NULL) {
		ret = -EFAULT;
		goto unpin;
	}
	KASSERT((m->flags & PG_FICTITIOUS) != 0, ("not fictitious %p", m));
	KASSERT(m->wire_count == 1, ("wire_count not 1 %p", m));

	/*
	 * Try to busy the page. Fails on non-zero return.
	 */
	if (vm_page_busy_try(m, false)) {
		kprintf("i915_gem_fault: PG_BUSY(2)\n");
		ret = -EINTR;
		goto unpin;
	}
	m->valid = VM_PAGE_BITS_ALL;

#if 0
	if (unlikely(view.type == I915_GGTT_VIEW_PARTIAL)) {
		/* Overriding existing pages in partial view does not cause
		 * us any trouble as TLBs are still valid because the fault
		 * is due to userspace losing part of the mapping or never
		 * having accessed it before (at this partials' range).
		 */
		unsigned long base = vma->vm_start +
				     (view.params.partial.offset << PAGE_SHIFT);
		unsigned int i;

		for (i = 0; i < view.params.partial.size; i++) {
			ret = vm_insert_pfn(vma, base + i * PAGE_SIZE, pfn + i);
			if (ret)
				break;
		}

		obj->fault_mappable = true;
	} else {
		if (!obj->fault_mappable) {
			unsigned long size = min_t(unsigned long,
						   vma->vm_end - vma->vm_start,
						   obj->base.size);
			int i;

			for (i = 0; i < size >> PAGE_SHIFT; i++) {
				ret = vm_insert_pfn(vma,
						    (unsigned long)vma->vm_start + i * PAGE_SIZE,
						    pfn + i);
				if (ret)
					break;
			}

			obj->fault_mappable = true;
		} else
			ret = vm_insert_pfn(vma,
					    (unsigned long)vmf->virtual_address,
					    pfn + page_offset);
#endif
	vm_page_insert(m, vm_obj, OFF_TO_IDX(offset));
#if 0
	}
#endif

have_page:
	*mres = m;

	i915_gem_object_ggtt_unpin_view(obj, &view);
	mutex_unlock(&dev->struct_mutex);
	ret = VM_PAGER_OK;
	goto done;

	/*
	 * ALTERNATIVE ERROR RETURN.
	 *
	 * OBJECT EXPECTED TO BE LOCKED.
	 */
unpin:
	i915_gem_object_ggtt_unpin_view(obj, &view);
unlock:
	mutex_unlock(&dev->struct_mutex);
out:
	switch (ret) {
	case -EIO:
		/*
		 * We eat errors when the gpu is terminally wedged to avoid
		 * userspace unduly crashing (gl has no provisions for mmaps to
		 * fail). But any other -EIO isn't ours (e.g. swap in failure)
		 * and so needs to be reported.
		 */
		if (!i915_terminally_wedged(&dev_priv->gpu_error)) {
			// ret = VM_FAULT_SIGBUS;
			break;
		}
	case -EAGAIN:
		/*
		 * EAGAIN means the gpu is hung and we'll wait for the error
		 * handler to reset everything when re-faulting in
		 * i915_mutex_lock_interruptible.
2019 */ 2020 case -ERESTARTSYS: 2021 case -EINTR: 2022 VM_OBJECT_UNLOCK(vm_obj); 2023 int dummy; 2024 tsleep(&dummy, 0, "delay", 1); /* XXX */ 2025 VM_OBJECT_LOCK(vm_obj); 2026 goto retry; 2027 default: 2028 WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret); 2029 ret = VM_PAGER_ERROR; 2030 break; 2031 } 2032 2033 done: 2034 if (oldm != NULL) 2035 vm_page_free(oldm); 2036 vm_object_pip_wakeup(vm_obj); 2037 2038 intel_runtime_pm_put(dev_priv); 2039 return ret; 2040 } 2041 2042 /** 2043 * i915_gem_release_mmap - remove physical page mappings 2044 * @obj: obj in question 2045 * 2046 * Preserve the reservation of the mmapping with the DRM core code, but 2047 * relinquish ownership of the pages back to the system. 2048 * 2049 * It is vital that we remove the page mapping if we have mapped a tiled 2050 * object through the GTT and then lose the fence register due to 2051 * resource pressure. Similarly if the object has been moved out of the 2052 * aperture, then pages mapped into userspace must be revoked. Removing the 2053 * mapping will then trigger a page fault on the next user access, allowing 2054 * fixup by i915_gem_fault(). 2055 */ 2056 void 2057 i915_gem_release_mmap(struct drm_i915_gem_object *obj) 2058 { 2059 vm_object_t devobj; 2060 vm_page_t m; 2061 int i, page_count; 2062 2063 if (!obj->fault_mappable) 2064 return; 2065 2066 devobj = cdev_pager_lookup(obj); 2067 if (devobj != NULL) { 2068 page_count = OFF_TO_IDX(obj->base.size); 2069 2070 VM_OBJECT_LOCK(devobj); 2071 for (i = 0; i < page_count; i++) { 2072 m = vm_page_lookup_busy_wait(devobj, i, TRUE, "915unm"); 2073 if (m == NULL) 2074 continue; 2075 cdev_pager_free_page(devobj, m); 2076 } 2077 VM_OBJECT_UNLOCK(devobj); 2078 vm_object_deallocate(devobj); 2079 } 2080 2081 obj->fault_mappable = false; 2082 } 2083 2084 void 2085 i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv) 2086 { 2087 struct drm_i915_gem_object *obj; 2088 2089 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) 2090 i915_gem_release_mmap(obj); 2091 } 2092 2093 uint32_t 2094 i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode) 2095 { 2096 uint32_t gtt_size; 2097 2098 if (INTEL_INFO(dev)->gen >= 4 || 2099 tiling_mode == I915_TILING_NONE) 2100 return size; 2101 2102 /* Previous chips need a power-of-two fence region when tiling */ 2103 if (INTEL_INFO(dev)->gen == 3) 2104 gtt_size = 1024*1024; 2105 else 2106 gtt_size = 512*1024; 2107 2108 while (gtt_size < size) 2109 gtt_size <<= 1; 2110 2111 return gtt_size; 2112 } 2113 2114 /** 2115 * i915_gem_get_gtt_alignment - return required GTT alignment for an object 2116 * @obj: object to check 2117 * 2118 * Return the required GTT alignment for an object, taking into account 2119 * potential fence register mapping. 2120 */ 2121 uint32_t 2122 i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size, 2123 int tiling_mode, bool fenced) 2124 { 2125 /* 2126 * Minimum alignment is 4k (GTT page size), but might be greater 2127 * if a fence register is needed for the object. 2128 */ 2129 if (INTEL_INFO(dev)->gen >= 4 || (!fenced && IS_G33(dev)) || 2130 tiling_mode == I915_TILING_NONE) 2131 return 4096; 2132 2133 /* 2134 * Previous chips need to be aligned to the size of the smallest 2135 * fence register that can contain the object.
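 * (Worked example, assuming gen3 with its 1 MiB minimum from above: a 1300 KiB tiled object is rounded up through 1 MiB to a 2 MiB fence region and therefore needs 2 MiB alignment; on gen2 the same walk starts from the 512 KiB minimum instead.)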
2136 */ 2137 return i915_gem_get_gtt_size(dev, size, tiling_mode); 2138 } 2139 2140 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj) 2141 { 2142 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2143 int ret; 2144 2145 #if 0 2146 if (drm_vma_node_has_offset(&obj->base.vma_node)) 2147 return 0; 2148 #endif 2149 2150 dev_priv->mm.shrinker_no_lock_stealing = true; 2151 2152 ret = drm_gem_create_mmap_offset(&obj->base); 2153 if (ret != -ENOSPC) 2154 goto out; 2155 2156 /* Badly fragmented mmap space? The only way we can recover 2157 * space is by destroying unwanted objects. We can't randomly release 2158 * mmap_offsets as userspace expects them to be persistent for the 2159 * lifetime of the objects. The closest we can do is to release the 2160 * offsets on purgeable objects by truncating them and marking them purged, 2161 * which prevents userspace from ever using that object again. 2162 */ 2163 i915_gem_shrink(dev_priv, 2164 obj->base.size >> PAGE_SHIFT, 2165 I915_SHRINK_BOUND | 2166 I915_SHRINK_UNBOUND | 2167 I915_SHRINK_PURGEABLE); 2168 ret = drm_gem_create_mmap_offset(&obj->base); 2169 if (ret != -ENOSPC) 2170 goto out; 2171 2172 i915_gem_shrink_all(dev_priv); 2173 ret = drm_gem_create_mmap_offset(&obj->base); 2174 out: 2175 dev_priv->mm.shrinker_no_lock_stealing = false; 2176 2177 return ret; 2178 } 2179 2180 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj) 2181 { 2182 drm_gem_free_mmap_offset(&obj->base); 2183 } 2184 2185 int 2186 i915_gem_mmap_gtt(struct drm_file *file, 2187 struct drm_device *dev, 2188 uint32_t handle, 2189 uint64_t *offset) 2190 { 2191 struct drm_i915_gem_object *obj; 2192 int ret; 2193 2194 ret = i915_mutex_lock_interruptible(dev); 2195 if (ret) 2196 return ret; 2197 2198 obj = to_intel_bo(drm_gem_object_lookup(dev, file, handle)); 2199 if (&obj->base == NULL) { 2200 ret = -ENOENT; 2201 goto unlock; 2202 } 2203 2204 if (obj->madv != I915_MADV_WILLNEED) { 2205 DRM_DEBUG("Attempting to mmap a purgeable buffer\n"); 2206 ret = -EFAULT; 2207 goto out; 2208 } 2209 2210 ret = i915_gem_object_create_mmap_offset(obj); 2211 if (ret) 2212 goto out; 2213 2214 *offset = DRM_GEM_MAPPING_OFF(obj->base.map_list.key) | 2215 DRM_GEM_MAPPING_KEY; 2216 2217 out: 2218 drm_gem_object_unreference(&obj->base); 2219 unlock: 2220 mutex_unlock(&dev->struct_mutex); 2221 return ret; 2222 } 2223 2224 /** 2225 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing 2226 * @dev: DRM device 2227 * @data: GTT mapping ioctl data 2228 * @file: GEM object info 2229 * 2230 * Simply returns the fake offset to userspace so it can mmap it. 2231 * The mmap call will end up in drm_gem_mmap(), which will set things 2232 * up so we can get faults in the handler above. 2233 * 2234 * The fault handler will take care of binding the object into the GTT 2235 * (since it may have been evicted to make room for something), allocating 2236 * a fence register, and mapping the appropriate aperture address into 2237 * userspace.
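 * Illustrative userspace flow (a hedged sketch using libdrm, not code
 * from this driver):
 *
 *	struct drm_i915_gem_mmap_gtt arg = { .handle = handle };
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &arg);
 *	ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
 *		   fd, arg.offset);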
2238 */ 2239 int 2240 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, 2241 struct drm_file *file) 2242 { 2243 struct drm_i915_gem_mmap_gtt *args = data; 2244 2245 return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset); 2246 } 2247 2248 /* Immediately discard the backing storage */ 2249 static void 2250 i915_gem_object_truncate(struct drm_i915_gem_object *obj) 2251 { 2252 vm_object_t vm_obj; 2253 2254 vm_obj = obj->base.vm_obj; 2255 VM_OBJECT_LOCK(vm_obj); 2256 vm_object_page_remove(vm_obj, 0, 0, false); 2257 VM_OBJECT_UNLOCK(vm_obj); 2258 2259 obj->madv = __I915_MADV_PURGED; 2260 } 2261 2262 /* Try to discard unwanted pages */ 2263 static void 2264 i915_gem_object_invalidate(struct drm_i915_gem_object *obj) 2265 { 2266 #if 0 2267 struct address_space *mapping; 2268 #endif 2269 2270 switch (obj->madv) { 2271 case I915_MADV_DONTNEED: 2272 i915_gem_object_truncate(obj); 2273 case __I915_MADV_PURGED: 2274 return; 2275 } 2276 2277 #if 0 2278 if (obj->base.filp == NULL) 2279 return; 2280 2281 mapping = file_inode(obj->base.filp)->i_mapping, 2282 invalidate_mapping_pages(mapping, 0, (loff_t)-1); 2283 #endif 2284 } 2285 2286 static void 2287 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj) 2288 { 2289 struct sg_page_iter sg_iter; 2290 int ret; 2291 2292 BUG_ON(obj->madv == __I915_MADV_PURGED); 2293 2294 ret = i915_gem_object_set_to_cpu_domain(obj, true); 2295 if (ret) { 2296 /* In the event of a disaster, abandon all caches and 2297 * hope for the best. 2298 */ 2299 WARN_ON(ret != -EIO); 2300 i915_gem_clflush_object(obj, true); 2301 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU; 2302 } 2303 2304 i915_gem_gtt_finish_object(obj); 2305 2306 if (i915_gem_object_needs_bit17_swizzle(obj)) 2307 i915_gem_object_save_bit_17_swizzle(obj); 2308 2309 if (obj->madv == I915_MADV_DONTNEED) 2310 obj->dirty = 0; 2311 2312 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) { 2313 struct vm_page *page = sg_page_iter_page(&sg_iter); 2314 2315 if (obj->dirty) 2316 set_page_dirty(page); 2317 2318 if (obj->madv == I915_MADV_WILLNEED) 2319 mark_page_accessed(page); 2320 2321 vm_page_busy_wait(page, FALSE, "i915gem"); 2322 vm_page_unwire(page, 1); 2323 vm_page_wakeup(page); 2324 } 2325 obj->dirty = 0; 2326 2327 sg_free_table(obj->pages); 2328 kfree(obj->pages); 2329 } 2330 2331 int 2332 i915_gem_object_put_pages(struct drm_i915_gem_object *obj) 2333 { 2334 const struct drm_i915_gem_object_ops *ops = obj->ops; 2335 2336 if (obj->pages == NULL) 2337 return 0; 2338 2339 if (obj->pages_pin_count) 2340 return -EBUSY; 2341 2342 BUG_ON(i915_gem_obj_bound_any(obj)); 2343 2344 /* ->put_pages might need to allocate memory for the bit17 swizzle 2345 * array, hence protect them from being reaped by removing them from gtt 2346 * lists early. */ 2347 list_del(&obj->global_list); 2348 2349 ops->put_pages(obj); 2350 obj->pages = NULL; 2351 2352 i915_gem_object_invalidate(obj); 2353 2354 return 0; 2355 } 2356 2357 static int 2358 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) 2359 { 2360 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2361 int page_count, i; 2362 vm_object_t vm_obj; 2363 struct sg_table *st; 2364 struct scatterlist *sg; 2365 struct sg_page_iter sg_iter; 2366 struct vm_page *page; 2367 unsigned long last_pfn = 0; /* suppress gcc warning */ 2368 int ret; 2369 2370 /* Assert that the object is not currently in any GPU domain. 
As it 2371 * wasn't in the GTT, there shouldn't be any way it could have been in 2372 * a GPU cache 2373 */ 2374 BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS); 2375 BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS); 2376 2377 st = kmalloc(sizeof(*st), M_DRM, M_WAITOK); 2378 if (st == NULL) 2379 return -ENOMEM; 2380 2381 page_count = obj->base.size / PAGE_SIZE; 2382 if (sg_alloc_table(st, page_count, GFP_KERNEL)) { 2383 kfree(st); 2384 return -ENOMEM; 2385 } 2386 2387 /* Get the list of pages out of our struct file. They'll be pinned 2388 * at this point until we release them. 2389 * 2390 * Fail silently without starting the shrinker 2391 */ 2392 vm_obj = obj->base.vm_obj; 2393 VM_OBJECT_LOCK(vm_obj); 2394 sg = st->sgl; 2395 st->nents = 0; 2396 for (i = 0; i < page_count; i++) { 2397 page = shmem_read_mapping_page(vm_obj, i); 2398 if (IS_ERR(page)) { 2399 i915_gem_shrink(dev_priv, 2400 page_count, 2401 I915_SHRINK_BOUND | 2402 I915_SHRINK_UNBOUND | 2403 I915_SHRINK_PURGEABLE); 2404 page = shmem_read_mapping_page(vm_obj, i); 2405 } 2406 if (IS_ERR(page)) { 2407 /* We've tried hard to allocate the memory by reaping 2408 * our own buffer, now let the real VM do its job and 2409 * go down in flames if truly OOM. 2410 */ 2411 i915_gem_shrink_all(dev_priv); 2412 page = shmem_read_mapping_page(vm_obj, i); 2413 if (IS_ERR(page)) { 2414 ret = PTR_ERR(page); 2415 goto err_pages; 2416 } 2417 } 2418 #ifdef CONFIG_SWIOTLB 2419 if (swiotlb_nr_tbl()) { 2420 st->nents++; 2421 sg_set_page(sg, page, PAGE_SIZE, 0); 2422 sg = sg_next(sg); 2423 continue; 2424 } 2425 #endif 2426 if (!i || page_to_pfn(page) != last_pfn + 1) { 2427 if (i) 2428 sg = sg_next(sg); 2429 st->nents++; 2430 sg_set_page(sg, page, PAGE_SIZE, 0); 2431 } else { 2432 sg->length += PAGE_SIZE; 2433 } 2434 last_pfn = page_to_pfn(page); 2435 2436 /* Check that the i965g/gm workaround works. */ 2437 } 2438 #ifdef CONFIG_SWIOTLB 2439 if (!swiotlb_nr_tbl()) 2440 #endif 2441 sg_mark_end(sg); 2442 obj->pages = st; 2443 VM_OBJECT_UNLOCK(vm_obj); 2444 2445 ret = i915_gem_gtt_prepare_object(obj); 2446 if (ret) 2447 goto err_pages; 2448 2449 if (i915_gem_object_needs_bit17_swizzle(obj)) 2450 i915_gem_object_do_bit_17_swizzle(obj); 2451 2452 if (obj->tiling_mode != I915_TILING_NONE && 2453 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) 2454 i915_gem_object_pin_pages(obj); 2455 2456 return 0; 2457 2458 err_pages: 2459 sg_mark_end(sg); 2460 for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) { 2461 page = sg_page_iter_page(&sg_iter); 2462 vm_page_busy_wait(page, FALSE, "i915gem"); 2463 vm_page_unwire(page, 0); 2464 vm_page_wakeup(page); 2465 } 2466 VM_OBJECT_UNLOCK(vm_obj); 2467 sg_free_table(st); 2468 kfree(st); 2469 2470 /* shmemfs first checks if there is enough memory to allocate the page 2471 * and reports ENOSPC should there be insufficient, along with the usual 2472 * ENOMEM for a genuine allocation failure. 2473 * 2474 * We use ENOSPC in our driver to mean that we have run out of aperture 2475 * space and so want to translate the error from shmemfs back to our 2476 * usual understanding of ENOMEM. 2477 */ 2478 if (ret == -ENOSPC) 2479 ret = -ENOMEM; 2480 2481 return ret; 2482 } 2483 2484 /* Ensure that the associated pages are gathered from the backing storage 2485 * and pinned into our object. 
i915_gem_object_get_pages() may be called 2486 * multiple times before they are released by a single call to 2487 * i915_gem_object_put_pages() - once the pages are no longer referenced 2488 * either as a result of memory pressure (reaping pages under the shrinker) 2489 * or as the object is itself released. 2490 */ 2491 int 2492 i915_gem_object_get_pages(struct drm_i915_gem_object *obj) 2493 { 2494 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2495 const struct drm_i915_gem_object_ops *ops = obj->ops; 2496 int ret; 2497 2498 if (obj->pages) 2499 return 0; 2500 2501 if (obj->madv != I915_MADV_WILLNEED) { 2502 DRM_DEBUG("Attempting to obtain a purgeable object\n"); 2503 return -EFAULT; 2504 } 2505 2506 BUG_ON(obj->pages_pin_count); 2507 2508 ret = ops->get_pages(obj); 2509 if (ret) 2510 return ret; 2511 2512 list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list); 2513 2514 obj->get_page.sg = obj->pages->sgl; 2515 obj->get_page.last = 0; 2516 2517 return 0; 2518 } 2519 2520 void i915_vma_move_to_active(struct i915_vma *vma, 2521 struct drm_i915_gem_request *req) 2522 { 2523 struct drm_i915_gem_object *obj = vma->obj; 2524 struct intel_engine_cs *ring; 2525 2526 ring = i915_gem_request_get_ring(req); 2527 2528 /* Add a reference if we're newly entering the active list. */ 2529 if (obj->active == 0) 2530 drm_gem_object_reference(&obj->base); 2531 obj->active |= intel_ring_flag(ring); 2532 2533 list_move_tail(&obj->ring_list[ring->id], &ring->active_list); 2534 i915_gem_request_assign(&obj->last_read_req[ring->id], req); 2535 2536 list_move_tail(&vma->mm_list, &vma->vm->active_list); 2537 } 2538 2539 static void 2540 i915_gem_object_retire__write(struct drm_i915_gem_object *obj) 2541 { 2542 RQ_BUG_ON(obj->last_write_req == NULL); 2543 RQ_BUG_ON(!(obj->active & intel_ring_flag(obj->last_write_req->ring))); 2544 2545 i915_gem_request_assign(&obj->last_write_req, NULL); 2546 intel_fb_obj_flush(obj, true, ORIGIN_CS); 2547 } 2548 2549 static void 2550 i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring) 2551 { 2552 struct i915_vma *vma; 2553 2554 RQ_BUG_ON(obj->last_read_req[ring] == NULL); 2555 RQ_BUG_ON(!(obj->active & (1 << ring))); 2556 2557 list_del_init(&obj->ring_list[ring]); 2558 i915_gem_request_assign(&obj->last_read_req[ring], NULL); 2559 2560 if (obj->last_write_req && obj->last_write_req->ring->id == ring) 2561 i915_gem_object_retire__write(obj); 2562 2563 obj->active &= ~(1 << ring); 2564 if (obj->active) 2565 return; 2566 2567 /* Bump our place on the bound list to keep it roughly in LRU order 2568 * so that we don't steal from recently used but inactive objects 2569 * (unless we are forced to ofc!) 
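 * (Assumption for illustration: the shrinker scans mm.bound_list from
 *  the head, so the list_move_tail() below marks this object as
 *  most-recently-used and least likely to be reaped.)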
2570 */ 2571 list_move_tail(&obj->global_list, 2572 &to_i915(obj->base.dev)->mm.bound_list); 2573 2574 list_for_each_entry(vma, &obj->vma_list, vma_link) { 2575 if (!list_empty(&vma->mm_list)) 2576 list_move_tail(&vma->mm_list, &vma->vm->inactive_list); 2577 } 2578 2579 i915_gem_request_assign(&obj->last_fenced_req, NULL); 2580 drm_gem_object_unreference(&obj->base); 2581 } 2582 2583 static int 2584 i915_gem_init_seqno(struct drm_device *dev, u32 seqno) 2585 { 2586 struct drm_i915_private *dev_priv = dev->dev_private; 2587 struct intel_engine_cs *ring; 2588 int ret, i, j; 2589 2590 /* Carefully retire all requests without writing to the rings */ 2591 for_each_ring(ring, dev_priv, i) { 2592 ret = intel_ring_idle(ring); 2593 if (ret) 2594 return ret; 2595 } 2596 i915_gem_retire_requests(dev); 2597 2598 /* Finally reset hw state */ 2599 for_each_ring(ring, dev_priv, i) { 2600 intel_ring_init_seqno(ring, seqno); 2601 2602 for (j = 0; j < ARRAY_SIZE(ring->semaphore.sync_seqno); j++) 2603 ring->semaphore.sync_seqno[j] = 0; 2604 } 2605 2606 return 0; 2607 } 2608 2609 int i915_gem_set_seqno(struct drm_device *dev, u32 seqno) 2610 { 2611 struct drm_i915_private *dev_priv = dev->dev_private; 2612 int ret; 2613 2614 if (seqno == 0) 2615 return -EINVAL; 2616 2617 /* HWS page needs to be set less than what we 2618 * will inject to ring 2619 */ 2620 ret = i915_gem_init_seqno(dev, seqno - 1); 2621 if (ret) 2622 return ret; 2623 2624 /* Carefully set the last_seqno value so that wrap 2625 * detection still works 2626 */ 2627 dev_priv->next_seqno = seqno; 2628 dev_priv->last_seqno = seqno - 1; 2629 if (dev_priv->last_seqno == 0) 2630 dev_priv->last_seqno--; 2631 2632 return 0; 2633 } 2634 2635 int 2636 i915_gem_get_seqno(struct drm_device *dev, u32 *seqno) 2637 { 2638 struct drm_i915_private *dev_priv = dev->dev_private; 2639 2640 /* reserve 0 for non-seqno */ 2641 if (dev_priv->next_seqno == 0) { 2642 int ret = i915_gem_init_seqno(dev, 0); 2643 if (ret) 2644 return ret; 2645 2646 dev_priv->next_seqno = 1; 2647 } 2648 2649 *seqno = dev_priv->last_seqno = dev_priv->next_seqno++; 2650 return 0; 2651 } 2652 2653 /* 2654 * NB: This function is not allowed to fail. Doing so would mean the 2655 * request is not being tracked for completion but the work itself is 2656 * going to happen on the hardware. This would be a Bad Thing(tm). 2657 */ 2658 void __i915_add_request(struct drm_i915_gem_request *request, 2659 struct drm_i915_gem_object *obj, 2660 bool flush_caches) 2661 { 2662 struct intel_engine_cs *ring; 2663 struct drm_i915_private *dev_priv; 2664 struct intel_ringbuffer *ringbuf; 2665 u32 request_start; 2666 int ret; 2667 2668 if (WARN_ON(request == NULL)) 2669 return; 2670 2671 ring = request->ring; 2672 dev_priv = ring->dev->dev_private; 2673 ringbuf = request->ringbuf; 2674 2675 /* 2676 * To ensure that this call will not fail, space for its emissions 2677 * should already have been reserved in the ring buffer. Let the ring 2678 * know that it is time to use that space up. 2679 */ 2680 intel_ring_reserved_space_use(ringbuf); 2681 2682 request_start = intel_ring_get_tail(ringbuf); 2683 /* 2684 * Emit any outstanding flushes - execbuf can fail to emit the flush 2685 * after having emitted the batchbuffer command. Hence we need to fix 2686 * things up similar to emitting the lazy request. The difference here 2687 * is that the flush _must_ happen before the next request, no matter 2688 * what.
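 * (Request lifecycle sketch: i915_gem_request_alloc() reserved ring
 *  space up front, the caller emitted its commands, and this function
 *  now emits the flush and seqno write into that reserved space --
 *  which is why none of the emits below are allowed to fail.)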
2689 */ 2690 if (flush_caches) { 2691 if (i915.enable_execlists) 2692 ret = logical_ring_flush_all_caches(request); 2693 else 2694 ret = intel_ring_flush_all_caches(request); 2695 /* Not allowed to fail! */ 2696 WARN(ret, "*_ring_flush_all_caches failed: %d!\n", ret); 2697 } 2698 2699 /* Record the position of the start of the request so that 2700 * should we detect the updated seqno part-way through the 2701 * GPU processing the request, we never over-estimate the 2702 * position of the head. 2703 */ 2704 request->postfix = intel_ring_get_tail(ringbuf); 2705 2706 if (i915.enable_execlists) 2707 ret = ring->emit_request(request); 2708 else { 2709 ret = ring->add_request(request); 2710 2711 request->tail = intel_ring_get_tail(ringbuf); 2712 } 2713 2714 /* Not allowed to fail! */ 2715 WARN(ret, "emit|add_request failed: %d!\n", ret); 2716 2717 request->head = request_start; 2718 2719 /* Whilst this request exists, batch_obj will be on the 2720 * active_list, and so will hold the active reference. Only when this 2721 * request is retired will the batch_obj be moved onto the 2722 * inactive_list and lose its active reference. Hence we do not need 2723 * to explicitly hold another reference here. 2724 */ 2725 request->batch_obj = obj; 2726 2727 request->emitted_jiffies = jiffies; 2728 request->previous_seqno = ring->last_submitted_seqno; 2729 ring->last_submitted_seqno = request->seqno; 2730 list_add_tail(&request->list, &ring->request_list); 2731 2732 trace_i915_gem_request_add(request); 2733 2734 i915_queue_hangcheck(ring->dev); 2735 2736 queue_delayed_work(dev_priv->wq, 2737 &dev_priv->mm.retire_work, 2738 round_jiffies_up_relative(HZ)); 2739 intel_mark_busy(dev_priv->dev); 2740 2741 /* Sanity check that the reserved size was large enough. */ 2742 intel_ring_reserved_space_end(ringbuf); 2743 } 2744 2745 static bool i915_context_is_banned(struct drm_i915_private *dev_priv, 2746 const struct intel_context *ctx) 2747 { 2748 unsigned long elapsed; 2749 2750 elapsed = get_seconds() - ctx->hang_stats.guilty_ts; 2751 2752 if (ctx->hang_stats.banned) 2753 return true; 2754 2755 if (ctx->hang_stats.ban_period_seconds && 2756 elapsed <= ctx->hang_stats.ban_period_seconds) { 2757 if (!i915_gem_context_is_default(ctx)) { 2758 DRM_DEBUG("context hanging too fast, banning!\n"); 2759 return true; 2760 } else if (i915_stop_ring_allow_ban(dev_priv)) { 2761 if (i915_stop_ring_allow_warn(dev_priv)) 2762 DRM_ERROR("gpu hanging too fast, banning!\n"); 2763 return true; 2764 } 2765 } 2766 2767 return false; 2768 } 2769 2770 static void i915_set_reset_status(struct drm_i915_private *dev_priv, 2771 struct intel_context *ctx, 2772 const bool guilty) 2773 { 2774 struct i915_ctx_hang_stats *hs; 2775 2776 if (WARN_ON(!ctx)) 2777 return; 2778 2779 hs = &ctx->hang_stats; 2780 2781 if (guilty) { 2782 hs->banned = i915_context_is_banned(dev_priv, ctx); 2783 hs->batch_active++; 2784 hs->guilty_ts = get_seconds(); 2785 } else { 2786 hs->batch_pending++; 2787 } 2788 } 2789 2790 void i915_gem_request_free(struct kref *req_ref) 2791 { 2792 struct drm_i915_gem_request *req = container_of(req_ref, 2793 typeof(*req), ref); 2794 struct intel_context *ctx = req->ctx; 2795 2796 if (req->file_priv) 2797 i915_gem_request_remove_from_client(req); 2798 2799 if (ctx) { 2800 if (i915.enable_execlists) { 2801 if (ctx != req->ring->default_context) 2802 intel_lr_context_unpin(req); 2803 } 2804 2805 i915_gem_context_unreference(ctx); 2806 } 2807 2808 kfree(req); 2809 } 2810 2811 int i915_gem_request_alloc(struct intel_engine_cs *ring,
2812 struct intel_context *ctx, 2813 struct drm_i915_gem_request **req_out) 2814 { 2815 struct drm_i915_private *dev_priv = to_i915(ring->dev); 2816 struct drm_i915_gem_request *req; 2817 int ret; 2818 2819 if (!req_out) 2820 return -EINVAL; 2821 2822 *req_out = NULL; 2823 2824 req = kzalloc(sizeof(*req), GFP_KERNEL); 2825 if (req == NULL) 2826 return -ENOMEM; 2827 2828 ret = i915_gem_get_seqno(ring->dev, &req->seqno); 2829 if (ret) 2830 goto err; 2831 2832 kref_init(&req->ref); 2833 req->i915 = dev_priv; 2834 req->ring = ring; 2835 req->ctx = ctx; 2836 i915_gem_context_reference(req->ctx); 2837 2838 if (i915.enable_execlists) 2839 ret = intel_logical_ring_alloc_request_extras(req); 2840 else 2841 ret = intel_ring_alloc_request_extras(req); 2842 if (ret) { 2843 i915_gem_context_unreference(req->ctx); 2844 goto err; 2845 } 2846 2847 /* 2848 * Reserve space in the ring buffer for all the commands required to 2849 * eventually emit this request. This is to guarantee that the 2850 * i915_add_request() call can't fail. Note that the reserve may need 2851 * to be redone if the request is not actually submitted straight 2852 * away, e.g. because a GPU scheduler has deferred it. 2853 */ 2854 if (i915.enable_execlists) 2855 ret = intel_logical_ring_reserve_space(req); 2856 else 2857 ret = intel_ring_reserve_space(req); 2858 if (ret) { 2859 /* 2860 * At this point, the request is fully allocated even if not 2861 * fully prepared. Thus it can be cleaned up using the proper 2862 * free code. 2863 */ 2864 i915_gem_request_cancel(req); 2865 return ret; 2866 } 2867 2868 *req_out = req; 2869 return 0; 2870 2871 err: 2872 kfree(req); 2873 return ret; 2874 } 2875 2876 void i915_gem_request_cancel(struct drm_i915_gem_request *req) 2877 { 2878 intel_ring_reserved_space_cancel(req->ringbuf); 2879 2880 i915_gem_request_unreference(req); 2881 } 2882 2883 struct drm_i915_gem_request * 2884 i915_gem_find_active_request(struct intel_engine_cs *ring) 2885 { 2886 struct drm_i915_gem_request *request; 2887 2888 list_for_each_entry(request, &ring->request_list, list) { 2889 if (i915_gem_request_completed(request, false)) 2890 continue; 2891 2892 return request; 2893 } 2894 2895 return NULL; 2896 } 2897 2898 static void i915_gem_reset_ring_status(struct drm_i915_private *dev_priv, 2899 struct intel_engine_cs *ring) 2900 { 2901 struct drm_i915_gem_request *request; 2902 bool ring_hung; 2903 2904 request = i915_gem_find_active_request(ring); 2905 2906 if (request == NULL) 2907 return; 2908 2909 ring_hung = ring->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG; 2910 2911 i915_set_reset_status(dev_priv, request->ctx, ring_hung); 2912 2913 list_for_each_entry_continue(request, &ring->request_list, list) 2914 i915_set_reset_status(dev_priv, request->ctx, false); 2915 } 2916 2917 static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv, 2918 struct intel_engine_cs *ring) 2919 { 2920 struct intel_ringbuffer *buffer; 2921 2922 while (!list_empty(&ring->active_list)) { 2923 struct drm_i915_gem_object *obj; 2924 2925 obj = list_first_entry(&ring->active_list, 2926 struct drm_i915_gem_object, 2927 ring_list[ring->id]); 2928 2929 i915_gem_object_retire__read(obj, ring->id); 2930 } 2931 2932 /* 2933 * Clear the execlists queue up before freeing the requests, as those 2934 * are the ones that keep the context and ringbuffer backing objects 2935 * pinned in place. 
2936 */ 2937 2938 if (i915.enable_execlists) { 2939 spin_lock_irq(&ring->execlist_lock); 2940 2941 /* list_splice_tail_init checks for empty lists */ 2942 list_splice_tail_init(&ring->execlist_queue, 2943 &ring->execlist_retired_req_list); 2944 2945 spin_unlock_irq(&ring->execlist_lock); 2946 intel_execlists_retire_requests(ring); 2947 } 2948 2949 /* 2950 * We must free the requests after all the corresponding objects have 2951 * been moved off active lists, which is the same order that the normal 2952 * retire_requests function uses. This is important if objects hold 2953 * implicit references on things like e.g. ppgtt address spaces through 2954 * the request. 2955 */ 2956 while (!list_empty(&ring->request_list)) { 2957 struct drm_i915_gem_request *request; 2958 2959 request = list_first_entry(&ring->request_list, 2960 struct drm_i915_gem_request, 2961 list); 2962 2963 i915_gem_request_retire(request); 2964 } 2965 2966 /* Having flushed all requests from all queues, we know that all 2967 * ringbuffers must now be empty. However, since we do not reclaim 2968 * all space when retiring the request (to prevent HEADs colliding 2969 * with rapid ringbuffer wraparound) the amount of available space 2970 * upon reset is less than when we start. Do one more pass over 2971 * all the ringbuffers to reset last_retired_head. 2972 */ 2973 list_for_each_entry(buffer, &ring->buffers, link) { 2974 buffer->last_retired_head = buffer->tail; 2975 intel_ring_update_space(buffer); 2976 } 2977 } 2978 2979 void i915_gem_reset(struct drm_device *dev) 2980 { 2981 struct drm_i915_private *dev_priv = dev->dev_private; 2982 struct intel_engine_cs *ring; 2983 int i; 2984 2985 /* 2986 * Before we free the objects from the requests, we need to inspect 2987 * them for finding the guilty party. As the requests only borrow 2988 * their reference to the objects, the inspection must be done first. 2989 */ 2990 for_each_ring(ring, dev_priv, i) 2991 i915_gem_reset_ring_status(dev_priv, ring); 2992 2993 for_each_ring(ring, dev_priv, i) 2994 i915_gem_reset_ring_cleanup(dev_priv, ring); 2995 2996 i915_gem_context_reset(dev); 2997 2998 i915_gem_restore_fences(dev); 2999 3000 WARN_ON(i915_verify_lists(dev)); 3001 } 3002 3003 /** 3004 * This function clears the request list as sequence numbers are passed. 3005 */ 3006 void 3007 i915_gem_retire_requests_ring(struct intel_engine_cs *ring) 3008 { 3009 WARN_ON(i915_verify_lists(ring->dev)); 3010 3011 /* Retire requests first as we use it above for the early return. 3012 * If we retire requests last, we may use a later seqno and so clear 3013 * the requests lists without clearing the active list, leading to 3014 * confusion. 3015 */ 3016 while (!list_empty(&ring->request_list)) { 3017 struct drm_i915_gem_request *request; 3018 3019 request = list_first_entry(&ring->request_list, 3020 struct drm_i915_gem_request, 3021 list); 3022 3023 if (!i915_gem_request_completed(request, true)) 3024 break; 3025 3026 i915_gem_request_retire(request); 3027 } 3028 3029 /* Move any buffers on the active list that are no longer referenced 3030 * by the ringbuffer to the flushing/inactive lists as appropriate, 3031 * before we free the context associated with the requests.
3032 */ 3033 while (!list_empty(&ring->active_list)) { 3034 struct drm_i915_gem_object *obj; 3035 3036 obj = list_first_entry(&ring->active_list, 3037 struct drm_i915_gem_object, 3038 ring_list[ring->id]); 3039 3040 if (!list_empty(&obj->last_read_req[ring->id]->list)) 3041 break; 3042 3043 i915_gem_object_retire__read(obj, ring->id); 3044 } 3045 3046 if (unlikely(ring->trace_irq_req && 3047 i915_gem_request_completed(ring->trace_irq_req, true))) { 3048 ring->irq_put(ring); 3049 i915_gem_request_assign(&ring->trace_irq_req, NULL); 3050 } 3051 3052 WARN_ON(i915_verify_lists(ring->dev)); 3053 } 3054 3055 bool 3056 i915_gem_retire_requests(struct drm_device *dev) 3057 { 3058 struct drm_i915_private *dev_priv = dev->dev_private; 3059 struct intel_engine_cs *ring; 3060 bool idle = true; 3061 int i; 3062 3063 for_each_ring(ring, dev_priv, i) { 3064 i915_gem_retire_requests_ring(ring); 3065 idle &= list_empty(&ring->request_list); 3066 if (i915.enable_execlists) { 3067 unsigned long flags; 3068 3069 spin_lock_irqsave(&ring->execlist_lock, flags); 3070 idle &= list_empty(&ring->execlist_queue); 3071 spin_unlock_irqrestore(&ring->execlist_lock, flags); 3072 3073 intel_execlists_retire_requests(ring); 3074 } 3075 } 3076 3077 if (idle) 3078 mod_delayed_work(dev_priv->wq, 3079 &dev_priv->mm.idle_work, 3080 msecs_to_jiffies(100)); 3081 3082 return idle; 3083 } 3084 3085 static void 3086 i915_gem_retire_work_handler(struct work_struct *work) 3087 { 3088 struct drm_i915_private *dev_priv = 3089 container_of(work, typeof(*dev_priv), mm.retire_work.work); 3090 struct drm_device *dev = dev_priv->dev; 3091 bool idle; 3092 3093 /* Come back later if the device is busy... */ 3094 idle = false; 3095 if (mutex_trylock(&dev->struct_mutex)) { 3096 idle = i915_gem_retire_requests(dev); 3097 mutex_unlock(&dev->struct_mutex); 3098 } 3099 if (!idle) 3100 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 3101 round_jiffies_up_relative(HZ)); 3102 } 3103 3104 static void 3105 i915_gem_idle_work_handler(struct work_struct *work) 3106 { 3107 struct drm_i915_private *dev_priv = 3108 container_of(work, typeof(*dev_priv), mm.idle_work.work); 3109 struct drm_device *dev = dev_priv->dev; 3110 struct intel_engine_cs *ring; 3111 int i; 3112 3113 for_each_ring(ring, dev_priv, i) 3114 if (!list_empty(&ring->request_list)) 3115 return; 3116 3117 /* we probably should sync with hangcheck here, using cancel_work_sync. 3118 * Also locking seems to be fubar here, ring->request_list is protected 3119 * by dev->struct_mutex. */ 3120 3121 intel_mark_idle(dev); 3122 3123 if (mutex_trylock(&dev->struct_mutex)) { 3124 struct intel_engine_cs *ring; 3125 int i; 3126 3127 for_each_ring(ring, dev_priv, i) 3128 i915_gem_batch_pool_fini(&ring->batch_pool); 3129 3130 mutex_unlock(&dev->struct_mutex); 3131 } 3132 } 3133 3134 /** 3135 * Ensures that an object will eventually get non-busy by flushing any required 3136 * write domains, emitting any outstanding lazy request and retiring any 3137 * completed requests.
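 * ("Non-busy" here means obj->active reaching zero: each completed
 *  last_read_req is retired, which clears that ring's bit in
 *  obj->active.)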
3138 */ 3139 static int 3140 i915_gem_object_flush_active(struct drm_i915_gem_object *obj) 3141 { 3142 int i; 3143 3144 if (!obj->active) 3145 return 0; 3146 3147 for (i = 0; i < I915_NUM_RINGS; i++) { 3148 struct drm_i915_gem_request *req; 3149 3150 req = obj->last_read_req[i]; 3151 if (req == NULL) 3152 continue; 3153 3154 if (list_empty(&req->list)) 3155 goto retire; 3156 3157 if (i915_gem_request_completed(req, true)) { 3158 __i915_gem_request_retire__upto(req); 3159 retire: 3160 i915_gem_object_retire__read(obj, i); 3161 } 3162 } 3163 3164 return 0; 3165 } 3166 3167 /** 3168 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT 3169 * @DRM_IOCTL_ARGS: standard ioctl arguments 3170 * 3171 * Returns 0 if successful, else an error is returned with the remaining time in 3172 * the timeout parameter. 3173 * -ETIME: object is still busy after timeout 3174 * -ERESTARTSYS: signal interrupted the wait 3175 * -ENOENT: object doesn't exist 3176 * Also possible, but rare: 3177 * -EAGAIN: GPU wedged 3178 * -ENOMEM: damn 3179 * -ENODEV: Internal IRQ fail 3180 * -E?: The add request failed 3181 * 3182 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any 3183 * non-zero timeout parameter the wait ioctl will wait for the given number of 3184 * nanoseconds on an object becoming unbusy. Since the wait itself does so 3185 * without holding struct_mutex the object may become re-busied before this 3186 * function completes. A similar but shorter race condition exists in the busy 3187 * ioctl. 3188 */ 3189 int 3190 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) 3191 { 3192 struct drm_i915_private *dev_priv = dev->dev_private; 3193 struct drm_i915_gem_wait *args = data; 3194 struct drm_i915_gem_object *obj; 3195 struct drm_i915_gem_request *req[I915_NUM_RINGS]; 3196 unsigned reset_counter; 3197 int i, n = 0; 3198 int ret; 3199 3200 if (args->flags != 0) 3201 return -EINVAL; 3202 3203 ret = i915_mutex_lock_interruptible(dev); 3204 if (ret) 3205 return ret; 3206 3207 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->bo_handle)); 3208 if (&obj->base == NULL) { 3209 mutex_unlock(&dev->struct_mutex); 3210 return -ENOENT; 3211 } 3212 3213 /* Need to make sure the object gets inactive eventually. */ 3214 ret = i915_gem_object_flush_active(obj); 3215 if (ret) 3216 goto out; 3217 3218 if (!obj->active) 3219 goto out; 3220 3221 /* Do this after OLR check to make sure we make forward progress polling 3222 * on this IOCTL with a timeout == 0 (like busy ioctl) 3223 */ 3224 if (args->timeout_ns == 0) { 3225 ret = -ETIME; 3226 goto out; 3227 } 3228 3229 drm_gem_object_unreference(&obj->base); 3230 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter); 3231 3232 for (i = 0; i < I915_NUM_RINGS; i++) { 3233 if (obj->last_read_req[i] == NULL) 3234 continue; 3235 3236 req[n++] = i915_gem_request_reference(obj->last_read_req[i]); 3237 } 3238 3239 mutex_unlock(&dev->struct_mutex); 3240 3241 for (i = 0; i < n; i++) { 3242 if (ret == 0) 3243 ret = __i915_wait_request(req[i], reset_counter, true, 3244 args->timeout_ns > 0 ?
&args->timeout_ns : NULL, 3245 to_rps_client(file)); 3246 i915_gem_request_unreference__unlocked(req[i]); 3247 } 3248 return ret; 3249 3250 out: 3251 drm_gem_object_unreference(&obj->base); 3252 mutex_unlock(&dev->struct_mutex); 3253 return ret; 3254 } 3255 3256 static int 3257 __i915_gem_object_sync(struct drm_i915_gem_object *obj, 3258 struct intel_engine_cs *to, 3259 struct drm_i915_gem_request *from_req, 3260 struct drm_i915_gem_request **to_req) 3261 { 3262 struct intel_engine_cs *from; 3263 int ret; 3264 3265 from = i915_gem_request_get_ring(from_req); 3266 if (to == from) 3267 return 0; 3268 3269 if (i915_gem_request_completed(from_req, true)) 3270 return 0; 3271 3272 if (!i915_semaphore_is_enabled(obj->base.dev)) { 3273 struct drm_i915_private *i915 = to_i915(obj->base.dev); 3274 ret = __i915_wait_request(from_req, 3275 atomic_read(&i915->gpu_error.reset_counter), 3276 i915->mm.interruptible, 3277 NULL, 3278 &i915->rps.semaphores); 3279 if (ret) 3280 return ret; 3281 3282 i915_gem_object_retire_request(obj, from_req); 3283 } else { 3284 int idx = intel_ring_sync_index(from, to); 3285 u32 seqno = i915_gem_request_get_seqno(from_req); 3286 3287 WARN_ON(!to_req); 3288 3289 if (seqno <= from->semaphore.sync_seqno[idx]) 3290 return 0; 3291 3292 if (*to_req == NULL) { 3293 ret = i915_gem_request_alloc(to, to->default_context, to_req); 3294 if (ret) 3295 return ret; 3296 } 3297 3298 trace_i915_gem_ring_sync_to(*to_req, from, from_req); 3299 ret = to->semaphore.sync_to(*to_req, from, seqno); 3300 if (ret) 3301 return ret; 3302 3303 /* We use last_read_req because sync_to() 3304 * might have just caused seqno wrap under 3305 * the radar. 3306 */ 3307 from->semaphore.sync_seqno[idx] = 3308 i915_gem_request_get_seqno(obj->last_read_req[from->id]); 3309 } 3310 3311 return 0; 3312 } 3313 3314 /** 3315 * i915_gem_object_sync - sync an object to a ring. 3316 * 3317 * @obj: object which may be in use on another ring. 3318 * @to: ring we wish to use the object on. May be NULL. 3319 * @to_req: request we wish to use the object for. See below. 3320 * This will be allocated and returned if a request is 3321 * required but not passed in. 3322 * 3323 * This code is meant to abstract object synchronization with the GPU. 3324 * Calling with NULL implies synchronizing the object with the CPU 3325 * rather than a particular GPU ring. Conceptually we serialise writes 3326 * between engines inside the GPU. We only allow one engine to write 3327 * into a buffer at any time, but multiple readers. To ensure each has 3328 * a coherent view of memory, we must: 3329 * 3330 * - If there is an outstanding write request to the object, the new 3331 * request must wait for it to complete (either CPU or in hw, requests 3332 * on the same ring will be naturally ordered). 3333 * 3334 * - If we are a write request (pending_write_domain is set), the new 3335 * request must wait for outstanding read requests to complete. 3336 * 3337 * For CPU synchronisation (NULL to) no request is required. For syncing with 3338 * rings to_req must be non-NULL. However, a request does not have to be 3339 * pre-allocated. If *to_req is NULL and sync commands will be emitted then a 3340 * request will be allocated automatically and returned through *to_req. Note 3341 * that it is not guaranteed that commands will be emitted (because the system 3342 * might already be idle). Hence there is no need to create a request that 3343 * might never have any work submitted. 
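 * Illustrative caller pattern (a sketch, not code from this file):
 *
 *	struct drm_i915_gem_request *req = NULL;
 *	ret = i915_gem_object_sync(obj, ring, &req);
 *	if (ret)
 *		return ret;
 *	if (req)
 *		i915_add_request_no_flush(req);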
Note further that if a request is 3344 * returned in *to_req, it is the responsibility of the caller to submit 3345 * that request (after potentially adding more work to it). 3346 * 3347 * Returns 0 if successful, else propagates up the lower layer error. 3348 */ 3349 int 3350 i915_gem_object_sync(struct drm_i915_gem_object *obj, 3351 struct intel_engine_cs *to, 3352 struct drm_i915_gem_request **to_req) 3353 { 3354 const bool readonly = obj->base.pending_write_domain == 0; 3355 struct drm_i915_gem_request *req[I915_NUM_RINGS]; 3356 int ret, i, n; 3357 3358 if (!obj->active) 3359 return 0; 3360 3361 if (to == NULL) 3362 return i915_gem_object_wait_rendering(obj, readonly); 3363 3364 n = 0; 3365 if (readonly) { 3366 if (obj->last_write_req) 3367 req[n++] = obj->last_write_req; 3368 } else { 3369 for (i = 0; i < I915_NUM_RINGS; i++) 3370 if (obj->last_read_req[i]) 3371 req[n++] = obj->last_read_req[i]; 3372 } 3373 for (i = 0; i < n; i++) { 3374 ret = __i915_gem_object_sync(obj, to, req[i], to_req); 3375 if (ret) 3376 return ret; 3377 } 3378 3379 return 0; 3380 } 3381 3382 static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj) 3383 { 3384 u32 old_write_domain, old_read_domains; 3385 3386 /* Force a pagefault for domain tracking on next user access */ 3387 i915_gem_release_mmap(obj); 3388 3389 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) 3390 return; 3391 3392 /* Wait for any direct GTT access to complete */ 3393 mb(); 3394 3395 old_read_domains = obj->base.read_domains; 3396 old_write_domain = obj->base.write_domain; 3397 3398 obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT; 3399 obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT; 3400 3401 trace_i915_gem_object_change_domain(obj, 3402 old_read_domains, 3403 old_write_domain); 3404 } 3405 3406 static int __i915_vma_unbind(struct i915_vma *vma, bool wait) 3407 { 3408 struct drm_i915_gem_object *obj = vma->obj; 3409 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 3410 int ret; 3411 3412 if (list_empty(&vma->vma_link)) 3413 return 0; 3414 3415 if (!drm_mm_node_allocated(&vma->node)) { 3416 i915_gem_vma_destroy(vma); 3417 return 0; 3418 } 3419 3420 if (vma->pin_count) 3421 return -EBUSY; 3422 3423 BUG_ON(obj->pages == NULL); 3424 3425 if (wait) { 3426 ret = i915_gem_object_wait_rendering(obj, false); 3427 if (ret) 3428 return ret; 3429 } 3430 3431 if (i915_is_ggtt(vma->vm) && 3432 vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) { 3433 i915_gem_object_finish_gtt(obj); 3434 3435 /* release the fence reg _after_ flushing */ 3436 ret = i915_gem_object_put_fence(obj); 3437 if (ret) 3438 return ret; 3439 } 3440 3441 trace_i915_vma_unbind(vma); 3442 3443 vma->vm->unbind_vma(vma); 3444 vma->bound = 0; 3445 3446 list_del_init(&vma->mm_list); 3447 if (i915_is_ggtt(vma->vm)) { 3448 if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) { 3449 obj->map_and_fenceable = false; 3450 } else if (vma->ggtt_view.pages) { 3451 sg_free_table(vma->ggtt_view.pages); 3452 kfree(vma->ggtt_view.pages); 3453 } 3454 vma->ggtt_view.pages = NULL; 3455 } 3456 3457 drm_mm_remove_node(&vma->node); 3458 i915_gem_vma_destroy(vma); 3459 3460 /* Since the unbound list is global, only move to that list if 3461 * no more VMAs exist. */ 3462 if (list_empty(&obj->vma_list)) 3463 list_move_tail(&obj->global_list, &dev_priv->mm.unbound_list); 3464 3465 /* And finally now the object is completely decoupled from this vma, 3466 * we can drop its hold on the backing storage and allow it to be 3467 * reaped by the shrinker. 
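 * (This unpin pairs with the i915_gem_object_pin_pages() taken in
 *  i915_gem_object_bind_to_vm() when the vma was first bound.)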
3468 */ 3469 i915_gem_object_unpin_pages(obj); 3470 3471 return 0; 3472 } 3473 3474 int i915_vma_unbind(struct i915_vma *vma) 3475 { 3476 return __i915_vma_unbind(vma, true); 3477 } 3478 3479 int __i915_vma_unbind_no_wait(struct i915_vma *vma) 3480 { 3481 return __i915_vma_unbind(vma, false); 3482 } 3483 3484 int i915_gpu_idle(struct drm_device *dev) 3485 { 3486 struct drm_i915_private *dev_priv = dev->dev_private; 3487 struct intel_engine_cs *ring; 3488 int ret, i; 3489 3490 /* Flush everything onto the inactive list. */ 3491 for_each_ring(ring, dev_priv, i) { 3492 if (!i915.enable_execlists) { 3493 struct drm_i915_gem_request *req; 3494 3495 ret = i915_gem_request_alloc(ring, ring->default_context, &req); 3496 if (ret) 3497 return ret; 3498 3499 ret = i915_switch_context(req); 3500 if (ret) { 3501 i915_gem_request_cancel(req); 3502 return ret; 3503 } 3504 3505 i915_add_request_no_flush(req); 3506 } 3507 3508 ret = intel_ring_idle(ring); 3509 if (ret) 3510 return ret; 3511 } 3512 3513 WARN_ON(i915_verify_lists(dev)); 3514 return 0; 3515 } 3516 3517 static bool i915_gem_valid_gtt_space(struct i915_vma *vma, 3518 unsigned long cache_level) 3519 { 3520 struct drm_mm_node *gtt_space = &vma->node; 3521 struct drm_mm_node *other; 3522 3523 /* 3524 * On some machines we have to be careful when putting differing types 3525 * of snoopable memory together to avoid the prefetcher crossing memory 3526 * domains and dying. During vm initialisation, we decide whether or not 3527 * these constraints apply and set the drm_mm.color_adjust 3528 * appropriately. 3529 */ 3530 if (vma->vm->mm.color_adjust == NULL) 3531 return true; 3532 3533 if (!drm_mm_node_allocated(gtt_space)) 3534 return true; 3535 3536 if (list_empty(&gtt_space->node_list)) 3537 return true; 3538 3539 other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list); 3540 if (other->allocated && !other->hole_follows && other->color != cache_level) 3541 return false; 3542 3543 other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list); 3544 if (other->allocated && !gtt_space->hole_follows && other->color != cache_level) 3545 return false; 3546 3547 return true; 3548 } 3549 3550 /** 3551 * Finds free space in the GTT aperture and binds the object or a view of it 3552 * there. 3553 */ 3554 static struct i915_vma * 3555 i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, 3556 struct i915_address_space *vm, 3557 const struct i915_ggtt_view *ggtt_view, 3558 unsigned alignment, 3559 uint64_t flags) 3560 { 3561 struct drm_device *dev = obj->base.dev; 3562 struct drm_i915_private *dev_priv = dev->dev_private; 3563 u32 fence_alignment, unfenced_alignment; 3564 u32 search_flag, alloc_flag; 3565 u64 start, end; 3566 u64 size, fence_size; 3567 struct i915_vma *vma; 3568 int ret; 3569 3570 if (i915_is_ggtt(vm)) { 3571 u32 view_size; 3572 3573 if (WARN_ON(!ggtt_view)) 3574 return ERR_PTR(-EINVAL); 3575 3576 view_size = i915_ggtt_view_size(obj, ggtt_view); 3577 3578 fence_size = i915_gem_get_gtt_size(dev, 3579 view_size, 3580 obj->tiling_mode); 3581 fence_alignment = i915_gem_get_gtt_alignment(dev, 3582 view_size, 3583 obj->tiling_mode, 3584 true); 3585 unfenced_alignment = i915_gem_get_gtt_alignment(dev, 3586 view_size, 3587 obj->tiling_mode, 3588 false); 3589 size = flags & PIN_MAPPABLE ?
fence_size : view_size; 3590 } else { 3591 fence_size = i915_gem_get_gtt_size(dev, 3592 obj->base.size, 3593 obj->tiling_mode); 3594 fence_alignment = i915_gem_get_gtt_alignment(dev, 3595 obj->base.size, 3596 obj->tiling_mode, 3597 true); 3598 unfenced_alignment = 3599 i915_gem_get_gtt_alignment(dev, 3600 obj->base.size, 3601 obj->tiling_mode, 3602 false); 3603 size = flags & PIN_MAPPABLE ? fence_size : obj->base.size; 3604 } 3605 3606 start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0; 3607 end = vm->total; 3608 if (flags & PIN_MAPPABLE) 3609 end = min_t(u64, end, dev_priv->gtt.mappable_end); 3610 if (flags & PIN_ZONE_4G) 3611 end = min_t(u64, end, (1ULL << 32) - PAGE_SIZE); 3612 3613 if (alignment == 0) 3614 alignment = flags & PIN_MAPPABLE ? fence_alignment : 3615 unfenced_alignment; 3616 if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) { 3617 DRM_DEBUG("Invalid object (view type=%u) alignment requested %u\n", 3618 ggtt_view ? ggtt_view->type : 0, 3619 alignment); 3620 return ERR_PTR(-EINVAL); 3621 } 3622 3623 /* If binding the object/GGTT view requires more space than the entire 3624 * aperture has, reject it early before evicting everything in a vain 3625 * attempt to find space. 3626 */ 3627 if (size > end) { 3628 DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: size=%lu > %s aperture=%lu\n", 3629 ggtt_view ? ggtt_view->type : 0, 3630 size, 3631 flags & PIN_MAPPABLE ? "mappable" : "total", 3632 end); 3633 return ERR_PTR(-E2BIG); 3634 } 3635 3636 ret = i915_gem_object_get_pages(obj); 3637 if (ret) 3638 return ERR_PTR(ret); 3639 3640 i915_gem_object_pin_pages(obj); 3641 3642 vma = ggtt_view ? i915_gem_obj_lookup_or_create_ggtt_vma(obj, ggtt_view) : 3643 i915_gem_obj_lookup_or_create_vma(obj, vm); 3644 3645 if (IS_ERR(vma)) 3646 goto err_unpin; 3647 3648 if (flags & PIN_OFFSET_FIXED) { 3649 uint64_t offset = flags & PIN_OFFSET_MASK; 3650 3651 if (offset & (alignment - 1) || offset + size > end) { 3652 ret = -EINVAL; 3653 goto err_free_vma; 3654 } 3655 vma->node.start = offset; 3656 vma->node.size = size; 3657 vma->node.color = obj->cache_level; 3658 ret = drm_mm_reserve_node(&vm->mm, &vma->node); 3659 if (ret) { 3660 ret = i915_gem_evict_for_vma(vma); 3661 if (ret == 0) 3662 ret = drm_mm_reserve_node(&vm->mm, &vma->node); 3663 } 3664 if (ret) 3665 goto err_free_vma; 3666 } else { 3667 if (flags & PIN_HIGH) { 3668 search_flag = DRM_MM_SEARCH_BELOW; 3669 alloc_flag = DRM_MM_CREATE_TOP; 3670 } else { 3671 search_flag = DRM_MM_SEARCH_DEFAULT; 3672 alloc_flag = DRM_MM_CREATE_DEFAULT; 3673 } 3674 3675 search_free: 3676 ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node, 3677 size, alignment, 3678 obj->cache_level, 3679 start, end, 3680 search_flag, 3681 alloc_flag); 3682 if (ret) { 3683 ret = i915_gem_evict_something(dev, vm, size, alignment, 3684 obj->cache_level, 3685 start, end, 3686 flags); 3687 if (ret == 0) 3688 goto search_free; 3689 3690 goto err_free_vma; 3691 } 3692 } 3693 if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) { 3694 ret = -EINVAL; 3695 goto err_remove_node; 3696 } 3697 3698 trace_i915_vma_bind(vma, flags); 3699 ret = i915_vma_bind(vma, obj->cache_level, flags); 3700 if (ret) 3701 goto err_remove_node; 3702 3703 list_move_tail(&obj->global_list, &dev_priv->mm.bound_list); 3704 list_add_tail(&vma->mm_list, &vm->inactive_list); 3705 3706 return vma; 3707 3708 err_remove_node: 3709 drm_mm_remove_node(&vma->node); 3710 err_free_vma: 3711 i915_gem_vma_destroy(vma); 3712 vma = ERR_PTR(ret); 3713 
err_unpin: 3714 i915_gem_object_unpin_pages(obj); 3715 return vma; 3716 } 3717 3718 bool 3719 i915_gem_clflush_object(struct drm_i915_gem_object *obj, 3720 bool force) 3721 { 3722 /* If we don't have a page list set up, then we're not pinned 3723 * to GPU, and we can ignore the cache flush because it'll happen 3724 * again at bind time. 3725 */ 3726 if (obj->pages == NULL) 3727 return false; 3728 3729 /* 3730 * Stolen memory is always coherent with the GPU as it is explicitly 3731 * marked as wc by the system, or the system is cache-coherent. 3732 */ 3733 if (obj->stolen || obj->phys_handle) 3734 return false; 3735 3736 /* If the GPU is snooping the contents of the CPU cache, 3737 * we do not need to manually clear the CPU cache lines. However, 3738 * the caches are only snooped when the render cache is 3739 * flushed/invalidated. As we always have to emit invalidations 3740 * and flushes when moving into and out of the RENDER domain, correct 3741 * snooping behaviour occurs naturally as the result of our domain 3742 * tracking. 3743 */ 3744 if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) { 3745 obj->cache_dirty = true; 3746 return false; 3747 } 3748 3749 trace_i915_gem_object_clflush(obj); 3750 drm_clflush_sg(obj->pages); 3751 obj->cache_dirty = false; 3752 3753 return true; 3754 } 3755 3756 /** Flushes the GTT write domain for the object if it's dirty. */ 3757 static void 3758 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) 3759 { 3760 uint32_t old_write_domain; 3761 3762 if (obj->base.write_domain != I915_GEM_DOMAIN_GTT) 3763 return; 3764 3765 /* No actual flushing is required for the GTT write domain. Writes 3766 * to it immediately go to main memory as far as we know, so there's 3767 * no chipset flush. It also doesn't land in render cache. 3768 * 3769 * However, we do have to enforce the order so that all writes through 3770 * the GTT land before any writes to the device, such as updates to 3771 * the GATT itself. 3772 */ 3773 wmb(); 3774 3775 old_write_domain = obj->base.write_domain; 3776 obj->base.write_domain = 0; 3777 3778 intel_fb_obj_flush(obj, false, ORIGIN_GTT); 3779 3780 trace_i915_gem_object_change_domain(obj, 3781 obj->base.read_domains, 3782 old_write_domain); 3783 } 3784 3785 /** Flushes the CPU write domain for the object if it's dirty. */ 3786 static void 3787 i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) 3788 { 3789 uint32_t old_write_domain; 3790 3791 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) 3792 return; 3793 3794 if (i915_gem_clflush_object(obj, obj->pin_display)) 3795 i915_gem_chipset_flush(obj->base.dev); 3796 3797 old_write_domain = obj->base.write_domain; 3798 obj->base.write_domain = 0; 3799 3800 intel_fb_obj_flush(obj, false, ORIGIN_CPU); 3801 3802 trace_i915_gem_object_change_domain(obj, 3803 obj->base.read_domains, 3804 old_write_domain); 3805 } 3806 3807 /** 3808 * Moves a single object to the GTT read, and possibly write domain. 3809 * 3810 * This function returns when the move is complete, including waiting on 3811 * flushes to occur. 
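 * Illustrative usage (a sketch; assumes struct_mutex is held and the
 * object is pinned into the mappable GGTT):
 *
 *	ret = i915_gem_object_set_to_gtt_domain(obj, true);
 *	if (ret)
 *		return ret;
 *	... CPU writes through the GTT mapping are now coherent ...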
3812 */ 3813 int 3814 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) 3815 { 3816 uint32_t old_write_domain, old_read_domains; 3817 struct i915_vma *vma; 3818 int ret; 3819 3820 if (obj->base.write_domain == I915_GEM_DOMAIN_GTT) 3821 return 0; 3822 3823 ret = i915_gem_object_wait_rendering(obj, !write); 3824 if (ret) 3825 return ret; 3826 3827 /* Flush and acquire obj->pages so that we are coherent through 3828 * direct access in memory with previous cached writes through 3829 * shmemfs and that our cache domain tracking remains valid. 3830 * For example, if the obj->filp was moved to swap without us 3831 * being notified and releasing the pages, we would mistakenly 3832 * continue to assume that the obj remained out of the CPU cached 3833 * domain. 3834 */ 3835 ret = i915_gem_object_get_pages(obj); 3836 if (ret) 3837 return ret; 3838 3839 i915_gem_object_flush_cpu_write_domain(obj); 3840 3841 /* Serialise direct access to this object with the barriers for 3842 * coherent writes from the GPU, by effectively invalidating the 3843 * GTT domain upon first access. 3844 */ 3845 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) 3846 mb(); 3847 3848 old_write_domain = obj->base.write_domain; 3849 old_read_domains = obj->base.read_domains; 3850 3851 /* It should now be out of any other write domains, and we can update 3852 * the domain values for our changes. 3853 */ 3854 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0); 3855 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 3856 if (write) { 3857 obj->base.read_domains = I915_GEM_DOMAIN_GTT; 3858 obj->base.write_domain = I915_GEM_DOMAIN_GTT; 3859 obj->dirty = 1; 3860 } 3861 3862 trace_i915_gem_object_change_domain(obj, 3863 old_read_domains, 3864 old_write_domain); 3865 3866 /* And bump the LRU for this access */ 3867 vma = i915_gem_obj_to_ggtt(obj); 3868 if (vma && drm_mm_node_allocated(&vma->node) && !obj->active) 3869 list_move_tail(&vma->mm_list, 3870 &to_i915(obj->base.dev)->gtt.base.inactive_list); 3871 3872 return 0; 3873 } 3874 3875 /** 3876 * Changes the cache-level of an object across all VMA. 3877 * 3878 * After this function returns, the object will be in the new cache-level 3879 * across all GTT and the contents of the backing storage will be coherent, 3880 * with respect to the new cache-level. In order to keep the backing storage 3881 * coherent for all users, we only allow a single cache level to be set 3882 * globally on the object and prevent it from being changed whilst the 3883 * hardware is reading from the object. That is if the object is currently 3884 * on the scanout it will be set to uncached (or equivalent display 3885 * cache coherency) and all non-MOCS GPU access will also be uncached so 3886 * that all direct access to the scanout remains coherent. 3887 */ 3888 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, 3889 enum i915_cache_level cache_level) 3890 { 3891 struct drm_device *dev = obj->base.dev; 3892 struct i915_vma *vma, *next; 3893 bool bound = false; 3894 int ret = 0; 3895 3896 if (obj->cache_level == cache_level) 3897 goto out; 3898 3899 /* Inspect the list of currently bound VMA and unbind any that would 3900 * be invalid given the new cache-level. This is principally to 3901 * catch the issue of the CS prefetch crossing page boundaries and 3902 * reading an invalid PTE on older architectures. 
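 * (Concretely: the command streamer can prefetch past the end of one
 *  object into the first PTE of its neighbour, so a vma whose
 *  neighbouring node has an incompatible cache colour is unbound here
 *  and rebound elsewhere.)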
3903 */ 3904 list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) { 3905 if (!drm_mm_node_allocated(&vma->node)) 3906 continue; 3907 3908 if (vma->pin_count) { 3909 DRM_DEBUG("can not change the cache level of pinned objects\n"); 3910 return -EBUSY; 3911 } 3912 3913 if (!i915_gem_valid_gtt_space(vma, cache_level)) { 3914 ret = i915_vma_unbind(vma); 3915 if (ret) 3916 return ret; 3917 } else 3918 bound = true; 3919 } 3920 3921 /* We can reuse the existing drm_mm nodes but need to change the 3922 * cache-level on the PTE. We could simply unbind them all and 3923 * rebind with the correct cache-level on next use. However since 3924 * we already have a valid slot, dma mapping, pages etc, we may as well 3925 * rewrite the PTE in the belief that doing so tramples upon less 3926 * state and so involves less work. 3927 */ 3928 if (bound) { 3929 /* Before we change the PTE, the GPU must not be accessing it. 3930 * If we wait upon the object, we know that all the bound 3931 * VMA are no longer active. 3932 */ 3933 ret = i915_gem_object_wait_rendering(obj, false); 3934 if (ret) 3935 return ret; 3936 3937 if (!HAS_LLC(dev) && cache_level != I915_CACHE_NONE) { 3938 /* Access to snoopable pages through the GTT is 3939 * incoherent and on some machines causes a hard 3940 * lockup. Relinquish the CPU mmapping to force 3941 * userspace to refault in the pages and we can 3942 * then double check if the GTT mapping is still 3943 * valid for that pointer access. 3944 */ 3945 i915_gem_release_mmap(obj); 3946 3947 /* As we no longer need a fence for GTT access, 3948 * we can relinquish it now (and so prevent having 3949 * to steal a fence from someone else on the next 3950 * fence request). Note GPU activity would have 3951 * dropped the fence as all snoopable access is 3952 * supposed to be linear. 3953 */ 3954 ret = i915_gem_object_put_fence(obj); 3955 if (ret) 3956 return ret; 3957 } else { 3958 /* We either have incoherent backing store and 3959 * so no GTT access or the architecture is fully 3960 * coherent. In such cases, existing GTT mmaps 3961 * ignore the cache bit in the PTE and we can 3962 * rewrite it without confusing the GPU or having 3963 * to force userspace to fault back in its mmaps. 3964 */ 3965 } 3966 3967 list_for_each_entry(vma, &obj->vma_list, vma_link) { 3968 if (!drm_mm_node_allocated(&vma->node)) 3969 continue; 3970 3971 ret = i915_vma_bind(vma, cache_level, PIN_UPDATE); 3972 if (ret) 3973 return ret; 3974 } 3975 } 3976 3977 list_for_each_entry(vma, &obj->vma_list, vma_link) 3978 vma->node.color = cache_level; 3979 obj->cache_level = cache_level; 3980 3981 out: 3982 /* Flush the dirty CPU caches to the backing storage so that the 3983 * object is now coherent at its new cache level (with respect 3984 * to the access domain).
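 * (Example: demoting a formerly LLC-cached object to I915_CACHE_NONE
 *  for scanout leaves dirty CPU cachelines that the display engine
 *  would never snoop, so they are clflushed below.)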

int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
{
	struct drm_i915_gem_caching *args = data;
	struct drm_i915_gem_object *obj;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL)
		return -ENOENT;

	switch (obj->cache_level) {
	case I915_CACHE_LLC:
	case I915_CACHE_L3_LLC:
		args->caching = I915_CACHING_CACHED;
		break;

	case I915_CACHE_WT:
		args->caching = I915_CACHING_DISPLAY;
		break;

	default:
		args->caching = I915_CACHING_NONE;
		break;
	}

	drm_gem_object_unreference_unlocked(&obj->base);
	return 0;
}

int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_caching *args = data;
	struct drm_i915_gem_object *obj;
	enum i915_cache_level level;
	int ret;

	switch (args->caching) {
	case I915_CACHING_NONE:
		level = I915_CACHE_NONE;
		break;
	case I915_CACHING_CACHED:
		/*
		 * Due to a HW issue on BXT A stepping, GPU stores via a
		 * snooped mapping may leave stale data in a corresponding CPU
		 * cacheline, whereas normally such cachelines would get
		 * invalidated.
		 */
		if (IS_BXT_REVID(dev, 0, BXT_REVID_A1))
			return -ENODEV;

		level = I915_CACHE_LLC;
		break;
	case I915_CACHING_DISPLAY:
		level = HAS_WT(dev) ? I915_CACHE_WT : I915_CACHE_NONE;
		break;
	default:
		return -EINVAL;
	}

	intel_runtime_pm_get(dev_priv);

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		goto rpm_put;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	ret = i915_gem_object_set_cache_level(obj, level);

	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
rpm_put:
	intel_runtime_pm_put(dev_priv);

	return ret;
}
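
/*
 * Illustrative sketch (not part of the driver): how userspace might drive
 * the two ioctls above. Hypothetical example, assuming libdrm headers, an
 * open DRM fd and a valid GEM handle.
 */
#if 0
#include <xf86drm.h>
#include <i915_drm.h>

static int example_set_and_query_caching(int fd, uint32_t handle)
{
	struct drm_i915_gem_caching arg = {
		.handle = handle,
		.caching = I915_CACHING_CACHED,	/* request a snooped/LLC mapping */
	};
	int ret;

	ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg);
	if (ret)
		return ret;	/* e.g. fails on BXT A stepping, see above */

	/* Read back the level the kernel actually chose. */
	arg.caching = 0;
	ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_GET_CACHING, &arg);
	return ret ? ret : (int)arg.caching;
}
#endif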

/*
 * Prepare buffer for display plane (scanout, cursors, etc).
 * Can be called from an uninterruptible phase (modesetting) and allows
 * any flushes to be pipelined (for pageflips).
 */
int
i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
				     u32 alignment,
				     const struct i915_ggtt_view *view)
{
	u32 old_read_domains, old_write_domain;
	int ret;

	/* Mark the pin_display early so that we account for the
	 * display coherency whilst setting up the cache domains.
	 */
	obj->pin_display++;

	/* The display engine is not coherent with the LLC cache on gen6. As
	 * a result, we make sure that the pinning that is about to occur is
	 * done with uncached PTEs. This is the lowest common denominator for
	 * all chipsets.
	 *
	 * However for gen6+, we could do better by using the GFDT bit instead
	 * of uncaching, which would allow us to flush all the LLC-cached data
	 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
	 */
	ret = i915_gem_object_set_cache_level(obj,
					      HAS_WT(obj->base.dev) ? I915_CACHE_WT : I915_CACHE_NONE);
	if (ret)
		goto err_unpin_display;

	/* As the user may map the buffer once pinned in the display plane
	 * (e.g. libkms for the bootup splash), we have to ensure that we
	 * always use map_and_fenceable for all scanout buffers.
	 */
	ret = i915_gem_object_ggtt_pin(obj, view, alignment,
				       view->type == I915_GGTT_VIEW_NORMAL ?
				       PIN_MAPPABLE : 0);
	if (ret)
		goto err_unpin_display;

	i915_gem_object_flush_cpu_write_domain(obj);

	old_write_domain = obj->base.write_domain;
	old_read_domains = obj->base.read_domains;

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	obj->base.write_domain = 0;
	obj->base.read_domains |= I915_GEM_DOMAIN_GTT;

	trace_i915_gem_object_change_domain(obj,
					    old_read_domains,
					    old_write_domain);

	return 0;

err_unpin_display:
	obj->pin_display--;
	return ret;
}

void
i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj,
					 const struct i915_ggtt_view *view)
{
	if (WARN_ON(obj->pin_display == 0))
		return;

	i915_gem_object_ggtt_unpin_view(obj, view);

	obj->pin_display--;
}
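
/*
 * Illustrative sketch (not part of the driver): the pin/unpin pairing a
 * hypothetical display-setup path would use around a flip. Assumes only
 * the two functions above and the normal GGTT view.
 */
#if 0
static int example_pin_scanout(struct drm_i915_gem_object *obj)
{
	int ret;

	/* Uncached (or WT) + mappable + fenceable, ready for the scanout. */
	ret = i915_gem_object_pin_to_display_plane(obj, 0,
						   &i915_ggtt_view_normal);
	if (ret)
		return ret;

	/* ... program the plane registers, wait for the flip ... */

	i915_gem_object_unpin_from_display_plane(obj, &i915_ggtt_view_normal);
	return 0;
}
#endif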

/**
 * Moves a single object to the CPU read, and possibly write domain.
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
{
	uint32_t old_write_domain, old_read_domains;
	int ret;

	if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
		return 0;

	ret = i915_gem_object_wait_rendering(obj, !write);
	if (ret)
		return ret;

	i915_gem_object_flush_gtt_write_domain(obj);

	old_write_domain = obj->base.write_domain;
	old_read_domains = obj->base.read_domains;

	/* Flush the CPU cache if it's still invalid. */
	if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
		i915_gem_clflush_object(obj, false);

		obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
	}

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0);

	/* If we're writing through the CPU, then the GPU read domains will
	 * need to be invalidated at next use.
	 */
	if (write) {
		obj->base.read_domains = I915_GEM_DOMAIN_CPU;
		obj->base.write_domain = I915_GEM_DOMAIN_CPU;
	}

	trace_i915_gem_object_change_domain(obj,
					    old_read_domains,
					    old_write_domain);

	return 0;
}
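
/*
 * Illustrative sketch (not part of the driver): preparing an object for a
 * CPU write, the same pattern i915_gem_object_create_from_data() uses at
 * the end of this file. Hypothetical caller, struct_mutex assumed held.
 */
#if 0
static int example_cpu_fill(struct drm_i915_gem_object *obj,
			    const void *data, size_t len)
{
	int ret;

	/* Waits for the GPU and flushes/invalidates caches as needed. */
	ret = i915_gem_object_set_to_cpu_domain(obj, true);
	if (ret)
		return ret;

	ret = i915_gem_object_get_pages(obj);
	if (ret)
		return ret;

	/* ... kmap the pages and memcpy 'data' into them ... */
	return 0;
}
#endif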

/* Throttle our rendering by waiting until the ring has completed our requests
 * emitted over 20 msec ago.
 *
 * Note that if we were to use the current jiffies each time around the loop,
 * we wouldn't escape the function with any frames outstanding if the time to
 * render a frame was over 20ms.
 *
 * This should get us reasonable parallelism between CPU and GPU but also
 * relatively low latency when blocking on a particular request to finish.
 */
static int
i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_file_private *file_priv = file->driver_priv;
	unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES;
	struct drm_i915_gem_request *request, *target = NULL;
	unsigned reset_counter;
	int ret;

	ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
	if (ret)
		return ret;

	ret = i915_gem_check_wedge(&dev_priv->gpu_error, false);
	if (ret)
		return ret;

	spin_lock(&file_priv->mm.lock);
	list_for_each_entry(request, &file_priv->mm.request_list, client_list) {
		if (time_after_eq(request->emitted_jiffies, recent_enough))
			break;

		/*
		 * Note that the request might not have been submitted yet,
		 * in which case emitted_jiffies will be zero.
		 */
		if (!request->emitted_jiffies)
			continue;

		target = request;
	}
	reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
	if (target)
		i915_gem_request_reference(target);
	spin_unlock(&file_priv->mm.lock);

	if (target == NULL)
		return 0;

	ret = __i915_wait_request(target, reset_counter, true, NULL, NULL);
	if (ret == 0)
		queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0);

	i915_gem_request_unreference__unlocked(target);

	return ret;
}

static bool
i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags)
{
	struct drm_i915_gem_object *obj = vma->obj;

	if (alignment &&
	    vma->node.start & (alignment - 1))
		return true;

	if (flags & PIN_MAPPABLE && !obj->map_and_fenceable)
		return true;

	if (flags & PIN_OFFSET_BIAS &&
	    vma->node.start < (flags & PIN_OFFSET_MASK))
		return true;

	if (flags & PIN_OFFSET_FIXED &&
	    vma->node.start != (flags & PIN_OFFSET_MASK))
		return true;

	return false;
}

void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
{
	struct drm_i915_gem_object *obj = vma->obj;
	bool mappable, fenceable;
	u32 fence_size, fence_alignment;

	fence_size = i915_gem_get_gtt_size(obj->base.dev,
					   obj->base.size,
					   obj->tiling_mode);
	fence_alignment = i915_gem_get_gtt_alignment(obj->base.dev,
						     obj->base.size,
						     obj->tiling_mode,
						     true);

	fenceable = (vma->node.size == fence_size &&
		     (vma->node.start & (fence_alignment - 1)) == 0);

	mappable = (vma->node.start + fence_size <=
		    to_i915(obj->base.dev)->gtt.mappable_end);

	obj->map_and_fenceable = mappable && fenceable;
}

static int
i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
		       struct i915_address_space *vm,
		       const struct i915_ggtt_view *ggtt_view,
		       uint32_t alignment,
		       uint64_t flags)
{
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
	struct i915_vma *vma;
	unsigned bound;
	int ret;

	if (WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base))
		return -ENODEV;

	if (WARN_ON(flags & (PIN_GLOBAL | PIN_MAPPABLE) && !i915_is_ggtt(vm)))
		return -EINVAL;

	if (WARN_ON((flags & (PIN_MAPPABLE | PIN_GLOBAL)) == PIN_MAPPABLE))
		return -EINVAL;

	if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view))
		return -EINVAL;

	vma = ggtt_view ? i915_gem_obj_to_ggtt_view(obj, ggtt_view) :
			  i915_gem_obj_to_vma(obj, vm);

	if (IS_ERR(vma))
		return PTR_ERR(vma);

	if (vma) {
		if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
			return -EBUSY;

		if (i915_vma_misplaced(vma, alignment, flags)) {
			WARN(vma->pin_count,
			     "bo is already pinned in %s with incorrect alignment:"
			     " offset=%08x %08x, req.alignment=%x, req.map_and_fenceable=%d,"
			     " obj->map_and_fenceable=%d\n",
			     ggtt_view ? "ggtt" : "ppgtt",
			     upper_32_bits(vma->node.start),
			     lower_32_bits(vma->node.start),
			     alignment,
			     !!(flags & PIN_MAPPABLE),
			     obj->map_and_fenceable);
			ret = i915_vma_unbind(vma);
			if (ret)
				return ret;

			vma = NULL;
		}
	}

	bound = vma ? vma->bound : 0;
	if (vma == NULL || !drm_mm_node_allocated(&vma->node)) {
		vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view, alignment,
						 flags);
		if (IS_ERR(vma))
			return PTR_ERR(vma);
	} else {
		ret = i915_vma_bind(vma, obj->cache_level, flags);
		if (ret)
			return ret;
	}

	if (ggtt_view && ggtt_view->type == I915_GGTT_VIEW_NORMAL &&
	    (bound ^ vma->bound) & GLOBAL_BIND) {
		__i915_vma_set_map_and_fenceable(vma);
		WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable);
	}

	vma->pin_count++;
	return 0;
}

int
i915_gem_object_pin(struct drm_i915_gem_object *obj,
		    struct i915_address_space *vm,
		    uint32_t alignment,
		    uint64_t flags)
{
	return i915_gem_object_do_pin(obj, vm,
				      i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL,
				      alignment, flags);
}

int
i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
			 const struct i915_ggtt_view *view,
			 uint32_t alignment,
			 uint64_t flags)
{
	if (WARN_ONCE(!view, "no view specified"))
		return -EINVAL;

	return i915_gem_object_do_pin(obj, i915_obj_to_ggtt(obj), view,
				      alignment, flags | PIN_GLOBAL);
}

void
i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj,
				const struct i915_ggtt_view *view)
{
	struct i915_vma *vma = i915_gem_obj_to_ggtt_view(obj, view);

	BUG_ON(!vma);
	WARN_ON(vma->pin_count == 0);
	WARN_ON(!i915_gem_obj_ggtt_bound_view(obj, view));

	--vma->pin_count;
}
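
/*
 * Illustrative sketch (not part of the driver): pinning an object into the
 * mappable part of the GGTT and releasing it again. Hypothetical caller,
 * struct_mutex assumed held; only the helpers above are used.
 */
#if 0
static int example_ggtt_pin_mappable(struct drm_i915_gem_object *obj)
{
	int ret;

	/* PIN_GLOBAL is implied by the ggtt_pin wrapper; PIN_MAPPABLE
	 * additionally demands a spot below gtt.mappable_end so the CPU
	 * can reach it through the aperture.
	 */
	ret = i915_gem_object_ggtt_pin(obj, &i915_ggtt_view_normal,
				       4096 /* alignment */, PIN_MAPPABLE);
	if (ret)
		return ret;

	/* ... access the object through the aperture ... */

	i915_gem_object_ggtt_unpin_view(obj, &i915_ggtt_view_normal);
	return 0;
}
#endif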

int
i915_gem_busy_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_gem_busy *args = data;
	struct drm_i915_gem_object *obj;
	int ret;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Count all active objects as busy, even if they are currently not used
	 * by the gpu. Users of this interface expect objects to eventually
	 * become non-busy without any further actions, therefore emit any
	 * necessary flushes here.
	 */
	ret = i915_gem_object_flush_active(obj);
	if (ret)
		goto unref;

	BUILD_BUG_ON(I915_NUM_RINGS > 16);
	args->busy = obj->active << 16;
	if (obj->last_write_req)
		args->busy |= obj->last_write_req->ring->id;

unref:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

int
i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
			struct drm_file *file_priv)
{
	return i915_gem_ring_throttle(dev, file_priv);
}

int
i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
		       struct drm_file *file_priv)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_madvise *args = data;
	struct drm_i915_gem_object *obj;
	int ret;

	switch (args->madv) {
	case I915_MADV_DONTNEED:
	case I915_MADV_WILLNEED:
		break;
	default:
		return -EINVAL;
	}

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file_priv, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	if (i915_gem_obj_is_pinned(obj)) {
		ret = -EINVAL;
		goto out;
	}

	if (obj->pages &&
	    obj->tiling_mode != I915_TILING_NONE &&
	    dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
		if (obj->madv == I915_MADV_WILLNEED)
			i915_gem_object_unpin_pages(obj);
		if (args->madv == I915_MADV_WILLNEED)
			i915_gem_object_pin_pages(obj);
	}

	if (obj->madv != __I915_MADV_PURGED)
		obj->madv = args->madv;

	/* if the object is no longer attached, discard its backing storage */
	if (obj->madv == I915_MADV_DONTNEED && obj->pages == NULL)
		i915_gem_object_truncate(obj);

	args->retained = obj->madv != __I915_MADV_PURGED;

out:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}
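
/*
 * Illustrative sketch (not part of the driver): the userspace view of the
 * madvise ioctl above, as used by buffer caches to mark idle buffers
 * purgeable and to revalidate them on reuse. Hypothetical example, assuming
 * libdrm headers, an open DRM fd and a valid GEM handle.
 */
#if 0
#include <xf86drm.h>
#include <i915_drm.h>

/* Returns 1 if the buffer survived (still usable), 0 if it was purged. */
static int example_madv_willneed(int fd, uint32_t handle)
{
	struct drm_i915_gem_madvise arg = {
		.handle = handle,
		.madv = I915_MADV_WILLNEED,
	};

	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_MADVISE, &arg))
		return 0;

	return arg.retained;
}
#endif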

void i915_gem_object_init(struct drm_i915_gem_object *obj,
			  const struct drm_i915_gem_object_ops *ops)
{
	int i;

	INIT_LIST_HEAD(&obj->global_list);
	for (i = 0; i < I915_NUM_RINGS; i++)
		INIT_LIST_HEAD(&obj->ring_list[i]);
	INIT_LIST_HEAD(&obj->obj_exec_link);
	INIT_LIST_HEAD(&obj->vma_list);
	INIT_LIST_HEAD(&obj->batch_pool_link);

	obj->ops = ops;

	obj->fence_reg = I915_FENCE_REG_NONE;
	obj->madv = I915_MADV_WILLNEED;

	i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size);
}

static const struct drm_i915_gem_object_ops i915_gem_object_ops = {
	.flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE,
	.get_pages = i915_gem_object_get_pages_gtt,
	.put_pages = i915_gem_object_put_pages_gtt,
};

struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
						  size_t size)
{
	struct drm_i915_gem_object *obj;
#if 0
	struct address_space *mapping;
	gfp_t mask;
#endif

	obj = i915_gem_object_alloc(dev);
	if (obj == NULL)
		return NULL;

	if (drm_gem_object_init(dev, &obj->base, size) != 0) {
		i915_gem_object_free(obj);
		return NULL;
	}

#if 0
	mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
	if (IS_CRESTLINE(dev) || IS_BROADWATER(dev)) {
		/* 965gm cannot relocate objects above 4GiB. */
		mask &= ~__GFP_HIGHMEM;
		mask |= __GFP_DMA32;
	}

	mapping = file_inode(obj->base.filp)->i_mapping;
	mapping_set_gfp_mask(mapping, mask);
#endif

	i915_gem_object_init(obj, &i915_gem_object_ops);

	obj->base.write_domain = I915_GEM_DOMAIN_CPU;
	obj->base.read_domains = I915_GEM_DOMAIN_CPU;

	if (HAS_LLC(dev)) {
		/* On some devices, we can have the GPU use the LLC (the CPU
		 * cache) for about a 10% performance improvement
		 * compared to uncached. Graphics requests other than
		 * display scanout are coherent with the CPU in
		 * accessing this cache. This means in this mode we
		 * don't need to clflush on the CPU side, and on the
		 * GPU side we only need to flush internal caches to
		 * get data visible to the CPU.
		 *
		 * However, we maintain the display planes as UC, and so
		 * need to rebind when first used as such.
		 */
		obj->cache_level = I915_CACHE_LLC;
	} else
		obj->cache_level = I915_CACHE_NONE;

	trace_i915_gem_object_create(obj);

	return obj;
}
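
/*
 * Illustrative sketch (not part of the driver): allocating a GEM object
 * in-kernel and dropping the reference again, the same pattern
 * i915_gem_object_create_from_data() uses at the end of this file.
 * Hypothetical caller, struct_mutex assumed held for the unreference.
 */
#if 0
static int example_alloc_and_release(struct drm_device *dev)
{
	struct drm_i915_gem_object *obj;

	obj = i915_gem_alloc_object(dev, PAGE_SIZE);
	if (obj == NULL)
		return -ENOMEM;

	/* New objects start in the CPU domain, CPU-cached on LLC parts. */
	WARN_ON(obj->base.write_domain != I915_GEM_DOMAIN_CPU);

	/* Dropping the last reference funnels into i915_gem_free_object(). */
	drm_gem_object_unreference(&obj->base);
	return 0;
}
#endif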

static bool discard_backing_storage(struct drm_i915_gem_object *obj)
{
	/* If we are the last user of the backing storage (be it shmemfs
	 * pages or stolen etc), we know that the pages are going to be
	 * immediately released. In this case, we can then skip copying
	 * back the contents from the GPU.
	 */

	if (obj->madv != I915_MADV_WILLNEED)
		return false;

	if (obj->base.vm_obj == NULL)
		return true;

	/* At first glance, this looks racy, but then again so would be
	 * userspace racing mmap against close. However, the first external
	 * reference to the filp can only be obtained through the
	 * i915_gem_mmap_ioctl() which safeguards us against the user
	 * acquiring such a reference whilst we are in the middle of
	 * freeing the object.
	 */
#if 0
	return atomic_long_read(&obj->base.filp->f_count) == 1;
#else
	return false;
#endif
}

void i915_gem_free_object(struct drm_gem_object *gem_obj)
{
	struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct i915_vma *vma, *next;

	intel_runtime_pm_get(dev_priv);

	trace_i915_gem_object_destroy(obj);

	list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) {
		int ret;

		vma->pin_count = 0;
		ret = i915_vma_unbind(vma);
		if (WARN_ON(ret == -ERESTARTSYS)) {
			bool was_interruptible;

			was_interruptible = dev_priv->mm.interruptible;
			dev_priv->mm.interruptible = false;

			WARN_ON(i915_vma_unbind(vma));

			dev_priv->mm.interruptible = was_interruptible;
		}
	}

	/* Stolen objects don't hold a ref, but do hold pin count. Fix that up
	 * before progressing.
	 */
	if (obj->stolen)
		i915_gem_object_unpin_pages(obj);

	WARN_ON(obj->frontbuffer_bits);

	if (obj->pages && obj->madv == I915_MADV_WILLNEED &&
	    dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES &&
	    obj->tiling_mode != I915_TILING_NONE)
		i915_gem_object_unpin_pages(obj);

	if (WARN_ON(obj->pages_pin_count))
		obj->pages_pin_count = 0;
	if (discard_backing_storage(obj))
		obj->madv = I915_MADV_DONTNEED;
	i915_gem_object_put_pages(obj);
	i915_gem_object_free_mmap_offset(obj);

	BUG_ON(obj->pages);

#if 0
	if (obj->base.import_attach)
		drm_prime_gem_destroy(&obj->base, NULL);
#endif

	if (obj->ops->release)
		obj->ops->release(obj);

	drm_gem_object_release(&obj->base);
	i915_gem_info_remove_obj(dev_priv, obj->base.size);

	kfree(obj->bit_17);
	i915_gem_object_free(obj);

	intel_runtime_pm_put(dev_priv);
}

struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
				     struct i915_address_space *vm)
{
	struct i915_vma *vma;
	list_for_each_entry(vma, &obj->vma_list, vma_link) {
		if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL &&
		    vma->vm == vm)
			return vma;
	}
	return NULL;
}

struct i915_vma *i915_gem_obj_to_ggtt_view(struct drm_i915_gem_object *obj,
					   const struct i915_ggtt_view *view)
{
	struct i915_address_space *ggtt = i915_obj_to_ggtt(obj);
	struct i915_vma *vma;

	if (WARN_ONCE(!view, "no view specified"))
		return ERR_PTR(-EINVAL);

	list_for_each_entry(vma, &obj->vma_list, vma_link)
		if (vma->vm == ggtt &&
		    i915_ggtt_view_equal(&vma->ggtt_view, view))
			return vma;
	return NULL;
}

void i915_gem_vma_destroy(struct i915_vma *vma)
{
	struct i915_address_space *vm = NULL;
	WARN_ON(vma->node.allocated);

	/* Keep the vma as a placeholder in the execbuffer reservation lists */
	if (!list_empty(&vma->exec_list))
		return;

	vm = vma->vm;

	if (!i915_is_ggtt(vm))
		i915_ppgtt_put(i915_vm_to_ppgtt(vm));

	list_del(&vma->vma_link);

	kfree(vma);
}

static void
i915_gem_stop_ringbuffers(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *ring;
	int i;

	for_each_ring(ring, dev_priv, i)
		dev_priv->gt.stop_ring(ring);
}

int
i915_gem_suspend(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret = 0;

	mutex_lock(&dev->struct_mutex);
	ret = i915_gpu_idle(dev);
	if (ret)
		goto err;

	i915_gem_retire_requests(dev);

	i915_gem_stop_ringbuffers(dev);
	mutex_unlock(&dev->struct_mutex);

	cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
	cancel_delayed_work_sync(&dev_priv->mm.retire_work);
#if 0
	flush_delayed_work(&dev_priv->mm.idle_work);
#endif

	/* Assert that we successfully flushed all the work and
	 * reset the GPU back to its idle, low power state.
	 */
	WARN_ON(dev_priv->mm.busy);

	return 0;

err:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice)
{
	struct intel_engine_cs *ring = req->ring;
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	u32 *remap_info = dev_priv->l3_parity.remap_info[slice];
	int i, ret;

	if (!HAS_L3_DPF(dev) || !remap_info)
		return 0;

	ret = intel_ring_begin(req, GEN7_L3LOG_SIZE / 4 * 3);
	if (ret)
		return ret;

	/*
	 * Note: We do not worry about the concurrent register cacheline hang
	 * here because no other code should access these registers other than
	 * at initialization time.
	 */
	for (i = 0; i < GEN7_L3LOG_SIZE / 4; i++) {
		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
		intel_ring_emit_reg(ring, GEN7_L3LOG(slice, i));
		intel_ring_emit(ring, remap_info[i]);
	}

	intel_ring_advance(ring);

	return ret;
}

void i915_gem_init_swizzling(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;

	if (INTEL_INFO(dev)->gen < 5 ||
	    dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
		return;

	I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
				 DISP_TILE_SURFACE_SWIZZLING);

	if (IS_GEN5(dev))
		return;

	I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
	if (IS_GEN6(dev))
		I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB));
	else if (IS_GEN7(dev))
		I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB));
	else if (IS_GEN8(dev))
		I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW));
	else
		BUG();
}

static void init_unused_ring(struct drm_device *dev, u32 base)
{
	struct drm_i915_private *dev_priv = dev->dev_private;

	I915_WRITE(RING_CTL(base), 0);
	I915_WRITE(RING_HEAD(base), 0);
	I915_WRITE(RING_TAIL(base), 0);
	I915_WRITE(RING_START(base), 0);
}

static void init_unused_rings(struct drm_device *dev)
{
	if (IS_I830(dev)) {
		init_unused_ring(dev, PRB1_BASE);
		init_unused_ring(dev, SRB0_BASE);
		init_unused_ring(dev, SRB1_BASE);
		init_unused_ring(dev, SRB2_BASE);
		init_unused_ring(dev, SRB3_BASE);
	} else if (IS_GEN2(dev)) {
		init_unused_ring(dev, SRB0_BASE);
		init_unused_ring(dev, SRB1_BASE);
	} else if (IS_GEN3(dev)) {
		init_unused_ring(dev, PRB1_BASE);
		init_unused_ring(dev, PRB2_BASE);
	}
}

int i915_gem_init_rings(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret;

	ret = intel_init_render_ring_buffer(dev);
	if (ret)
		return ret;

	if (HAS_BSD(dev)) {
		ret = intel_init_bsd_ring_buffer(dev);
		if (ret)
			goto cleanup_render_ring;
	}

	if (HAS_BLT(dev)) {
		ret = intel_init_blt_ring_buffer(dev);
		if (ret)
			goto cleanup_bsd_ring;
	}

	if (HAS_VEBOX(dev)) {
		ret = intel_init_vebox_ring_buffer(dev);
		if (ret)
			goto cleanup_blt_ring;
	}

	if (HAS_BSD2(dev)) {
		ret = intel_init_bsd2_ring_buffer(dev);
		if (ret)
			goto cleanup_vebox_ring;
	}

	return 0;

cleanup_vebox_ring:
	intel_cleanup_ring_buffer(&dev_priv->ring[VECS]);
cleanup_blt_ring:
	intel_cleanup_ring_buffer(&dev_priv->ring[BCS]);
cleanup_bsd_ring:
	intel_cleanup_ring_buffer(&dev_priv->ring[VCS]);
cleanup_render_ring:
	intel_cleanup_ring_buffer(&dev_priv->ring[RCS]);

	return ret;
}

int
i915_gem_init_hw(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *ring;
	int ret, i, j;

#if 0
	if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt())
		return -EIO;
#endif

	/* Double layer security blanket, see i915_gem_init() */
	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);

	if (dev_priv->ellc_size)
		I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf));

	if (IS_HASWELL(dev))
		I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev) ?
			   LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED);

	if (HAS_PCH_NOP(dev)) {
		if (IS_IVYBRIDGE(dev)) {
			u32 temp = I915_READ(GEN7_MSG_CTL);
			temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK);
			I915_WRITE(GEN7_MSG_CTL, temp);
		} else if (INTEL_INFO(dev)->gen >= 7) {
			u32 temp = I915_READ(HSW_NDE_RSTWRN_OPT);
			temp &= ~RESET_PCH_HANDSHAKE_ENABLE;
			I915_WRITE(HSW_NDE_RSTWRN_OPT, temp);
		}
	}

	i915_gem_init_swizzling(dev);

	/*
	 * At least 830 can leave some of the unused rings
	 * "active" (i.e. head != tail) after resume which
	 * will prevent c3 entry. Make sure all unused rings
	 * are totally idle.
	 */
	init_unused_rings(dev);

	BUG_ON(!dev_priv->ring[RCS].default_context);

	ret = i915_ppgtt_init_hw(dev);
	if (ret) {
		DRM_ERROR("PPGTT enable HW failed %d\n", ret);
		goto out;
	}

	/* Need to do basic initialisation of all rings first: */
	for_each_ring(ring, dev_priv, i) {
		ret = ring->init_hw(ring);
		if (ret)
			goto out;
	}

	/* We can't enable contexts until all firmware is loaded */
	if (HAS_GUC_UCODE(dev)) {
		ret = intel_guc_ucode_load(dev);
		if (ret) {
			DRM_ERROR("Failed to initialize GuC, error %d\n", ret);
			ret = -EIO;
			goto out;
		}
	}

	/*
	 * Increment the next seqno by 0x100 so we have a visible break
	 * on re-initialisation
	 */
	ret = i915_gem_set_seqno(dev, dev_priv->next_seqno + 0x100);
	if (ret)
		goto out;

	/* Now it is safe to go back round and do everything else: */
	for_each_ring(ring, dev_priv, i) {
		struct drm_i915_gem_request *req;

		WARN_ON(!ring->default_context);

		ret = i915_gem_request_alloc(ring, ring->default_context, &req);
		if (ret) {
			i915_gem_cleanup_ringbuffer(dev);
			goto out;
		}

		if (ring->id == RCS) {
			for (j = 0; j < NUM_L3_SLICES(dev); j++)
				i915_gem_l3_remap(req, j);
		}

		ret = i915_ppgtt_init_ring(req);
		if (ret && ret != -EIO) {
			DRM_ERROR("PPGTT enable ring #%d failed %d\n", i, ret);
			i915_gem_request_cancel(req);
			i915_gem_cleanup_ringbuffer(dev);
			goto out;
		}

		ret = i915_gem_context_enable(req);
		if (ret && ret != -EIO) {
			DRM_ERROR("Context enable ring #%d failed %d\n", i, ret);
			i915_gem_request_cancel(req);
			i915_gem_cleanup_ringbuffer(dev);
			goto out;
		}

		i915_add_request_no_flush(req);
	}

out:
	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
	return ret;
}
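
/*
 * Illustrative sketch (not part of the driver): the forcewake bracket used
 * in i915_gem_init_hw() above, applicable to any block of MMIO pokes that
 * must not race with the GT powering down mid-sequence. Hypothetical
 * caller; only functions already used above are assumed.
 */
#if 0
static void example_forcewake_bracket(struct drm_i915_private *dev_priv)
{
	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);

	/* ... a series of I915_READ()/I915_WRITE() accesses ... */

	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
}
#endif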

int i915_gem_init(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret;

	i915.enable_execlists = intel_sanitize_enable_execlists(dev,
			i915.enable_execlists);

	mutex_lock(&dev->struct_mutex);

	if (!i915.enable_execlists) {
		dev_priv->gt.execbuf_submit = i915_gem_ringbuffer_submission;
		dev_priv->gt.init_rings = i915_gem_init_rings;
		dev_priv->gt.cleanup_ring = intel_cleanup_ring_buffer;
		dev_priv->gt.stop_ring = intel_stop_ring_buffer;
	} else {
		dev_priv->gt.execbuf_submit = intel_execlists_submission;
		dev_priv->gt.init_rings = intel_logical_rings_init;
		dev_priv->gt.cleanup_ring = intel_logical_ring_cleanup;
		dev_priv->gt.stop_ring = intel_logical_ring_stop;
	}

	/* This is just a security blanket to placate dragons.
	 * On some systems, we very sporadically observe that the first TLBs
	 * used by the CS may be stale, despite us poking the TLB reset. If
	 * we hold the forcewake during initialisation these problems
	 * just magically go away.
	 */
	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);

	ret = i915_gem_init_userptr(dev);
	if (ret)
		goto out_unlock;

	i915_gem_init_global_gtt(dev);

	ret = i915_gem_context_init(dev);
	if (ret)
		goto out_unlock;

	ret = dev_priv->gt.init_rings(dev);
	if (ret)
		goto out_unlock;

	ret = i915_gem_init_hw(dev);
	if (ret == -EIO) {
		/* Allow ring initialisation to fail by marking the GPU as
		 * wedged. But we only want to do this where the GPU is angry,
		 * for all other failure, such as an allocation failure, bail.
		 */
		DRM_ERROR("Failed to initialize GPU, declaring it wedged\n");
		atomic_or(I915_WEDGED, &dev_priv->gpu_error.reset_counter);
		ret = 0;
	}

out_unlock:
	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
	mutex_unlock(&dev->struct_mutex);

	return ret;
}

void
i915_gem_cleanup_ringbuffer(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *ring;
	int i;

	for_each_ring(ring, dev_priv, i)
		dev_priv->gt.cleanup_ring(ring);

	if (i915.enable_execlists)
		/*
		 * Neither the BIOS, ourselves, nor any other kernel
		 * expects the system to be in execlists mode on startup,
		 * so we need to reset the GPU back to legacy mode.
		 */
		intel_gpu_reset(dev);
}

static void
init_ring_lists(struct intel_engine_cs *ring)
{
	INIT_LIST_HEAD(&ring->active_list);
	INIT_LIST_HEAD(&ring->request_list);
}

void
i915_gem_load(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int i;

	INIT_LIST_HEAD(&dev_priv->vm_list);
	INIT_LIST_HEAD(&dev_priv->context_list);
	INIT_LIST_HEAD(&dev_priv->mm.unbound_list);
	INIT_LIST_HEAD(&dev_priv->mm.bound_list);
	INIT_LIST_HEAD(&dev_priv->mm.fence_list);
	for (i = 0; i < I915_NUM_RINGS; i++)
		init_ring_lists(&dev_priv->ring[i]);
	for (i = 0; i < I915_MAX_NUM_FENCES; i++)
		INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
	INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
			  i915_gem_retire_work_handler);
	INIT_DELAYED_WORK(&dev_priv->mm.idle_work,
			  i915_gem_idle_work_handler);
	init_waitqueue_head(&dev_priv->gpu_error.reset_queue);

	dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL;

	if (INTEL_INFO(dev)->gen >= 7 && !IS_VALLEYVIEW(dev) && !IS_CHERRYVIEW(dev))
		dev_priv->num_fence_regs = 32;
	else if (INTEL_INFO(dev)->gen >= 4 ||
		 IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
		dev_priv->num_fence_regs = 16;
	else
		dev_priv->num_fence_regs = 8;

	if (intel_vgpu_active(dev))
		dev_priv->num_fence_regs =
				I915_READ(vgtif_reg(avail_rs.fence_num));

	/*
	 * Set initial sequence number for requests.
	 * Using this number allows the wraparound to happen early,
	 * catching any obvious problems.
	 */
	dev_priv->next_seqno = ((u32)~0 - 0x1100);
	dev_priv->last_seqno = ((u32)~0 - 0x1101);

	/* Initialize fence registers to zero */
	INIT_LIST_HEAD(&dev_priv->mm.fence_list);
	i915_gem_restore_fences(dev);

	i915_gem_detect_bit_6_swizzle(dev);
	init_waitqueue_head(&dev_priv->pending_flip_queue);

	dev_priv->mm.interruptible = true;

	i915_gem_shrinker_init(dev_priv);

	lockinit(&dev_priv->fb_tracking.lock, "drmftl", 0, LK_CANRECURSE);
}

void i915_gem_release(struct drm_device *dev, struct drm_file *file)
{
	struct drm_i915_file_private *file_priv = file->driver_priv;

	/* Clean up our request list when the client is going away, so that
	 * later retire_requests won't dereference our soon-to-be-gone
	 * file_priv.
	 */
	spin_lock(&file_priv->mm.lock);
	while (!list_empty(&file_priv->mm.request_list)) {
		struct drm_i915_gem_request *request;

		request = list_first_entry(&file_priv->mm.request_list,
					   struct drm_i915_gem_request,
					   client_list);
		list_del(&request->client_list);
		request->file_priv = NULL;
	}
	spin_unlock(&file_priv->mm.lock);

	if (!list_empty(&file_priv->rps.link)) {
		lockmgr(&to_i915(dev)->rps.client_lock, LK_EXCLUSIVE);
		list_del(&file_priv->rps.link);
		lockmgr(&to_i915(dev)->rps.client_lock, LK_RELEASE);
	}
}

int
i915_gem_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
		    vm_ooffset_t foff, struct ucred *cred, u_short *color)
{
	*color = 0; /* XXXKIB */
	return (0);
}

void
i915_gem_pager_dtor(void *handle)
{
	struct drm_gem_object *obj;
	struct drm_device *dev;

	obj = handle;
	dev = obj->dev;

	mutex_lock(&dev->struct_mutex);
	drm_gem_free_mmap_offset(obj);
	i915_gem_release_mmap(to_intel_bo(obj));
	drm_gem_object_unreference(obj);
	mutex_unlock(&dev->struct_mutex);
}

int i915_gem_open(struct drm_device *dev, struct drm_file *file)
{
	struct drm_i915_file_private *file_priv;
	int ret;

	DRM_DEBUG_DRIVER("\n");

	file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
	if (!file_priv)
		return -ENOMEM;

	file->driver_priv = file_priv;
	file_priv->dev_priv = dev->dev_private;
	file_priv->file = file;
	INIT_LIST_HEAD(&file_priv->rps.link);

	spin_init(&file_priv->mm.lock, "i915_priv");
	INIT_LIST_HEAD(&file_priv->mm.request_list);

	ret = i915_gem_context_open(dev, file);
	if (ret)
		kfree(file_priv);

	return ret;
}

/**
 * i915_gem_track_fb - update frontbuffer tracking
 * @old: current GEM buffer for the frontbuffer slots
 * @new: new GEM buffer for the frontbuffer slots
 * @frontbuffer_bits: bitmask of frontbuffer slots
 *
 * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them
 * from @old and setting them in @new. Both @old and @new can be NULL.
 */
void i915_gem_track_fb(struct drm_i915_gem_object *old,
		       struct drm_i915_gem_object *new,
		       unsigned frontbuffer_bits)
{
	if (old) {
		WARN_ON(!mutex_is_locked(&old->base.dev->struct_mutex));
		WARN_ON(!(old->frontbuffer_bits & frontbuffer_bits));
		old->frontbuffer_bits &= ~frontbuffer_bits;
	}

	if (new) {
		WARN_ON(!mutex_is_locked(&new->base.dev->struct_mutex));
		WARN_ON(new->frontbuffer_bits & frontbuffer_bits);
		new->frontbuffer_bits |= frontbuffer_bits;
	}
}
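
/*
 * Illustrative sketch (not part of the driver): how a hypothetical flip
 * path would hand the frontbuffer bits from the outgoing to the incoming
 * framebuffer object, struct_mutex held as the WARNs above demand.
 */
#if 0
static void example_flip_tracking(struct drm_i915_gem_object *old_fb_obj,
				  struct drm_i915_gem_object *new_fb_obj,
				  unsigned frontbuffer_bits)
{
	/* Clears the bits on old_fb_obj and sets them on new_fb_obj;
	 * either object may be NULL (e.g. enabling or disabling a plane).
	 */
	i915_gem_track_fb(old_fb_obj, new_fb_obj, frontbuffer_bits);
}
#endif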
"global" : "ppgtt"); 5310 return -1; 5311 } 5312 5313 u64 i915_gem_obj_ggtt_offset_view(struct drm_i915_gem_object *o, 5314 const struct i915_ggtt_view *view) 5315 { 5316 struct i915_address_space *ggtt = i915_obj_to_ggtt(o); 5317 struct i915_vma *vma; 5318 5319 list_for_each_entry(vma, &o->vma_list, vma_link) 5320 if (vma->vm == ggtt && 5321 i915_ggtt_view_equal(&vma->ggtt_view, view)) 5322 return vma->node.start; 5323 5324 WARN(1, "global vma for this object not found. (view=%u)\n", view->type); 5325 return -1; 5326 } 5327 5328 bool i915_gem_obj_bound(struct drm_i915_gem_object *o, 5329 struct i915_address_space *vm) 5330 { 5331 struct i915_vma *vma; 5332 5333 list_for_each_entry(vma, &o->vma_list, vma_link) { 5334 if (i915_is_ggtt(vma->vm) && 5335 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) 5336 continue; 5337 if (vma->vm == vm && drm_mm_node_allocated(&vma->node)) 5338 return true; 5339 } 5340 5341 return false; 5342 } 5343 5344 bool i915_gem_obj_ggtt_bound_view(struct drm_i915_gem_object *o, 5345 const struct i915_ggtt_view *view) 5346 { 5347 struct i915_address_space *ggtt = i915_obj_to_ggtt(o); 5348 struct i915_vma *vma; 5349 5350 list_for_each_entry(vma, &o->vma_list, vma_link) 5351 if (vma->vm == ggtt && 5352 i915_ggtt_view_equal(&vma->ggtt_view, view) && 5353 drm_mm_node_allocated(&vma->node)) 5354 return true; 5355 5356 return false; 5357 } 5358 5359 bool i915_gem_obj_bound_any(struct drm_i915_gem_object *o) 5360 { 5361 struct i915_vma *vma; 5362 5363 list_for_each_entry(vma, &o->vma_list, vma_link) 5364 if (drm_mm_node_allocated(&vma->node)) 5365 return true; 5366 5367 return false; 5368 } 5369 5370 unsigned long i915_gem_obj_size(struct drm_i915_gem_object *o, 5371 struct i915_address_space *vm) 5372 { 5373 struct drm_i915_private *dev_priv = o->base.dev->dev_private; 5374 struct i915_vma *vma; 5375 5376 WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base); 5377 5378 BUG_ON(list_empty(&o->vma_list)); 5379 5380 list_for_each_entry(vma, &o->vma_list, vma_link) { 5381 if (i915_is_ggtt(vma->vm) && 5382 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) 5383 continue; 5384 if (vma->vm == vm) 5385 return vma->node.size; 5386 } 5387 return 0; 5388 } 5389 5390 bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj) 5391 { 5392 struct i915_vma *vma; 5393 list_for_each_entry(vma, &obj->vma_list, vma_link) 5394 if (vma->pin_count > 0) 5395 return true; 5396 5397 return false; 5398 } 5399 5400 /* Like i915_gem_object_get_page(), but mark the returned page dirty */ 5401 struct vm_page * 5402 i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, int n) 5403 { 5404 struct vm_page *page; 5405 5406 /* Only default objects have per-page dirty tracking */ 5407 if (WARN_ON((obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE) == 0)) 5408 return NULL; 5409 5410 page = i915_gem_object_get_page(obj, n); 5411 set_page_dirty(page); 5412 return page; 5413 } 5414 5415 /* Allocate a new GEM object and fill it with the supplied data */ 5416 struct drm_i915_gem_object * 5417 i915_gem_object_create_from_data(struct drm_device *dev, 5418 const void *data, size_t size) 5419 { 5420 struct drm_i915_gem_object *obj; 5421 struct sg_table *sg; 5422 size_t bytes; 5423 int ret; 5424 5425 obj = i915_gem_alloc_object(dev, round_up(size, PAGE_SIZE)); 5426 if (IS_ERR_OR_NULL(obj)) 5427 return obj; 5428 5429 ret = i915_gem_object_set_to_cpu_domain(obj, true); 5430 if (ret) 5431 goto fail; 5432 5433 ret = i915_gem_object_get_pages(obj); 5434 if (ret) 5435 goto fail; 5436 5437 i915_gem_object_pin_pages(obj); 5438 sg 

/* Allocate a new GEM object and fill it with the supplied data */
struct drm_i915_gem_object *
i915_gem_object_create_from_data(struct drm_device *dev,
				 const void *data, size_t size)
{
	struct drm_i915_gem_object *obj;
	struct sg_table *sg;
	size_t bytes;
	int ret;

	obj = i915_gem_alloc_object(dev, round_up(size, PAGE_SIZE));
	if (IS_ERR_OR_NULL(obj))
		return obj;

	ret = i915_gem_object_set_to_cpu_domain(obj, true);
	if (ret)
		goto fail;

	ret = i915_gem_object_get_pages(obj);
	if (ret)
		goto fail;

	i915_gem_object_pin_pages(obj);
	sg = obj->pages;
	bytes = sg_copy_from_buffer(sg->sgl, sg->nents, data, size);
	obj->dirty = 1;		/* Backing store is now out of date */
	i915_gem_object_unpin_pages(obj);

	if (WARN_ON(bytes != size)) {
		DRM_ERROR("Incomplete copy, wrote %zu of %zu", bytes, size);
		ret = -EFAULT;
		goto fail;
	}

	return obj;

fail:
	drm_gem_object_unreference(&obj->base);
	return ERR_PTR(ret);
}
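
/*
 * Illustrative sketch (not part of the driver): a hypothetical caller
 * uploading a small blob into a fresh GEM object, mirroring how firmware
 * loading code would consume the helper above.
 */
#if 0
static struct drm_i915_gem_object *
example_upload_blob(struct drm_device *dev, const void *blob, size_t len)
{
	struct drm_i915_gem_object *obj;

	obj = i915_gem_object_create_from_data(dev, blob, len);
	if (IS_ERR_OR_NULL(obj))
		return obj;	/* NULL or ERR_PTR() from the copy path */

	/* The object now owns a CPU-coherent copy of 'blob'. */
	return obj;
}
#endif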