1 /* 2 * Copyright © 2008-2015 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 * Authors: 24 * Eric Anholt <eric@anholt.net> 25 * 26 */ 27 28 #include <drm/drmP.h> 29 #include <drm/drm_vma_manager.h> 30 #include <drm/i915_drm.h> 31 #include "i915_drv.h" 32 #include "i915_vgpu.h" 33 #include "i915_trace.h" 34 #include "intel_drv.h" 35 #include <linux/shmem_fs.h> 36 #include <linux/slab.h> 37 #include <linux/swap.h> 38 #include <linux/pci.h> 39 40 #define RQ_BUG_ON(expr) 41 42 static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj); 43 static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj); 44 static void 45 i915_gem_object_retire__write(struct drm_i915_gem_object *obj); 46 static void 47 i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring); 48 49 static bool cpu_cache_is_coherent(struct drm_device *dev, 50 enum i915_cache_level level) 51 { 52 return HAS_LLC(dev) || level != I915_CACHE_NONE; 53 } 54 55 static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) 56 { 57 if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) 58 return true; 59 60 return obj->pin_display; 61 } 62 63 /* some bookkeeping */ 64 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv, 65 size_t size) 66 { 67 spin_lock(&dev_priv->mm.object_stat_lock); 68 dev_priv->mm.object_count++; 69 dev_priv->mm.object_memory += size; 70 spin_unlock(&dev_priv->mm.object_stat_lock); 71 } 72 73 static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv, 74 size_t size) 75 { 76 spin_lock(&dev_priv->mm.object_stat_lock); 77 dev_priv->mm.object_count--; 78 dev_priv->mm.object_memory -= size; 79 spin_unlock(&dev_priv->mm.object_stat_lock); 80 } 81 82 static int 83 i915_gem_wait_for_error(struct i915_gpu_error *error) 84 { 85 int ret; 86 87 #define EXIT_COND (!i915_reset_in_progress(error) || \ 88 i915_terminally_wedged(error)) 89 if (EXIT_COND) 90 return 0; 91 92 /* 93 * Only wait 10 seconds for the gpu reset to complete to avoid hanging 94 * userspace. If it takes that long something really bad is going on and 95 * we should simply try to bail out and fail as gracefully as possible. 
96 */ 97 ret = wait_event_interruptible_timeout(error->reset_queue, 98 EXIT_COND, 99 10*HZ); 100 if (ret == 0) { 101 DRM_ERROR("Timed out waiting for the gpu reset to complete\n"); 102 return -EIO; 103 } else if (ret < 0) { 104 return ret; 105 } 106 #undef EXIT_COND 107 108 return 0; 109 } 110 111 int i915_mutex_lock_interruptible(struct drm_device *dev) 112 { 113 struct drm_i915_private *dev_priv = dev->dev_private; 114 int ret; 115 116 ret = i915_gem_wait_for_error(&dev_priv->gpu_error); 117 if (ret) 118 return ret; 119 120 ret = mutex_lock_interruptible(&dev->struct_mutex); 121 if (ret) 122 return ret; 123 124 WARN_ON(i915_verify_lists(dev)); 125 return 0; 126 } 127 128 int 129 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, 130 struct drm_file *file) 131 { 132 struct drm_i915_private *dev_priv = dev->dev_private; 133 struct drm_i915_gem_get_aperture *args = data; 134 struct i915_gtt *ggtt = &dev_priv->gtt; 135 struct i915_vma *vma; 136 size_t pinned; 137 138 pinned = 0; 139 mutex_lock(&dev->struct_mutex); 140 list_for_each_entry(vma, &ggtt->base.active_list, mm_list) 141 if (vma->pin_count) 142 pinned += vma->node.size; 143 list_for_each_entry(vma, &ggtt->base.inactive_list, mm_list) 144 if (vma->pin_count) 145 pinned += vma->node.size; 146 mutex_unlock(&dev->struct_mutex); 147 148 args->aper_size = dev_priv->gtt.base.total; 149 args->aper_available_size = args->aper_size - pinned; 150 151 return 0; 152 } 153 154 #if 0 155 static int 156 i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) 157 { 158 struct address_space *mapping = file_inode(obj->base.filp)->i_mapping; 159 char *vaddr = obj->phys_handle->vaddr; 160 struct sg_table *st; 161 struct scatterlist *sg; 162 int i; 163 164 if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj))) 165 return -EINVAL; 166 167 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) { 168 struct page *page; 169 char *src; 170 171 page = shmem_read_mapping_page(mapping, i); 172 if (IS_ERR(page)) 173 return PTR_ERR(page); 174 175 src = kmap_atomic(page); 176 memcpy(vaddr, src, PAGE_SIZE); 177 drm_clflush_virt_range(vaddr, PAGE_SIZE); 178 kunmap_atomic(src); 179 180 page_cache_release(page); 181 vaddr += PAGE_SIZE; 182 } 183 184 i915_gem_chipset_flush(obj->base.dev); 185 186 st = kmalloc(sizeof(*st), GFP_KERNEL); 187 if (st == NULL) 188 return -ENOMEM; 189 190 if (sg_alloc_table(st, 1, GFP_KERNEL)) { 191 kfree(st); 192 return -ENOMEM; 193 } 194 195 sg = st->sgl; 196 sg->offset = 0; 197 sg->length = obj->base.size; 198 199 sg_dma_address(sg) = obj->phys_handle->busaddr; 200 sg_dma_len(sg) = obj->base.size; 201 202 obj->pages = st; 203 return 0; 204 } 205 206 static void 207 i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj) 208 { 209 int ret; 210 211 BUG_ON(obj->madv == __I915_MADV_PURGED); 212 213 ret = i915_gem_object_set_to_cpu_domain(obj, true); 214 if (ret) { 215 /* In the event of a disaster, abandon all caches and 216 * hope for the best. 
217 */ 218 WARN_ON(ret != -EIO); 219 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU; 220 } 221 222 if (obj->madv == I915_MADV_DONTNEED) 223 obj->dirty = 0; 224 225 if (obj->dirty) { 226 struct address_space *mapping = file_inode(obj->base.filp)->i_mapping; 227 char *vaddr = obj->phys_handle->vaddr; 228 int i; 229 230 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) { 231 struct page *page; 232 char *dst; 233 234 page = shmem_read_mapping_page(mapping, i); 235 if (IS_ERR(page)) 236 continue; 237 238 dst = kmap_atomic(page); 239 drm_clflush_virt_range(vaddr, PAGE_SIZE); 240 memcpy(dst, vaddr, PAGE_SIZE); 241 kunmap_atomic(dst); 242 243 set_page_dirty(page); 244 if (obj->madv == I915_MADV_WILLNEED) 245 mark_page_accessed(page); 246 page_cache_release(page); 247 vaddr += PAGE_SIZE; 248 } 249 obj->dirty = 0; 250 } 251 252 sg_free_table(obj->pages); 253 kfree(obj->pages); 254 } 255 256 static void 257 i915_gem_object_release_phys(struct drm_i915_gem_object *obj) 258 { 259 drm_pci_free(obj->base.dev, obj->phys_handle); 260 } 261 262 static const struct drm_i915_gem_object_ops i915_gem_phys_ops = { 263 .get_pages = i915_gem_object_get_pages_phys, 264 .put_pages = i915_gem_object_put_pages_phys, 265 .release = i915_gem_object_release_phys, 266 }; 267 #endif 268 269 static int 270 drop_pages(struct drm_i915_gem_object *obj) 271 { 272 struct i915_vma *vma, *next; 273 int ret; 274 275 drm_gem_object_reference(&obj->base); 276 list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) 277 if (i915_vma_unbind(vma)) 278 break; 279 280 ret = i915_gem_object_put_pages(obj); 281 drm_gem_object_unreference(&obj->base); 282 283 return ret; 284 } 285 286 int 287 i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, 288 int align) 289 { 290 drm_dma_handle_t *phys; 291 int ret; 292 293 if (obj->phys_handle) { 294 if ((unsigned long)obj->phys_handle->vaddr & (align -1)) 295 return -EBUSY; 296 297 return 0; 298 } 299 300 if (obj->madv != I915_MADV_WILLNEED) 301 return -EFAULT; 302 303 #if 0 304 if (obj->base.filp == NULL) 305 return -EINVAL; 306 #endif 307 308 ret = drop_pages(obj); 309 if (ret) 310 return ret; 311 312 /* create a new object */ 313 phys = drm_pci_alloc(obj->base.dev, obj->base.size, align); 314 if (!phys) 315 return -ENOMEM; 316 317 obj->phys_handle = phys; 318 #if 0 319 obj->ops = &i915_gem_phys_ops; 320 #endif 321 322 return i915_gem_object_get_pages(obj); 323 } 324 325 static int 326 i915_gem_phys_pwrite(struct drm_i915_gem_object *obj, 327 struct drm_i915_gem_pwrite *args, 328 struct drm_file *file_priv) 329 { 330 struct drm_device *dev = obj->base.dev; 331 void *vaddr = (char *)obj->phys_handle->vaddr + args->offset; 332 char __user *user_data = to_user_ptr(args->data_ptr); 333 int ret = 0; 334 335 /* We manually control the domain here and pretend that it 336 * remains coherent i.e. in the GTT domain, like shmem_pwrite. 337 */ 338 ret = i915_gem_object_wait_rendering(obj, false); 339 if (ret) 340 return ret; 341 342 intel_fb_obj_invalidate(obj, ORIGIN_CPU); 343 if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) { 344 unsigned long unwritten; 345 346 /* The physical object once assigned is fixed for the lifetime 347 * of the obj, so we can safely drop the lock and continue 348 * to access vaddr. 
349 */ 350 mutex_unlock(&dev->struct_mutex); 351 unwritten = copy_from_user(vaddr, user_data, args->size); 352 mutex_lock(&dev->struct_mutex); 353 if (unwritten) { 354 ret = -EFAULT; 355 goto out; 356 } 357 } 358 359 drm_clflush_virt_range(vaddr, args->size); 360 i915_gem_chipset_flush(dev); 361 362 out: 363 intel_fb_obj_flush(obj, false, ORIGIN_CPU); 364 return ret; 365 } 366 367 void *i915_gem_object_alloc(struct drm_device *dev) 368 { 369 return kmalloc(sizeof(struct drm_i915_gem_object), 370 M_DRM, M_WAITOK | M_ZERO); 371 } 372 373 void i915_gem_object_free(struct drm_i915_gem_object *obj) 374 { 375 kfree(obj); 376 } 377 378 static int 379 i915_gem_create(struct drm_file *file, 380 struct drm_device *dev, 381 uint64_t size, 382 uint32_t *handle_p) 383 { 384 struct drm_i915_gem_object *obj; 385 int ret; 386 u32 handle; 387 388 size = roundup(size, PAGE_SIZE); 389 if (size == 0) 390 return -EINVAL; 391 392 /* Allocate the new object */ 393 obj = i915_gem_alloc_object(dev, size); 394 if (obj == NULL) 395 return -ENOMEM; 396 397 ret = drm_gem_handle_create(file, &obj->base, &handle); 398 /* drop reference from allocate - handle holds it now */ 399 drm_gem_object_unreference_unlocked(&obj->base); 400 if (ret) 401 return ret; 402 403 *handle_p = handle; 404 return 0; 405 } 406 407 int 408 i915_gem_dumb_create(struct drm_file *file, 409 struct drm_device *dev, 410 struct drm_mode_create_dumb *args) 411 { 412 /* have to work out size/pitch and return them */ 413 args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64); 414 args->size = args->pitch * args->height; 415 return i915_gem_create(file, dev, 416 args->size, &args->handle); 417 } 418 419 /** 420 * Creates a new mm object and returns a handle to it. 421 */ 422 int 423 i915_gem_create_ioctl(struct drm_device *dev, void *data, 424 struct drm_file *file) 425 { 426 struct drm_i915_gem_create *args = data; 427 428 return i915_gem_create(file, dev, 429 args->size, &args->handle); 430 } 431 432 static inline int 433 __copy_to_user_swizzled(char __user *cpu_vaddr, 434 const char *gpu_vaddr, int gpu_offset, 435 int length) 436 { 437 int ret, cpu_offset = 0; 438 439 while (length > 0) { 440 int cacheline_end = ALIGN(gpu_offset + 1, 64); 441 int this_length = min(cacheline_end - gpu_offset, length); 442 int swizzled_gpu_offset = gpu_offset ^ 64; 443 444 ret = __copy_to_user(cpu_vaddr + cpu_offset, 445 gpu_vaddr + swizzled_gpu_offset, 446 this_length); 447 if (ret) 448 return ret + length; 449 450 cpu_offset += this_length; 451 gpu_offset += this_length; 452 length -= this_length; 453 } 454 455 return 0; 456 } 457 458 static inline int 459 __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset, 460 const char __user *cpu_vaddr, 461 int length) 462 { 463 int ret, cpu_offset = 0; 464 465 while (length > 0) { 466 int cacheline_end = ALIGN(gpu_offset + 1, 64); 467 int this_length = min(cacheline_end - gpu_offset, length); 468 int swizzled_gpu_offset = gpu_offset ^ 64; 469 470 ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset, 471 cpu_vaddr + cpu_offset, 472 this_length); 473 if (ret) 474 return ret + length; 475 476 cpu_offset += this_length; 477 gpu_offset += this_length; 478 length -= this_length; 479 } 480 481 return 0; 482 } 483 484 /* 485 * Pins the specified object's pages and synchronizes the object with 486 * GPU accesses. Sets needs_clflush to non-zero if the caller should 487 * flush the object from the CPU cache. 
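 *
 * Illustrative only: a minimal sketch of the expected calling pattern,
 * mirroring how the shmem pread path below uses this helper (all names are
 * from this file):
 *
 *	ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
 *	if (ret)
 *		return ret;
 *	... copy out of obj->pages, clflushing first when needs_clflush ...
 *	i915_gem_object_unpin_pages(obj);
 *
 * The pages are left pinned on success, so every caller must pair this
 * helper with i915_gem_object_unpin_pages().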
488 */ 489 int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, 490 int *needs_clflush) 491 { 492 int ret; 493 494 *needs_clflush = 0; 495 496 #if 0 497 if (!obj->base.filp) 498 return -EINVAL; 499 #endif 500 501 if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) { 502 /* If we're not in the cpu read domain, set ourself into the gtt 503 * read domain and manually flush cachelines (if required). This 504 * optimizes for the case when the gpu will dirty the data 505 * anyway again before the next pread happens. */ 506 *needs_clflush = !cpu_cache_is_coherent(obj->base.dev, 507 obj->cache_level); 508 ret = i915_gem_object_wait_rendering(obj, true); 509 if (ret) 510 return ret; 511 } 512 513 ret = i915_gem_object_get_pages(obj); 514 if (ret) 515 return ret; 516 517 i915_gem_object_pin_pages(obj); 518 519 return ret; 520 } 521 522 /* Per-page copy function for the shmem pread fastpath. 523 * Flushes invalid cachelines before reading the target if 524 * needs_clflush is set. */ 525 static int 526 shmem_pread_fast(struct vm_page *page, int shmem_page_offset, int page_length, 527 char __user *user_data, 528 bool page_do_bit17_swizzling, bool needs_clflush) 529 { 530 char *vaddr; 531 int ret; 532 533 if (unlikely(page_do_bit17_swizzling)) 534 return -EINVAL; 535 536 vaddr = kmap_atomic(page); 537 if (needs_clflush) 538 drm_clflush_virt_range(vaddr + shmem_page_offset, 539 page_length); 540 ret = __copy_to_user_inatomic(user_data, 541 vaddr + shmem_page_offset, 542 page_length); 543 kunmap_atomic(vaddr); 544 545 return ret ? -EFAULT : 0; 546 } 547 548 static void 549 shmem_clflush_swizzled_range(char *addr, unsigned long length, 550 bool swizzled) 551 { 552 if (unlikely(swizzled)) { 553 unsigned long start = (unsigned long) addr; 554 unsigned long end = (unsigned long) addr + length; 555 556 /* For swizzling simply ensure that we always flush both 557 * channels. Lame, but simple and it works. Swizzled 558 * pwrite/pread is far from a hotpath - current userspace 559 * doesn't use it at all. */ 560 start = round_down(start, 128); 561 end = round_up(end, 128); 562 563 drm_clflush_virt_range((void *)start, end - start); 564 } else { 565 drm_clflush_virt_range(addr, length); 566 } 567 568 } 569 570 /* Only difference to the fast-path function is that this can handle bit17 571 * and uses non-atomic copy and kmap functions. */ 572 static int 573 shmem_pread_slow(struct vm_page *page, int shmem_page_offset, int page_length, 574 char __user *user_data, 575 bool page_do_bit17_swizzling, bool needs_clflush) 576 { 577 char *vaddr; 578 int ret; 579 580 vaddr = kmap(page); 581 if (needs_clflush) 582 shmem_clflush_swizzled_range(vaddr + shmem_page_offset, 583 page_length, 584 page_do_bit17_swizzling); 585 586 if (page_do_bit17_swizzling) 587 ret = __copy_to_user_swizzled(user_data, 588 vaddr, shmem_page_offset, 589 page_length); 590 else 591 ret = __copy_to_user(user_data, 592 vaddr + shmem_page_offset, 593 page_length); 594 kunmap(page); 595 596 return ret ? 
- EFAULT : 0; 597 } 598 599 static int 600 i915_gem_shmem_pread(struct drm_device *dev, 601 struct drm_i915_gem_object *obj, 602 struct drm_i915_gem_pread *args, 603 struct drm_file *file) 604 { 605 char __user *user_data; 606 ssize_t remain; 607 loff_t offset; 608 int shmem_page_offset, page_length, ret = 0; 609 int obj_do_bit17_swizzling, page_do_bit17_swizzling; 610 int prefaulted = 0; 611 int needs_clflush = 0; 612 struct sg_page_iter sg_iter; 613 614 user_data = to_user_ptr(args->data_ptr); 615 remain = args->size; 616 617 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); 618 619 ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush); 620 if (ret) 621 return ret; 622 623 offset = args->offset; 624 625 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 626 offset >> PAGE_SHIFT) { 627 struct vm_page *page = sg_page_iter_page(&sg_iter); 628 629 if (remain <= 0) 630 break; 631 632 /* Operation in this page 633 * 634 * shmem_page_offset = offset within page in shmem file 635 * page_length = bytes to copy for this page 636 */ 637 shmem_page_offset = offset_in_page(offset); 638 page_length = remain; 639 if ((shmem_page_offset + page_length) > PAGE_SIZE) 640 page_length = PAGE_SIZE - shmem_page_offset; 641 642 page_do_bit17_swizzling = obj_do_bit17_swizzling && 643 (page_to_phys(page) & (1 << 17)) != 0; 644 645 ret = shmem_pread_fast(page, shmem_page_offset, page_length, 646 user_data, page_do_bit17_swizzling, 647 needs_clflush); 648 if (ret == 0) 649 goto next_page; 650 651 mutex_unlock(&dev->struct_mutex); 652 653 if (likely(!i915.prefault_disable) && !prefaulted) { 654 ret = fault_in_multipages_writeable(user_data, remain); 655 /* Userspace is tricking us, but we've already clobbered 656 * its pages with the prefault and promised to write the 657 * data up to the first fault. Hence ignore any errors 658 * and just continue. */ 659 (void)ret; 660 prefaulted = 1; 661 } 662 663 ret = shmem_pread_slow(page, shmem_page_offset, page_length, 664 user_data, page_do_bit17_swizzling, 665 needs_clflush); 666 667 mutex_lock(&dev->struct_mutex); 668 669 if (ret) 670 goto out; 671 672 next_page: 673 remain -= page_length; 674 user_data += page_length; 675 offset += page_length; 676 } 677 678 out: 679 i915_gem_object_unpin_pages(obj); 680 681 return ret; 682 } 683 684 /** 685 * Reads data from the object referenced by handle. 686 * 687 * On error, the contents of *data are undefined. 688 */ 689 int 690 i915_gem_pread_ioctl(struct drm_device *dev, void *data, 691 struct drm_file *file) 692 { 693 struct drm_i915_gem_pread *args = data; 694 struct drm_i915_gem_object *obj; 695 int ret = 0; 696 697 if (args->size == 0) 698 return 0; 699 700 ret = i915_mutex_lock_interruptible(dev); 701 if (ret) 702 return ret; 703 704 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 705 if (&obj->base == NULL) { 706 ret = -ENOENT; 707 goto unlock; 708 } 709 710 /* Bounds check source. */ 711 if (args->offset > obj->base.size || 712 args->size > obj->base.size - args->offset) { 713 ret = -EINVAL; 714 goto out; 715 } 716 717 /* prime objects have no backing filp to GEM pread/pwrite 718 * pages from. 
719 */ 720 721 trace_i915_gem_object_pread(obj, args->offset, args->size); 722 723 ret = i915_gem_shmem_pread(dev, obj, args, file); 724 725 out: 726 drm_gem_object_unreference(&obj->base); 727 unlock: 728 mutex_unlock(&dev->struct_mutex); 729 return ret; 730 } 731 732 /* This is the fast write path which cannot handle 733 * page faults in the source data 734 */ 735 736 static inline int 737 fast_user_write(struct io_mapping *mapping, 738 loff_t page_base, int page_offset, 739 char __user *user_data, 740 int length) 741 { 742 void __iomem *vaddr_atomic; 743 void *vaddr; 744 unsigned long unwritten; 745 746 vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base); 747 /* We can use the cpu mem copy function because this is X86. */ 748 vaddr = (char __force*)vaddr_atomic + page_offset; 749 unwritten = __copy_from_user_inatomic_nocache(vaddr, 750 user_data, length); 751 io_mapping_unmap_atomic(vaddr_atomic); 752 return unwritten; 753 } 754 755 /** 756 * This is the fast pwrite path, where we copy the data directly from the 757 * user into the GTT, uncached. 758 */ 759 static int 760 i915_gem_gtt_pwrite_fast(struct drm_device *dev, 761 struct drm_i915_gem_object *obj, 762 struct drm_i915_gem_pwrite *args, 763 struct drm_file *file) 764 { 765 struct drm_i915_private *dev_priv = dev->dev_private; 766 ssize_t remain; 767 loff_t offset, page_base; 768 char __user *user_data; 769 int page_offset, page_length, ret; 770 771 ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK); 772 if (ret) 773 goto out; 774 775 ret = i915_gem_object_set_to_gtt_domain(obj, true); 776 if (ret) 777 goto out_unpin; 778 779 ret = i915_gem_object_put_fence(obj); 780 if (ret) 781 goto out_unpin; 782 783 user_data = to_user_ptr(args->data_ptr); 784 remain = args->size; 785 786 offset = i915_gem_obj_ggtt_offset(obj) + args->offset; 787 788 intel_fb_obj_invalidate(obj, ORIGIN_GTT); 789 790 while (remain > 0) { 791 /* Operation in this page 792 * 793 * page_base = page offset within aperture 794 * page_offset = offset within page 795 * page_length = bytes to copy for this page 796 */ 797 page_base = offset & ~PAGE_MASK; 798 page_offset = offset_in_page(offset); 799 page_length = remain; 800 if ((page_offset + remain) > PAGE_SIZE) 801 page_length = PAGE_SIZE - page_offset; 802 803 /* If we get a fault while copying data, then (presumably) our 804 * source page isn't available. Return the error and we'll 805 * retry in the slow path. 806 */ 807 if (fast_user_write(dev_priv->gtt.mappable, page_base, 808 page_offset, user_data, page_length)) { 809 ret = -EFAULT; 810 goto out_flush; 811 } 812 813 remain -= page_length; 814 user_data += page_length; 815 offset += page_length; 816 } 817 818 out_flush: 819 intel_fb_obj_flush(obj, false, ORIGIN_GTT); 820 out_unpin: 821 i915_gem_object_ggtt_unpin(obj); 822 out: 823 return ret; 824 } 825 826 /* Per-page copy function for the shmem pwrite fastpath. 827 * Flushes invalid cachelines before writing to the target if 828 * needs_clflush_before is set and flushes out any written cachelines after 829 * writing if needs_clflush is set. 
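 *
 * The fast path runs under kmap_atomic() and cannot sleep: a fault in the
 * user copy or a bit17-swizzled page makes it bail out (-EFAULT/-EINVAL),
 * and the caller then drops struct_mutex and retries the same page with
 * shmem_pwrite_slow(), which uses kmap() and the swizzling-aware helpers.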
*/ 830 static int 831 shmem_pwrite_fast(struct vm_page *page, int shmem_page_offset, int page_length, 832 char __user *user_data, 833 bool page_do_bit17_swizzling, 834 bool needs_clflush_before, 835 bool needs_clflush_after) 836 { 837 char *vaddr; 838 int ret; 839 840 if (unlikely(page_do_bit17_swizzling)) 841 return -EINVAL; 842 843 vaddr = kmap_atomic(page); 844 if (needs_clflush_before) 845 drm_clflush_virt_range(vaddr + shmem_page_offset, 846 page_length); 847 ret = __copy_from_user_inatomic(vaddr + shmem_page_offset, 848 user_data, page_length); 849 if (needs_clflush_after) 850 drm_clflush_virt_range(vaddr + shmem_page_offset, 851 page_length); 852 kunmap_atomic(vaddr); 853 854 return ret ? -EFAULT : 0; 855 } 856 857 /* Only difference to the fast-path function is that this can handle bit17 858 * and uses non-atomic copy and kmap functions. */ 859 static int 860 shmem_pwrite_slow(struct vm_page *page, int shmem_page_offset, int page_length, 861 char __user *user_data, 862 bool page_do_bit17_swizzling, 863 bool needs_clflush_before, 864 bool needs_clflush_after) 865 { 866 char *vaddr; 867 int ret; 868 869 vaddr = kmap(page); 870 if (unlikely(needs_clflush_before || page_do_bit17_swizzling)) 871 shmem_clflush_swizzled_range(vaddr + shmem_page_offset, 872 page_length, 873 page_do_bit17_swizzling); 874 if (page_do_bit17_swizzling) 875 ret = __copy_from_user_swizzled(vaddr, shmem_page_offset, 876 user_data, 877 page_length); 878 else 879 ret = __copy_from_user(vaddr + shmem_page_offset, 880 user_data, 881 page_length); 882 if (needs_clflush_after) 883 shmem_clflush_swizzled_range(vaddr + shmem_page_offset, 884 page_length, 885 page_do_bit17_swizzling); 886 kunmap(page); 887 888 return ret ? -EFAULT : 0; 889 } 890 891 static int 892 i915_gem_shmem_pwrite(struct drm_device *dev, 893 struct drm_i915_gem_object *obj, 894 struct drm_i915_gem_pwrite *args, 895 struct drm_file *file) 896 { 897 ssize_t remain; 898 loff_t offset; 899 char __user *user_data; 900 int shmem_page_offset, page_length, ret = 0; 901 int obj_do_bit17_swizzling, page_do_bit17_swizzling; 902 int hit_slowpath = 0; 903 int needs_clflush_after = 0; 904 int needs_clflush_before = 0; 905 struct sg_page_iter sg_iter; 906 907 user_data = to_user_ptr(args->data_ptr); 908 remain = args->size; 909 910 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); 911 912 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) { 913 /* If we're not in the cpu write domain, set ourself into the gtt 914 * write domain and manually flush cachelines (if required). This 915 * optimizes for the case when the gpu will use the data 916 * right away and we therefore have to clflush anyway. */ 917 needs_clflush_after = cpu_write_needs_clflush(obj); 918 ret = i915_gem_object_wait_rendering(obj, false); 919 if (ret) 920 return ret; 921 } 922 /* Same trick applies to invalidate partially written cachelines read 923 * before writing. 
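 *
 * A partial write is detected below by OR-ing the page offset and length
 * and testing the low cacheline bits: with a 64-byte cacheline, offset 0
 * and length 64 give (0 | 64) & 63 == 0 (no preliminary flush), while
 * offset 16 or length 48 leave low bits set and force a clflush of the
 * destination cachelines before the copy.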
*/ 924 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) 925 needs_clflush_before = 926 !cpu_cache_is_coherent(dev, obj->cache_level); 927 928 ret = i915_gem_object_get_pages(obj); 929 if (ret) 930 return ret; 931 932 intel_fb_obj_invalidate(obj, ORIGIN_CPU); 933 934 i915_gem_object_pin_pages(obj); 935 936 offset = args->offset; 937 obj->dirty = 1; 938 939 VM_OBJECT_LOCK(obj->base.vm_obj); 940 vm_object_pip_add(obj->base.vm_obj, 1); 941 942 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 943 offset >> PAGE_SHIFT) { 944 struct vm_page *page = sg_page_iter_page(&sg_iter); 945 int partial_cacheline_write; 946 947 if (remain <= 0) 948 break; 949 950 /* Operation in this page 951 * 952 * shmem_page_offset = offset within page in shmem file 953 * page_length = bytes to copy for this page 954 */ 955 shmem_page_offset = offset_in_page(offset); 956 957 page_length = remain; 958 if ((shmem_page_offset + page_length) > PAGE_SIZE) 959 page_length = PAGE_SIZE - shmem_page_offset; 960 961 /* If we don't overwrite a cacheline completely we need to be 962 * careful to have up-to-date data by first clflushing. Don't 963 * overcomplicate things and flush the entire patch. */ 964 partial_cacheline_write = needs_clflush_before && 965 ((shmem_page_offset | page_length) 966 & (cpu_clflush_line_size - 1)); 967 968 page_do_bit17_swizzling = obj_do_bit17_swizzling && 969 (page_to_phys(page) & (1 << 17)) != 0; 970 971 ret = shmem_pwrite_fast(page, shmem_page_offset, page_length, 972 user_data, page_do_bit17_swizzling, 973 partial_cacheline_write, 974 needs_clflush_after); 975 if (ret == 0) 976 goto next_page; 977 978 hit_slowpath = 1; 979 mutex_unlock(&dev->struct_mutex); 980 ret = shmem_pwrite_slow(page, shmem_page_offset, page_length, 981 user_data, page_do_bit17_swizzling, 982 partial_cacheline_write, 983 needs_clflush_after); 984 985 mutex_lock(&dev->struct_mutex); 986 987 if (ret) 988 goto out; 989 990 next_page: 991 remain -= page_length; 992 user_data += page_length; 993 offset += page_length; 994 } 995 vm_object_pip_wakeup(obj->base.vm_obj); 996 VM_OBJECT_UNLOCK(obj->base.vm_obj); 997 998 out: 999 i915_gem_object_unpin_pages(obj); 1000 1001 if (hit_slowpath) { 1002 /* 1003 * Fixup: Flush cpu caches in case we didn't flush the dirty 1004 * cachelines in-line while writing and the object moved 1005 * out of the cpu write domain while we've dropped the lock. 1006 */ 1007 if (!needs_clflush_after && 1008 obj->base.write_domain != I915_GEM_DOMAIN_CPU) { 1009 if (i915_gem_clflush_object(obj, obj->pin_display)) 1010 needs_clflush_after = true; 1011 } 1012 } 1013 1014 if (needs_clflush_after) 1015 i915_gem_chipset_flush(dev); 1016 else 1017 obj->cache_dirty = true; 1018 1019 intel_fb_obj_flush(obj, false, ORIGIN_CPU); 1020 return ret; 1021 } 1022 1023 /** 1024 * Writes data to the object referenced by handle. 1025 * 1026 * On error, the contents of the buffer that were to be modified are undefined. 
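 *
 * For reference, a rough sketch of the userspace side (libdrm-style; the
 * fd/handle/buffer names are placeholders, not part of this file):
 *
 *	struct drm_i915_gem_pwrite p = { 0 };
 *	p.handle = handle;
 *	p.offset = dst_offset;
 *	p.size = len;
 *	p.data_ptr = (uintptr_t)src_buf;
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_PWRITE, &p);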
1027 */ 1028 int 1029 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, 1030 struct drm_file *file) 1031 { 1032 struct drm_i915_private *dev_priv = dev->dev_private; 1033 struct drm_i915_gem_pwrite *args = data; 1034 struct drm_i915_gem_object *obj; 1035 int ret; 1036 1037 if (args->size == 0) 1038 return 0; 1039 1040 if (likely(!i915.prefault_disable)) { 1041 ret = fault_in_multipages_readable(to_user_ptr(args->data_ptr), 1042 args->size); 1043 if (ret) 1044 return -EFAULT; 1045 } 1046 1047 intel_runtime_pm_get(dev_priv); 1048 1049 ret = i915_mutex_lock_interruptible(dev); 1050 if (ret) 1051 goto put_rpm; 1052 1053 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 1054 if (&obj->base == NULL) { 1055 ret = -ENOENT; 1056 goto unlock; 1057 } 1058 1059 /* Bounds check destination. */ 1060 if (args->offset > obj->base.size || 1061 args->size > obj->base.size - args->offset) { 1062 ret = -EINVAL; 1063 goto out; 1064 } 1065 1066 /* prime objects have no backing filp to GEM pread/pwrite 1067 * pages from. 1068 */ 1069 1070 trace_i915_gem_object_pwrite(obj, args->offset, args->size); 1071 1072 ret = -EFAULT; 1073 /* We can only do the GTT pwrite on untiled buffers, as otherwise 1074 * it would end up going through the fenced access, and we'll get 1075 * different detiling behavior between reading and writing. 1076 * pread/pwrite currently are reading and writing from the CPU 1077 * perspective, requiring manual detiling by the client. 1078 */ 1079 if (obj->tiling_mode == I915_TILING_NONE && 1080 obj->base.write_domain != I915_GEM_DOMAIN_CPU && 1081 cpu_write_needs_clflush(obj)) { 1082 ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file); 1083 /* Note that the gtt paths might fail with non-page-backed user 1084 * pointers (e.g. gtt mappings when moving data between 1085 * textures). Fallback to the shmem path in that case. */ 1086 } 1087 1088 if (ret == -EFAULT || ret == -ENOSPC) { 1089 if (obj->phys_handle) 1090 ret = i915_gem_phys_pwrite(obj, args, file); 1091 else 1092 ret = i915_gem_shmem_pwrite(dev, obj, args, file); 1093 } 1094 1095 out: 1096 drm_gem_object_unreference(&obj->base); 1097 unlock: 1098 mutex_unlock(&dev->struct_mutex); 1099 put_rpm: 1100 intel_runtime_pm_put(dev_priv); 1101 1102 return ret; 1103 } 1104 1105 int 1106 i915_gem_check_wedge(struct i915_gpu_error *error, 1107 bool interruptible) 1108 { 1109 if (i915_reset_in_progress(error)) { 1110 /* Non-interruptible callers can't handle -EAGAIN, hence return 1111 * -EIO unconditionally for these. */ 1112 if (!interruptible) 1113 return -EIO; 1114 1115 /* Recovery complete, but the reset failed ... */ 1116 if (i915_terminally_wedged(error)) 1117 return -EIO; 1118 1119 /* 1120 * Check if GPU Reset is in progress - we need intel_ring_begin 1121 * to work properly to reinit the hw state while the gpu is 1122 * still marked as reset-in-progress. Handle this with a flag. 1123 */ 1124 if (!error->reload_in_reset) 1125 return -EAGAIN; 1126 } 1127 1128 return 0; 1129 } 1130 1131 static void fake_irq(unsigned long data) 1132 { 1133 wakeup_one((void *)data); 1134 } 1135 1136 static bool missed_irq(struct drm_i915_private *dev_priv, 1137 struct intel_engine_cs *ring) 1138 { 1139 return test_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings); 1140 } 1141 1142 #if 0 1143 static int __i915_spin_request(struct drm_i915_gem_request *req, int state) 1144 { 1145 unsigned long timeout; 1146 unsigned cpu; 1147 1148 /* When waiting for high frequency requests, e.g. 
 * during synchronous rendering split between the CPU and GPU, the finite
 * amount of time required to set up the irq and wait upon it limits the
 * response rate. By busywaiting on the request completion for a short while
 * we can service the high frequency waits as quickly as possible. However,
 * if it is a slow request, we want to sleep as quickly as possible.
 * The tradeoff between waiting and sleeping is roughly the time it
 * takes to sleep on a request, on the order of a microsecond.
 */

	if (req->ring->irq_refcount)
		return -EBUSY;

	/* Only spin if we know the GPU is processing this request */
	if (!i915_gem_request_started(req, true))
		return -EAGAIN;

	timeout = local_clock_us(&cpu) + 5;
	while (!need_resched()) {
		if (i915_gem_request_completed(req, true))
			return 0;

		if (signal_pending_state(state, current))
			break;

		if (busywait_stop(timeout, cpu))
			break;

		cpu_relax_lowlatency();
	}

	if (i915_gem_request_completed(req, false))
		return 0;

	return -EAGAIN;
}
#endif

/**
 * __i915_wait_request - wait until execution of request has finished
 * @req: the request to wait upon
 * @reset_counter: reset sequence associated with the given request
 * @interruptible: do an interruptible wait (normally yes)
 * @timeout: in - how long to wait (NULL forever); out - how much time remaining
 *
 * Note: It is of utmost importance that the passed in seqno and reset_counter
 * values have been read by the caller in an smp safe manner. Where read-side
 * locks are involved, it is sufficient to read the reset_counter before
 * unlocking the lock that protects the seqno. For lockless tricks, the
 * reset_counter _must_ be read before, and an appropriate smp_rmb must be
 * inserted.
 *
 * Returns 0 if the request was found within the allotted time. Else returns
 * the errno with the remaining time filled in the timeout argument.
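 *
 * A typical locked caller (compare the nonblocking wait helper further
 * down) samples the reset counter while still holding struct_mutex, drops
 * the lock for the actual wait and retakes it afterwards:
 *
 *	reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
 *	mutex_unlock(&dev->struct_mutex);
 *	ret = __i915_wait_request(req, reset_counter, true, NULL, rps);
 *	mutex_lock(&dev->struct_mutex);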
1202 */ 1203 int __i915_wait_request(struct drm_i915_gem_request *req, 1204 unsigned reset_counter, 1205 bool interruptible, 1206 s64 *timeout, 1207 struct intel_rps_client *rps) 1208 { 1209 struct intel_engine_cs *ring = i915_gem_request_get_ring(req); 1210 struct drm_device *dev = ring->dev; 1211 struct drm_i915_private *dev_priv = dev->dev_private; 1212 const bool irq_test_in_progress = 1213 ACCESS_ONCE(dev_priv->gpu_error.test_irq_rings) & intel_ring_flag(ring); 1214 unsigned long timeout_expire; 1215 s64 before, now; 1216 int ret, sl_timeout = 1; 1217 1218 WARN(!intel_irqs_enabled(dev_priv), "IRQs disabled"); 1219 1220 if (list_empty(&req->list)) 1221 return 0; 1222 1223 if (i915_gem_request_completed(req, true)) 1224 return 0; 1225 1226 timeout_expire = 0; 1227 if (timeout) { 1228 if (WARN_ON(*timeout < 0)) 1229 return -EINVAL; 1230 1231 if (*timeout == 0) 1232 return -ETIME; 1233 1234 timeout_expire = jiffies + nsecs_to_jiffies_timeout(*timeout); 1235 } 1236 1237 if (INTEL_INFO(dev_priv)->gen >= 6) 1238 gen6_rps_boost(dev_priv, rps, req->emitted_jiffies); 1239 1240 /* Record current time in case interrupted by signal, or wedged */ 1241 trace_i915_gem_request_wait_begin(req); 1242 before = ktime_get_raw_ns(); 1243 1244 /* Optimistic spin for the next jiffie before touching IRQs */ 1245 #if 0 1246 ret = __i915_spin_request(req); 1247 if (ret == 0) 1248 goto out; 1249 #endif 1250 1251 if (!irq_test_in_progress && WARN_ON(!ring->irq_get(ring))) { 1252 ret = -ENODEV; 1253 goto out; 1254 } 1255 1256 lockmgr(&ring->irq_queue.lock, LK_EXCLUSIVE); 1257 for (;;) { 1258 struct timer_list timer; 1259 1260 /* We need to check whether any gpu reset happened in between 1261 * the caller grabbing the seqno and now ... */ 1262 if (reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter)) { 1263 /* ... but upgrade the -EAGAIN to an -EIO if the gpu 1264 * is truely gone. */ 1265 ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible); 1266 if (ret == 0) 1267 ret = -EAGAIN; 1268 break; 1269 } 1270 1271 if (i915_gem_request_completed(req, false)) { 1272 ret = 0; 1273 break; 1274 } 1275 1276 if (interruptible && signal_pending(curthread->td_lwp)) { 1277 ret = -ERESTARTSYS; 1278 break; 1279 } 1280 1281 if (timeout && time_after_eq(jiffies, timeout_expire)) { 1282 ret = -ETIME; 1283 break; 1284 } 1285 1286 timer.function = NULL; 1287 if (timeout || missed_irq(dev_priv, ring)) { 1288 unsigned long expire; 1289 1290 setup_timer_on_stack(&timer, fake_irq, (unsigned long)&ring->irq_queue); 1291 expire = missed_irq(dev_priv, ring) ? jiffies + 1 : timeout_expire; 1292 sl_timeout = expire - jiffies; 1293 if (sl_timeout < 1) 1294 sl_timeout = 1; 1295 mod_timer(&timer, expire); 1296 } 1297 1298 #if 0 1299 io_schedule(); 1300 #endif 1301 1302 if (timer.function) { 1303 del_singleshot_timer_sync(&timer); 1304 destroy_timer_on_stack(&timer); 1305 } 1306 1307 lksleep(&ring->irq_queue, &ring->irq_queue.lock, 1308 interruptible ? PCATCH : 0, "lwe", sl_timeout); 1309 } 1310 lockmgr(&ring->irq_queue.lock, LK_RELEASE); 1311 if (!irq_test_in_progress) 1312 ring->irq_put(ring); 1313 1314 out: 1315 now = ktime_get_raw_ns(); 1316 trace_i915_gem_request_wait_end(req); 1317 1318 if (timeout) { 1319 s64 tres = *timeout - (now - before); 1320 1321 *timeout = tres < 0 ? 0 : tres; 1322 1323 /* 1324 * Apparently ktime isn't accurate enough and occasionally has a 1325 * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch 1326 * things up to make the test happy. We allow up to 1 jiffy. 
1327 * 1328 * This is a regrssion from the timespec->ktime conversion. 1329 */ 1330 if (ret == -ETIME && *timeout < jiffies_to_usecs(1)*1000) 1331 *timeout = 0; 1332 } 1333 1334 return ret; 1335 } 1336 1337 int i915_gem_request_add_to_client(struct drm_i915_gem_request *req, 1338 struct drm_file *file) 1339 { 1340 struct drm_i915_private *dev_private; 1341 struct drm_i915_file_private *file_priv; 1342 1343 WARN_ON(!req || !file || req->file_priv); 1344 1345 if (!req || !file) 1346 return -EINVAL; 1347 1348 if (req->file_priv) 1349 return -EINVAL; 1350 1351 dev_private = req->ring->dev->dev_private; 1352 file_priv = file->driver_priv; 1353 1354 spin_lock(&file_priv->mm.lock); 1355 req->file_priv = file_priv; 1356 list_add_tail(&req->client_list, &file_priv->mm.request_list); 1357 spin_unlock(&file_priv->mm.lock); 1358 1359 req->pid = curproc->p_pid; 1360 1361 return 0; 1362 } 1363 1364 static inline void 1365 i915_gem_request_remove_from_client(struct drm_i915_gem_request *request) 1366 { 1367 struct drm_i915_file_private *file_priv = request->file_priv; 1368 1369 if (!file_priv) 1370 return; 1371 1372 spin_lock(&file_priv->mm.lock); 1373 list_del(&request->client_list); 1374 request->file_priv = NULL; 1375 spin_unlock(&file_priv->mm.lock); 1376 1377 #if 0 1378 put_pid(request->pid); 1379 request->pid = NULL; 1380 #endif 1381 } 1382 1383 static void i915_gem_request_retire(struct drm_i915_gem_request *request) 1384 { 1385 trace_i915_gem_request_retire(request); 1386 1387 /* We know the GPU must have read the request to have 1388 * sent us the seqno + interrupt, so use the position 1389 * of tail of the request to update the last known position 1390 * of the GPU head. 1391 * 1392 * Note this requires that we are always called in request 1393 * completion order. 1394 */ 1395 request->ringbuf->last_retired_head = request->postfix; 1396 1397 list_del_init(&request->list); 1398 i915_gem_request_remove_from_client(request); 1399 1400 i915_gem_request_unreference(request); 1401 } 1402 1403 static void 1404 __i915_gem_request_retire__upto(struct drm_i915_gem_request *req) 1405 { 1406 struct intel_engine_cs *engine = req->ring; 1407 struct drm_i915_gem_request *tmp; 1408 1409 lockdep_assert_held(&engine->dev->struct_mutex); 1410 1411 if (list_empty(&req->list)) 1412 return; 1413 1414 do { 1415 tmp = list_first_entry(&engine->request_list, 1416 typeof(*tmp), list); 1417 1418 i915_gem_request_retire(tmp); 1419 } while (tmp != req); 1420 1421 WARN_ON(i915_verify_lists(engine->dev)); 1422 } 1423 1424 /** 1425 * Waits for a request to be signaled, and cleans up the 1426 * request and object lists appropriately for that event. 1427 */ 1428 int 1429 i915_wait_request(struct drm_i915_gem_request *req) 1430 { 1431 struct drm_device *dev; 1432 struct drm_i915_private *dev_priv; 1433 bool interruptible; 1434 int ret; 1435 1436 BUG_ON(req == NULL); 1437 1438 dev = req->ring->dev; 1439 dev_priv = dev->dev_private; 1440 interruptible = dev_priv->mm.interruptible; 1441 1442 BUG_ON(!mutex_is_locked(&dev->struct_mutex)); 1443 1444 ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible); 1445 if (ret) 1446 return ret; 1447 1448 ret = __i915_wait_request(req, 1449 atomic_read(&dev_priv->gpu_error.reset_counter), 1450 interruptible, NULL, NULL); 1451 if (ret) 1452 return ret; 1453 1454 __i915_gem_request_retire__upto(req); 1455 return 0; 1456 } 1457 1458 /** 1459 * Ensures that all rendering to the object has completed and the object is 1460 * safe to unbind from the GTT or access from the CPU. 
1461 */ 1462 int 1463 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, 1464 bool readonly) 1465 { 1466 int ret, i; 1467 1468 if (!obj->active) 1469 return 0; 1470 1471 if (readonly) { 1472 if (obj->last_write_req != NULL) { 1473 ret = i915_wait_request(obj->last_write_req); 1474 if (ret) 1475 return ret; 1476 1477 i = obj->last_write_req->ring->id; 1478 if (obj->last_read_req[i] == obj->last_write_req) 1479 i915_gem_object_retire__read(obj, i); 1480 else 1481 i915_gem_object_retire__write(obj); 1482 } 1483 } else { 1484 for (i = 0; i < I915_NUM_RINGS; i++) { 1485 if (obj->last_read_req[i] == NULL) 1486 continue; 1487 1488 ret = i915_wait_request(obj->last_read_req[i]); 1489 if (ret) 1490 return ret; 1491 1492 i915_gem_object_retire__read(obj, i); 1493 } 1494 RQ_BUG_ON(obj->active); 1495 } 1496 1497 return 0; 1498 } 1499 1500 static void 1501 i915_gem_object_retire_request(struct drm_i915_gem_object *obj, 1502 struct drm_i915_gem_request *req) 1503 { 1504 int ring = req->ring->id; 1505 1506 if (obj->last_read_req[ring] == req) 1507 i915_gem_object_retire__read(obj, ring); 1508 else if (obj->last_write_req == req) 1509 i915_gem_object_retire__write(obj); 1510 1511 __i915_gem_request_retire__upto(req); 1512 } 1513 1514 /* A nonblocking variant of the above wait. This is a highly dangerous routine 1515 * as the object state may change during this call. 1516 */ 1517 static __must_check int 1518 i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj, 1519 struct intel_rps_client *rps, 1520 bool readonly) 1521 { 1522 struct drm_device *dev = obj->base.dev; 1523 struct drm_i915_private *dev_priv = dev->dev_private; 1524 struct drm_i915_gem_request *requests[I915_NUM_RINGS]; 1525 unsigned reset_counter; 1526 int ret, i, n = 0; 1527 1528 BUG_ON(!mutex_is_locked(&dev->struct_mutex)); 1529 BUG_ON(!dev_priv->mm.interruptible); 1530 1531 if (!obj->active) 1532 return 0; 1533 1534 ret = i915_gem_check_wedge(&dev_priv->gpu_error, true); 1535 if (ret) 1536 return ret; 1537 1538 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter); 1539 1540 if (readonly) { 1541 struct drm_i915_gem_request *req; 1542 1543 req = obj->last_write_req; 1544 if (req == NULL) 1545 return 0; 1546 1547 requests[n++] = i915_gem_request_reference(req); 1548 } else { 1549 for (i = 0; i < I915_NUM_RINGS; i++) { 1550 struct drm_i915_gem_request *req; 1551 1552 req = obj->last_read_req[i]; 1553 if (req == NULL) 1554 continue; 1555 1556 requests[n++] = i915_gem_request_reference(req); 1557 } 1558 } 1559 1560 mutex_unlock(&dev->struct_mutex); 1561 for (i = 0; ret == 0 && i < n; i++) 1562 ret = __i915_wait_request(requests[i], reset_counter, true, 1563 NULL, rps); 1564 mutex_lock(&dev->struct_mutex); 1565 1566 for (i = 0; i < n; i++) { 1567 if (ret == 0) 1568 i915_gem_object_retire_request(obj, requests[i]); 1569 i915_gem_request_unreference(requests[i]); 1570 } 1571 1572 return ret; 1573 } 1574 1575 static struct intel_rps_client *to_rps_client(struct drm_file *file) 1576 { 1577 struct drm_i915_file_private *fpriv = file->driver_priv; 1578 return &fpriv->rps; 1579 } 1580 1581 /** 1582 * Called when user space prepares to use an object with the CPU, either 1583 * through the mmap ioctl's mapping or a GTT mapping. 
1584 */ 1585 int 1586 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, 1587 struct drm_file *file) 1588 { 1589 struct drm_i915_gem_set_domain *args = data; 1590 struct drm_i915_gem_object *obj; 1591 uint32_t read_domains = args->read_domains; 1592 uint32_t write_domain = args->write_domain; 1593 int ret; 1594 1595 /* Only handle setting domains to types used by the CPU. */ 1596 if (write_domain & I915_GEM_GPU_DOMAINS) 1597 return -EINVAL; 1598 1599 if (read_domains & I915_GEM_GPU_DOMAINS) 1600 return -EINVAL; 1601 1602 /* Having something in the write domain implies it's in the read 1603 * domain, and only that read domain. Enforce that in the request. 1604 */ 1605 if (write_domain != 0 && read_domains != write_domain) 1606 return -EINVAL; 1607 1608 ret = i915_mutex_lock_interruptible(dev); 1609 if (ret) 1610 return ret; 1611 1612 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 1613 if (&obj->base == NULL) { 1614 ret = -ENOENT; 1615 goto unlock; 1616 } 1617 1618 /* Try to flush the object off the GPU without holding the lock. 1619 * We will repeat the flush holding the lock in the normal manner 1620 * to catch cases where we are gazumped. 1621 */ 1622 ret = i915_gem_object_wait_rendering__nonblocking(obj, 1623 to_rps_client(file), 1624 !write_domain); 1625 if (ret) 1626 goto unref; 1627 1628 if (read_domains & I915_GEM_DOMAIN_GTT) 1629 ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0); 1630 else 1631 ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0); 1632 1633 if (write_domain != 0) 1634 intel_fb_obj_invalidate(obj, 1635 write_domain == I915_GEM_DOMAIN_GTT ? 1636 ORIGIN_GTT : ORIGIN_CPU); 1637 1638 unref: 1639 drm_gem_object_unreference(&obj->base); 1640 unlock: 1641 mutex_unlock(&dev->struct_mutex); 1642 return ret; 1643 } 1644 1645 /** 1646 * Called when user space has done writes to this buffer 1647 */ 1648 int 1649 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, 1650 struct drm_file *file) 1651 { 1652 struct drm_i915_gem_sw_finish *args = data; 1653 struct drm_i915_gem_object *obj; 1654 int ret = 0; 1655 1656 ret = i915_mutex_lock_interruptible(dev); 1657 if (ret) 1658 return ret; 1659 1660 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 1661 if (&obj->base == NULL) { 1662 ret = -ENOENT; 1663 goto unlock; 1664 } 1665 1666 /* Pinned buffers may be scanout, so flush the cache */ 1667 if (obj->pin_display) 1668 i915_gem_object_flush_cpu_write_domain(obj); 1669 1670 drm_gem_object_unreference(&obj->base); 1671 unlock: 1672 mutex_unlock(&dev->struct_mutex); 1673 return ret; 1674 } 1675 1676 /** 1677 * Maps the contents of an object, returning the address it is mapped 1678 * into. 1679 * 1680 * While the mapping holds a reference on the contents of the object, it doesn't 1681 * imply a ref on the object itself. 1682 * 1683 * IMPORTANT: 1684 * 1685 * DRM driver writers who look a this function as an example for how to do GEM 1686 * mmap support, please don't implement mmap support like here. The modern way 1687 * to implement DRM mmap support is with an mmap offset ioctl (like 1688 * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly. 1689 * That way debug tooling like valgrind will understand what's going on, hiding 1690 * the mmap call in a driver private ioctl will break that. The i915 driver only 1691 * does cpu mmaps this way because we didn't know better. 
 */
int
i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_gem_mmap *args = data;
	struct drm_gem_object *obj;
	unsigned long addr;

	struct proc *p = curproc;
	vm_map_t map = &p->p_vmspace->vm_map;
	vm_size_t size;
	int error = 0, rv;

	if (args->flags & ~(I915_MMAP_WC))
		return -EINVAL;

	obj = drm_gem_object_lookup(dev, file, args->handle);
	if (obj == NULL)
		return -ENOENT;

	if (args->size == 0)
		goto out;

	size = round_page(args->size);
	if (map->size + size > p->p_rlimit[RLIMIT_VMEM].rlim_cur) {
		error = -ENOMEM;
		goto out;
	}

	/* prime objects have no backing filp to GEM mmap
	 * pages from.
	 */

	/*
	 * Call hint to ensure that NULL is not returned as a valid address
	 * and to reduce vm_map traversals. XXX causes instability, use a
	 * fixed low address as the start point instead to avoid the NULL
	 * return issue.
	 */
	addr = PAGE_SIZE;

	/*
	 * Use 256KB alignment. It is unclear why this matters for a
	 * virtual address but it appears to fix a number of application/X
	 * crashes and kms console switching is much faster.
	 */
	vm_object_hold(obj->vm_obj);
	vm_object_reference_locked(obj->vm_obj);
	vm_object_drop(obj->vm_obj);

	rv = vm_map_find(map, obj->vm_obj, NULL,
			 args->offset, &addr, args->size,
			 256 * 1024, /* align */
			 TRUE, /* fitit */
			 VM_MAPTYPE_NORMAL, /* maptype */
			 VM_PROT_READ | VM_PROT_WRITE, /* prot */
			 VM_PROT_READ | VM_PROT_WRITE, /* max */
			 MAP_SHARED /* cow */);
	if (rv != KERN_SUCCESS) {
		vm_object_deallocate(obj->vm_obj);
		error = -vm_mmap_to_errno(rv);
	} else {
		args->addr_ptr = (uint64_t)addr;
	}
out:
	drm_gem_object_unreference(obj);
	return (error);
}

/**
 * i915_gem_fault - fault a page into the GTT
 *
 * vm_obj is locked on entry and expected to be locked on return.
 *
 * The vm_pager has placemarked the object with an anonymous memory page
 * which we must replace atomically to avoid races against concurrent faults
 * on the same page. XXX we currently are unable to do this atomically.
 *
 * If we are to return an error we should not touch the anonymous page,
 * the caller will deallocate it.
 *
 * XXX Most GEM calls appear to be interruptible, but we can't hard loop
 * in that case. Release all resources and wait 1 tick before retrying.
 * This is a huge problem which needs to be fixed by getting rid of most
 * of the interruptibility. The Linux code does not retry but does appear
 * to have some sort of mechanism (VM_FAULT_NOPAGE ?) for the higher level
 * to be able to retry.
 *
 * --
 * @vma: VMA in question
 * @vmf: fault info
 *
 * The fault handler is set up by drm_gem_mmap() when an object is GTT mapped
 * from userspace. The fault handler takes care of binding the object to
 * the GTT (if needed), allocating and programming a fence register (again,
 * only if needed based on whether the old reg is still valid or the object
 * is tiled) and inserting a new PTE into the faulting process.
 *
 * Note that the faulting process may involve evicting existing objects
 * from the GTT and/or fence registers to make room.
So performance may 1794 * suffer if the GTT working set is large or there are few fence registers 1795 * left. 1796 * 1797 * vm_obj is locked on entry and expected to be locked on return. The VM 1798 * pager has placed an anonymous memory page at (obj,offset) which we have 1799 * to replace. 1800 */ 1801 int i915_gem_fault(vm_object_t vm_obj, vm_ooffset_t offset, int prot, vm_page_t *mres) 1802 { 1803 struct drm_i915_gem_object *obj = to_intel_bo(vm_obj->handle); 1804 struct drm_device *dev = obj->base.dev; 1805 struct drm_i915_private *dev_priv = dev->dev_private; 1806 struct i915_ggtt_view view = i915_ggtt_view_normal; 1807 unsigned long page_offset; 1808 vm_page_t m, oldm = NULL; 1809 int ret = 0; 1810 bool write = !!(prot & VM_PROT_WRITE); 1811 1812 intel_runtime_pm_get(dev_priv); 1813 1814 /* We don't use vmf->pgoff since that has the fake offset */ 1815 page_offset = (unsigned long)offset; 1816 1817 retry: 1818 ret = i915_mutex_lock_interruptible(dev); 1819 if (ret) 1820 goto out; 1821 1822 trace_i915_gem_object_fault(obj, page_offset, true, write); 1823 1824 /* Try to flush the object off the GPU first without holding the lock. 1825 * Upon reacquiring the lock, we will perform our sanity checks and then 1826 * repeat the flush holding the lock in the normal manner to catch cases 1827 * where we are gazumped. 1828 */ 1829 ret = i915_gem_object_wait_rendering__nonblocking(obj, NULL, !write); 1830 if (ret) 1831 goto unlock; 1832 1833 /* Access to snoopable pages through the GTT is incoherent. */ 1834 if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) { 1835 ret = -EFAULT; 1836 goto unlock; 1837 } 1838 1839 /* Use a partial view if the object is bigger than the aperture. */ 1840 if (obj->base.size >= dev_priv->gtt.mappable_end && 1841 obj->tiling_mode == I915_TILING_NONE) { 1842 #if 0 1843 static const unsigned int chunk_size = 256; // 1 MiB 1844 1845 memset(&view, 0, sizeof(view)); 1846 view.type = I915_GGTT_VIEW_PARTIAL; 1847 view.params.partial.offset = rounddown(page_offset, chunk_size); 1848 view.params.partial.size = 1849 min_t(unsigned int, 1850 chunk_size, 1851 (vma->vm_end - vma->vm_start)/PAGE_SIZE - 1852 view.params.partial.offset); 1853 #endif 1854 } 1855 1856 /* Now pin it into the GTT if needed */ 1857 ret = i915_gem_object_ggtt_pin(obj, &view, 0, PIN_MAPPABLE); 1858 if (ret) 1859 goto unlock; 1860 1861 ret = i915_gem_object_set_to_gtt_domain(obj, write); 1862 if (ret) 1863 goto unpin; 1864 1865 ret = i915_gem_object_get_fence(obj); 1866 if (ret) 1867 goto unpin; 1868 1869 /* 1870 * START FREEBSD MAGIC 1871 * 1872 * Add a pip count to avoid destruction and certain other 1873 * complex operations (such as collapses?) while unlocked. 1874 */ 1875 vm_object_pip_add(vm_obj, 1); 1876 1877 /* 1878 * XXX We must currently remove the placeholder page now to avoid 1879 * a deadlock against a concurrent i915_gem_release_mmap(). 1880 * Otherwise concurrent operation will block on the busy page 1881 * while holding locks which we need to obtain. 1882 */ 1883 if (*mres != NULL) { 1884 oldm = *mres; 1885 if ((oldm->flags & PG_BUSY) == 0) 1886 kprintf("i915_gem_fault: Page was not busy\n"); 1887 else 1888 vm_page_remove(oldm); 1889 *mres = NULL; 1890 } else { 1891 oldm = NULL; 1892 } 1893 1894 ret = 0; 1895 m = NULL; 1896 1897 /* 1898 * Since the object lock was dropped, another thread might have 1899 * faulted on the same GTT address and instantiated the mapping. 1900 * Recheck. 
1901 */ 1902 m = vm_page_lookup(vm_obj, OFF_TO_IDX(offset)); 1903 if (m != NULL) { 1904 /* 1905 * Try to busy the page, retry on failure (non-zero ret). 1906 */ 1907 if (vm_page_busy_try(m, false)) { 1908 kprintf("i915_gem_fault: PG_BUSY\n"); 1909 ret = -EINTR; 1910 goto unlock; 1911 } 1912 goto have_page; 1913 } 1914 /* 1915 * END FREEBSD MAGIC 1916 */ 1917 1918 obj->fault_mappable = true; 1919 1920 /* Finally, remap it using the new GTT offset */ 1921 m = vm_phys_fictitious_to_vm_page(dev_priv->gtt.mappable_base + 1922 i915_gem_obj_ggtt_offset_view(obj, &view) + offset); 1923 if (m == NULL) { 1924 ret = -EFAULT; 1925 goto unpin; 1926 } 1927 KASSERT((m->flags & PG_FICTITIOUS) != 0, ("not fictitious %p", m)); 1928 KASSERT(m->wire_count == 1, ("wire_count not 1 %p", m)); 1929 1930 /* 1931 * Try to busy the page. Fails on non-zero return. 1932 */ 1933 if (vm_page_busy_try(m, false)) { 1934 kprintf("i915_gem_fault: PG_BUSY(2)\n"); 1935 ret = -EINTR; 1936 goto unpin; 1937 } 1938 m->valid = VM_PAGE_BITS_ALL; 1939 1940 #if 0 1941 if (unlikely(view.type == I915_GGTT_VIEW_PARTIAL)) { 1942 /* Overriding existing pages in partial view does not cause 1943 * us any trouble as TLBs are still valid because the fault 1944 * is due to userspace losing part of the mapping or never 1945 * having accessed it before (at this partials' range). 1946 */ 1947 unsigned long base = vma->vm_start + 1948 (view.params.partial.offset << PAGE_SHIFT); 1949 unsigned int i; 1950 1951 for (i = 0; i < view.params.partial.size; i++) { 1952 ret = vm_insert_pfn(vma, base + i * PAGE_SIZE, pfn + i); 1953 if (ret) 1954 break; 1955 } 1956 1957 obj->fault_mappable = true; 1958 } else { 1959 if (!obj->fault_mappable) { 1960 unsigned long size = min_t(unsigned long, 1961 vma->vm_end - vma->vm_start, 1962 obj->base.size); 1963 int i; 1964 1965 for (i = 0; i < size >> PAGE_SHIFT; i++) { 1966 ret = vm_insert_pfn(vma, 1967 (unsigned long)vma->vm_start + i * PAGE_SIZE, 1968 pfn + i); 1969 if (ret) 1970 break; 1971 } 1972 1973 obj->fault_mappable = true; 1974 } else 1975 ret = vm_insert_pfn(vma, 1976 (unsigned long)vmf->virtual_address, 1977 pfn + page_offset); 1978 #endif 1979 vm_page_insert(m, vm_obj, OFF_TO_IDX(offset)); 1980 #if 0 1981 } 1982 #endif 1983 1984 have_page: 1985 *mres = m; 1986 1987 i915_gem_object_ggtt_unpin_view(obj, &view); 1988 mutex_unlock(&dev->struct_mutex); 1989 ret = VM_PAGER_OK; 1990 goto done; 1991 1992 /* 1993 * ALTERNATIVE ERROR RETURN. 1994 * 1995 * OBJECT EXPECTED TO BE LOCKED. 1996 */ 1997 unpin: 1998 i915_gem_object_ggtt_unpin_view(obj, &view); 1999 unlock: 2000 mutex_unlock(&dev->struct_mutex); 2001 out: 2002 switch (ret) { 2003 case -EIO: 2004 /* 2005 * We eat errors when the gpu is terminally wedged to avoid 2006 * userspace unduly crashing (gl has no provisions for mmaps to 2007 * fail). But any other -EIO isn't ours (e.g. swap in failure) 2008 * and so needs to be reported. 2009 */ 2010 if (!i915_terminally_wedged(&dev_priv->gpu_error)) { 2011 // ret = VM_FAULT_SIGBUS; 2012 break; 2013 } 2014 case -EAGAIN: 2015 /* 2016 * EAGAIN means the gpu is hung and we'll wait for the error 2017 * handler to reset everything when re-faulting in 2018 * i915_mutex_lock_interruptible. 
2019 */ 2020 case -ERESTARTSYS: 2021 case -EINTR: 2022 VM_OBJECT_UNLOCK(vm_obj); 2023 int dummy; 2024 tsleep(&dummy, 0, "delay", 1); /* XXX */ 2025 VM_OBJECT_LOCK(vm_obj); 2026 goto retry; 2027 default: 2028 WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret); 2029 ret = VM_PAGER_ERROR; 2030 break; 2031 } 2032 2033 done: 2034 if (oldm != NULL) 2035 vm_page_free(oldm); 2036 vm_object_pip_wakeup(vm_obj); 2037 2038 intel_runtime_pm_put(dev_priv); 2039 return ret; 2040 } 2041 2042 /** 2043 * i915_gem_release_mmap - remove physical page mappings 2044 * @obj: obj in question 2045 * 2046 * Preserve the reservation of the mmapping with the DRM core code, but 2047 * relinquish ownership of the pages back to the system. 2048 * 2049 * It is vital that we remove the page mapping if we have mapped a tiled 2050 * object through the GTT and then lose the fence register due to 2051 * resource pressure. Similarly if the object has been moved out of the 2052 * aperture, then pages mapped into userspace must be revoked. Removing the 2053 * mapping will then trigger a page fault on the next user access, allowing 2054 * fixup by i915_gem_fault(). 2055 */ 2056 void 2057 i915_gem_release_mmap(struct drm_i915_gem_object *obj) 2058 { 2059 vm_object_t devobj; 2060 vm_page_t m; 2061 int i, page_count; 2062 2063 if (!obj->fault_mappable) 2064 return; 2065 2066 devobj = cdev_pager_lookup(obj); 2067 if (devobj != NULL) { 2068 page_count = OFF_TO_IDX(obj->base.size); 2069 2070 VM_OBJECT_LOCK(devobj); 2071 for (i = 0; i < page_count; i++) { 2072 m = vm_page_lookup_busy_wait(devobj, i, TRUE, "915unm"); 2073 if (m == NULL) 2074 continue; 2075 cdev_pager_free_page(devobj, m); 2076 } 2077 VM_OBJECT_UNLOCK(devobj); 2078 vm_object_deallocate(devobj); 2079 } 2080 2081 obj->fault_mappable = false; 2082 } 2083 2084 void 2085 i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv) 2086 { 2087 struct drm_i915_gem_object *obj; 2088 2089 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) 2090 i915_gem_release_mmap(obj); 2091 } 2092 2093 uint32_t 2094 i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode) 2095 { 2096 uint32_t gtt_size; 2097 2098 if (INTEL_INFO(dev)->gen >= 4 || 2099 tiling_mode == I915_TILING_NONE) 2100 return size; 2101 2102 /* Previous chips need a power-of-two fence region when tiling */ 2103 if (INTEL_INFO(dev)->gen == 3) 2104 gtt_size = 1024*1024; 2105 else 2106 gtt_size = 512*1024; 2107 2108 while (gtt_size < size) 2109 gtt_size <<= 1; 2110 2111 return gtt_size; 2112 } 2113 2114 /** 2115 * i915_gem_get_gtt_alignment - return required GTT alignment for an object 2116 * @obj: object to check 2117 * 2118 * Return the required GTT alignment for an object, taking into account 2119 * potential fence register mapping. 2120 */ 2121 uint32_t 2122 i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size, 2123 int tiling_mode, bool fenced) 2124 { 2125 /* 2126 * Minimum alignment is 4k (GTT page size), but might be greater 2127 * if a fence register is needed for the object. 2128 */ 2129 if (INTEL_INFO(dev)->gen >= 4 || (!fenced && IS_G33(dev)) || 2130 tiling_mode == I915_TILING_NONE) 2131 return 4096; 2132 2133 /* 2134 * Previous chips need to be aligned to the size of the smallest 2135 * fence register that can contain the object.
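 * For example, a 700 KiB tiled object on gen3 rounds up to the 1 MiB
 * minimum fence region and therefore requires 1 MiB alignment; gen2 parts
 * start from a 512 KiB minimum instead.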
2136 */ 2137 return i915_gem_get_gtt_size(dev, size, tiling_mode); 2138 } 2139 2140 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj) 2141 { 2142 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2143 int ret; 2144 2145 #if 0 2146 if (drm_vma_node_has_offset(&obj->base.vma_node)) 2147 return 0; 2148 #endif 2149 2150 dev_priv->mm.shrinker_no_lock_stealing = true; 2151 2152 ret = drm_gem_create_mmap_offset(&obj->base); 2153 if (ret != -ENOSPC) 2154 goto out; 2155 2156 /* Badly fragmented mmap space? The only way we can recover 2157 * space is by destroying unwanted objects. We can't randomly release 2158 * mmap_offsets as userspace expects them to be persistent for the 2159 * lifetime of the objects. The closest we can do is to release the 2160 * offsets on purgeable objects by truncating them and marking them purged, 2161 * which prevents userspace from ever using that object again. 2162 */ 2163 i915_gem_shrink(dev_priv, 2164 obj->base.size >> PAGE_SHIFT, 2165 I915_SHRINK_BOUND | 2166 I915_SHRINK_UNBOUND | 2167 I915_SHRINK_PURGEABLE); 2168 ret = drm_gem_create_mmap_offset(&obj->base); 2169 if (ret != -ENOSPC) 2170 goto out; 2171 2172 i915_gem_shrink_all(dev_priv); 2173 ret = drm_gem_create_mmap_offset(&obj->base); 2174 out: 2175 dev_priv->mm.shrinker_no_lock_stealing = false; 2176 2177 return ret; 2178 } 2179 2180 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj) 2181 { 2182 drm_gem_free_mmap_offset(&obj->base); 2183 } 2184 2185 int 2186 i915_gem_mmap_gtt(struct drm_file *file, 2187 struct drm_device *dev, 2188 uint32_t handle, 2189 uint64_t *offset) 2190 { 2191 struct drm_i915_gem_object *obj; 2192 int ret; 2193 2194 ret = i915_mutex_lock_interruptible(dev); 2195 if (ret) 2196 return ret; 2197 2198 obj = to_intel_bo(drm_gem_object_lookup(dev, file, handle)); 2199 if (&obj->base == NULL) { 2200 ret = -ENOENT; 2201 goto unlock; 2202 } 2203 2204 if (obj->madv != I915_MADV_WILLNEED) { 2205 DRM_DEBUG("Attempting to mmap a purgeable buffer\n"); 2206 ret = -EFAULT; 2207 goto out; 2208 } 2209 2210 ret = i915_gem_object_create_mmap_offset(obj); 2211 if (ret) 2212 goto out; 2213 2214 *offset = DRM_GEM_MAPPING_OFF(obj->base.map_list.key) | 2215 DRM_GEM_MAPPING_KEY; 2216 2217 out: 2218 drm_gem_object_unreference(&obj->base); 2219 unlock: 2220 mutex_unlock(&dev->struct_mutex); 2221 return ret; 2222 } 2223 2224 /** 2225 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing 2226 * @dev: DRM device 2227 * @data: GTT mapping ioctl data 2228 * @file: GEM object info 2229 * 2230 * Simply returns the fake offset to userspace so it can mmap it. 2231 * The mmap call will end up in drm_gem_mmap(), which will set things 2232 * up so we can get faults in the handler above. 2233 * 2234 * The fault handler will take care of binding the object into the GTT 2235 * (since it may have been evicted to make room for something), allocating 2236 * a fence register, and mapping the appropriate aperture address into 2237 * userspace.
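 *
 * A minimal userspace sketch of the flow (illustrative only; assumes the
 * libdrm drmIoctl() helper and omits error handling):
 *
 *	struct drm_i915_gem_mmap_gtt arg = { .handle = handle };
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &arg);
 *	ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
 *		   fd, arg.offset);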
2238 */ 2239 int 2240 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, 2241 struct drm_file *file) 2242 { 2243 struct drm_i915_gem_mmap_gtt *args = data; 2244 2245 return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset); 2246 } 2247 2248 /* Immediately discard the backing storage */ 2249 static void 2250 i915_gem_object_truncate(struct drm_i915_gem_object *obj) 2251 { 2252 vm_object_t vm_obj; 2253 2254 vm_obj = obj->base.vm_obj; 2255 VM_OBJECT_LOCK(vm_obj); 2256 vm_object_page_remove(vm_obj, 0, 0, false); 2257 VM_OBJECT_UNLOCK(vm_obj); 2258 2259 obj->madv = __I915_MADV_PURGED; 2260 } 2261 2262 /* Try to discard unwanted pages */ 2263 static void 2264 i915_gem_object_invalidate(struct drm_i915_gem_object *obj) 2265 { 2266 #if 0 2267 struct address_space *mapping; 2268 #endif 2269 2270 switch (obj->madv) { 2271 case I915_MADV_DONTNEED: 2272 i915_gem_object_truncate(obj); 2273 case __I915_MADV_PURGED: 2274 return; 2275 } 2276 2277 #if 0 2278 if (obj->base.filp == NULL) 2279 return; 2280 2281 mapping = file_inode(obj->base.filp)->i_mapping, 2282 invalidate_mapping_pages(mapping, 0, (loff_t)-1); 2283 #endif 2284 } 2285 2286 static void 2287 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj) 2288 { 2289 struct sg_page_iter sg_iter; 2290 int ret; 2291 2292 BUG_ON(obj->madv == __I915_MADV_PURGED); 2293 2294 ret = i915_gem_object_set_to_cpu_domain(obj, true); 2295 if (ret) { 2296 /* In the event of a disaster, abandon all caches and 2297 * hope for the best. 2298 */ 2299 WARN_ON(ret != -EIO); 2300 i915_gem_clflush_object(obj, true); 2301 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU; 2302 } 2303 2304 i915_gem_gtt_finish_object(obj); 2305 2306 if (i915_gem_object_needs_bit17_swizzle(obj)) 2307 i915_gem_object_save_bit_17_swizzle(obj); 2308 2309 if (obj->madv == I915_MADV_DONTNEED) 2310 obj->dirty = 0; 2311 2312 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) { 2313 struct vm_page *page = sg_page_iter_page(&sg_iter); 2314 2315 if (obj->dirty) 2316 set_page_dirty(page); 2317 2318 if (obj->madv == I915_MADV_WILLNEED) 2319 mark_page_accessed(page); 2320 2321 vm_page_busy_wait(page, FALSE, "i915gem"); 2322 vm_page_unwire(page, 1); 2323 vm_page_wakeup(page); 2324 } 2325 obj->dirty = 0; 2326 2327 sg_free_table(obj->pages); 2328 kfree(obj->pages); 2329 } 2330 2331 int 2332 i915_gem_object_put_pages(struct drm_i915_gem_object *obj) 2333 { 2334 const struct drm_i915_gem_object_ops *ops = obj->ops; 2335 2336 if (obj->pages == NULL) 2337 return 0; 2338 2339 if (obj->pages_pin_count) 2340 return -EBUSY; 2341 2342 BUG_ON(i915_gem_obj_bound_any(obj)); 2343 2344 /* ->put_pages might need to allocate memory for the bit17 swizzle 2345 * array, hence protect them from being reaped by removing them from gtt 2346 * lists early. */ 2347 list_del(&obj->global_list); 2348 2349 ops->put_pages(obj); 2350 obj->pages = NULL; 2351 2352 i915_gem_object_invalidate(obj); 2353 2354 return 0; 2355 } 2356 2357 static int 2358 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) 2359 { 2360 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2361 int page_count, i; 2362 vm_object_t vm_obj; 2363 struct sg_table *st; 2364 struct scatterlist *sg; 2365 struct sg_page_iter sg_iter; 2366 struct vm_page *page; 2367 unsigned long last_pfn = 0; /* suppress gcc warning */ 2368 int ret; 2369 2370 /* Assert that the object is not currently in any GPU domain. 
As it 2371 * wasn't in the GTT, there shouldn't be any way it could have been in 2372 * a GPU cache 2373 */ 2374 BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS); 2375 BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS); 2376 2377 st = kmalloc(sizeof(*st), M_DRM, M_WAITOK); 2378 if (st == NULL) 2379 return -ENOMEM; 2380 2381 page_count = obj->base.size / PAGE_SIZE; 2382 if (sg_alloc_table(st, page_count, GFP_KERNEL)) { 2383 kfree(st); 2384 return -ENOMEM; 2385 } 2386 2387 /* Get the list of pages out of our struct file. They'll be pinned 2388 * at this point until we release them. 2389 * 2390 * Fail silently without starting the shrinker 2391 */ 2392 vm_obj = obj->base.vm_obj; 2393 VM_OBJECT_LOCK(vm_obj); 2394 sg = st->sgl; 2395 st->nents = 0; 2396 for (i = 0; i < page_count; i++) { 2397 page = shmem_read_mapping_page(vm_obj, i); 2398 if (IS_ERR(page)) { 2399 i915_gem_shrink(dev_priv, 2400 page_count, 2401 I915_SHRINK_BOUND | 2402 I915_SHRINK_UNBOUND | 2403 I915_SHRINK_PURGEABLE); 2404 page = shmem_read_mapping_page(vm_obj, i); 2405 } 2406 if (IS_ERR(page)) { 2407 /* We've tried hard to allocate the memory by reaping 2408 * our own buffer, now let the real VM do its job and 2409 * go down in flames if truly OOM. 2410 */ 2411 i915_gem_shrink_all(dev_priv); 2412 page = shmem_read_mapping_page(vm_obj, i); 2413 if (IS_ERR(page)) { 2414 ret = PTR_ERR(page); 2415 goto err_pages; 2416 } 2417 } 2418 #ifdef CONFIG_SWIOTLB 2419 if (swiotlb_nr_tbl()) { 2420 st->nents++; 2421 sg_set_page(sg, page, PAGE_SIZE, 0); 2422 sg = sg_next(sg); 2423 continue; 2424 } 2425 #endif 2426 if (!i || page_to_pfn(page) != last_pfn + 1) { 2427 if (i) 2428 sg = sg_next(sg); 2429 st->nents++; 2430 sg_set_page(sg, page, PAGE_SIZE, 0); 2431 } else { 2432 sg->length += PAGE_SIZE; 2433 } 2434 last_pfn = page_to_pfn(page); 2435 2436 /* Check that the i965g/gm workaround works. */ 2437 } 2438 #ifdef CONFIG_SWIOTLB 2439 if (!swiotlb_nr_tbl()) 2440 #endif 2441 sg_mark_end(sg); 2442 obj->pages = st; 2443 VM_OBJECT_UNLOCK(vm_obj); 2444 2445 ret = i915_gem_gtt_prepare_object(obj); 2446 if (ret) 2447 goto err_pages; 2448 2449 if (i915_gem_object_needs_bit17_swizzle(obj)) 2450 i915_gem_object_do_bit_17_swizzle(obj); 2451 2452 if (obj->tiling_mode != I915_TILING_NONE && 2453 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) 2454 i915_gem_object_pin_pages(obj); 2455 2456 return 0; 2457 2458 err_pages: 2459 sg_mark_end(sg); 2460 for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) { 2461 page = sg_page_iter_page(&sg_iter); 2462 vm_page_busy_wait(page, FALSE, "i915gem"); 2463 vm_page_unwire(page, 0); 2464 vm_page_wakeup(page); 2465 } 2466 VM_OBJECT_UNLOCK(vm_obj); 2467 sg_free_table(st); 2468 kfree(st); 2469 2470 /* shmemfs first checks if there is enough memory to allocate the page 2471 * and reports ENOSPC should there be insufficient, along with the usual 2472 * ENOMEM for a genuine allocation failure. 2473 * 2474 * We use ENOSPC in our driver to mean that we have run out of aperture 2475 * space and so want to translate the error from shmemfs back to our 2476 * usual understanding of ENOMEM. 2477 */ 2478 if (ret == -ENOSPC) 2479 ret = -ENOMEM; 2480 2481 return ret; 2482 } 2483 2484 /* Ensure that the associated pages are gathered from the backing storage 2485 * and pinned into our object. 
i915_gem_object_get_pages() may be called 2486 * multiple times before they are released by a single call to 2487 * i915_gem_object_put_pages() - once the pages are no longer referenced 2488 * either as a result of memory pressure (reaping pages under the shrinker) 2489 * or as the object is itself released. 2490 */ 2491 int 2492 i915_gem_object_get_pages(struct drm_i915_gem_object *obj) 2493 { 2494 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2495 const struct drm_i915_gem_object_ops *ops = obj->ops; 2496 int ret; 2497 2498 if (obj->pages) 2499 return 0; 2500 2501 if (obj->madv != I915_MADV_WILLNEED) { 2502 DRM_DEBUG("Attempting to obtain a purgeable object\n"); 2503 return -EFAULT; 2504 } 2505 2506 BUG_ON(obj->pages_pin_count); 2507 2508 ret = ops->get_pages(obj); 2509 if (ret) 2510 return ret; 2511 2512 list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list); 2513 2514 obj->get_page.sg = obj->pages->sgl; 2515 obj->get_page.last = 0; 2516 2517 return 0; 2518 } 2519 2520 void i915_vma_move_to_active(struct i915_vma *vma, 2521 struct drm_i915_gem_request *req) 2522 { 2523 struct drm_i915_gem_object *obj = vma->obj; 2524 struct intel_engine_cs *ring; 2525 2526 ring = i915_gem_request_get_ring(req); 2527 2528 /* Add a reference if we're newly entering the active list. */ 2529 if (obj->active == 0) 2530 drm_gem_object_reference(&obj->base); 2531 obj->active |= intel_ring_flag(ring); 2532 2533 list_move_tail(&obj->ring_list[ring->id], &ring->active_list); 2534 i915_gem_request_assign(&obj->last_read_req[ring->id], req); 2535 2536 list_move_tail(&vma->mm_list, &vma->vm->active_list); 2537 } 2538 2539 static void 2540 i915_gem_object_retire__write(struct drm_i915_gem_object *obj) 2541 { 2542 RQ_BUG_ON(obj->last_write_req == NULL); 2543 RQ_BUG_ON(!(obj->active & intel_ring_flag(obj->last_write_req->ring))); 2544 2545 i915_gem_request_assign(&obj->last_write_req, NULL); 2546 intel_fb_obj_flush(obj, true, ORIGIN_CS); 2547 } 2548 2549 static void 2550 i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring) 2551 { 2552 struct i915_vma *vma; 2553 2554 RQ_BUG_ON(obj->last_read_req[ring] == NULL); 2555 RQ_BUG_ON(!(obj->active & (1 << ring))); 2556 2557 list_del_init(&obj->ring_list[ring]); 2558 i915_gem_request_assign(&obj->last_read_req[ring], NULL); 2559 2560 if (obj->last_write_req && obj->last_write_req->ring->id == ring) 2561 i915_gem_object_retire__write(obj); 2562 2563 obj->active &= ~(1 << ring); 2564 if (obj->active) 2565 return; 2566 2567 /* Bump our place on the bound list to keep it roughly in LRU order 2568 * so that we don't steal from recently used but inactive objects 2569 * (unless we are forced to ofc!) 
2570 */ 2571 list_move_tail(&obj->global_list, 2572 &to_i915(obj->base.dev)->mm.bound_list); 2573 2574 list_for_each_entry(vma, &obj->vma_list, vma_link) { 2575 if (!list_empty(&vma->mm_list)) 2576 list_move_tail(&vma->mm_list, &vma->vm->inactive_list); 2577 } 2578 2579 i915_gem_request_assign(&obj->last_fenced_req, NULL); 2580 drm_gem_object_unreference(&obj->base); 2581 } 2582 2583 static int 2584 i915_gem_init_seqno(struct drm_device *dev, u32 seqno) 2585 { 2586 struct drm_i915_private *dev_priv = dev->dev_private; 2587 struct intel_engine_cs *ring; 2588 int ret, i, j; 2589 2590 /* Carefully retire all requests without writing to the rings */ 2591 for_each_ring(ring, dev_priv, i) { 2592 ret = intel_ring_idle(ring); 2593 if (ret) 2594 return ret; 2595 } 2596 i915_gem_retire_requests(dev); 2597 2598 /* Finally reset hw state */ 2599 for_each_ring(ring, dev_priv, i) { 2600 intel_ring_init_seqno(ring, seqno); 2601 2602 for (j = 0; j < ARRAY_SIZE(ring->semaphore.sync_seqno); j++) 2603 ring->semaphore.sync_seqno[j] = 0; 2604 } 2605 2606 return 0; 2607 } 2608 2609 int i915_gem_set_seqno(struct drm_device *dev, u32 seqno) 2610 { 2611 struct drm_i915_private *dev_priv = dev->dev_private; 2612 int ret; 2613 2614 if (seqno == 0) 2615 return -EINVAL; 2616 2617 /* HWS page needs to be set less than what we 2618 * will inject to ring 2619 */ 2620 ret = i915_gem_init_seqno(dev, seqno - 1); 2621 if (ret) 2622 return ret; 2623 2624 /* Carefully set the last_seqno value so that wrap 2625 * detection still works 2626 */ 2627 dev_priv->next_seqno = seqno; 2628 dev_priv->last_seqno = seqno - 1; 2629 if (dev_priv->last_seqno == 0) 2630 dev_priv->last_seqno--; 2631 2632 return 0; 2633 } 2634 2635 int 2636 i915_gem_get_seqno(struct drm_device *dev, u32 *seqno) 2637 { 2638 struct drm_i915_private *dev_priv = dev->dev_private; 2639 2640 /* reserve 0 for non-seqno */ 2641 if (dev_priv->next_seqno == 0) { 2642 int ret = i915_gem_init_seqno(dev, 0); 2643 if (ret) 2644 return ret; 2645 2646 dev_priv->next_seqno = 1; 2647 } 2648 2649 *seqno = dev_priv->last_seqno = dev_priv->next_seqno++; 2650 return 0; 2651 } 2652 2653 /* 2654 * NB: This function is not allowed to fail. Doing so would mean that the 2655 * request is not being tracked for completion but the work itself is 2656 * going to happen on the hardware. This would be a Bad Thing(tm). 2657 */ 2658 void __i915_add_request(struct drm_i915_gem_request *request, 2659 struct drm_i915_gem_object *obj, 2660 bool flush_caches) 2661 { 2662 struct intel_engine_cs *ring; 2663 struct drm_i915_private *dev_priv; 2664 struct intel_ringbuffer *ringbuf; 2665 u32 request_start; 2666 int ret; 2667 2668 if (WARN_ON(request == NULL)) 2669 return; 2670 2671 ring = request->ring; 2672 dev_priv = ring->dev->dev_private; 2673 ringbuf = request->ringbuf; 2674 2675 /* 2676 * To ensure that this call will not fail, space for its emissions 2677 * should already have been reserved in the ring buffer. Let the ring 2678 * know that it is time to use that space up. 2679 */ 2680 intel_ring_reserved_space_use(ringbuf); 2681 2682 request_start = intel_ring_get_tail(ringbuf); 2683 /* 2684 * Emit any outstanding flushes - execbuf can fail to emit the flush 2685 * after having emitted the batchbuffer command. Hence we need to fix 2686 * things up similar to emitting the lazy request. The difference here 2687 * is that the flush _must_ happen before the next request, no matter 2688 * what.
2689 */ 2690 if (flush_caches) { 2691 if (i915.enable_execlists) 2692 ret = logical_ring_flush_all_caches(request); 2693 else 2694 ret = intel_ring_flush_all_caches(request); 2695 /* Not allowed to fail! */ 2696 WARN(ret, "*_ring_flush_all_caches failed: %d!\n", ret); 2697 } 2698 2699 /* Record the position of the start of the request so that 2700 * should we detect the updated seqno part-way through the 2701 * GPU processing the request, we never over-estimate the 2702 * position of the head. 2703 */ 2704 request->postfix = intel_ring_get_tail(ringbuf); 2705 2706 if (i915.enable_execlists) 2707 ret = ring->emit_request(request); 2708 else { 2709 ret = ring->add_request(request); 2710 2711 request->tail = intel_ring_get_tail(ringbuf); 2712 } 2713 2714 /* Not allowed to fail! */ 2715 WARN(ret, "emit|add_request failed: %d!\n", ret); 2716 2717 request->head = request_start; 2718 2719 /* Whilst this request exists, batch_obj will be on the 2720 * active_list, and so will hold the active reference. Only when this 2721 * request is retired will the batch_obj be moved onto the 2722 * inactive_list and lose its active reference. Hence we do not need 2723 * to explicitly hold another reference here. 2724 */ 2725 request->batch_obj = obj; 2726 2727 request->emitted_jiffies = jiffies; 2728 request->previous_seqno = ring->last_submitted_seqno; 2729 ring->last_submitted_seqno = request->seqno; 2730 list_add_tail(&request->list, &ring->request_list); 2731 2732 trace_i915_gem_request_add(request); 2733 2734 i915_queue_hangcheck(ring->dev); 2735 2736 queue_delayed_work(dev_priv->wq, 2737 &dev_priv->mm.retire_work, 2738 round_jiffies_up_relative(HZ)); 2739 intel_mark_busy(dev_priv->dev); 2740 2741 /* Sanity check that the reserved size was large enough. */ 2742 intel_ring_reserved_space_end(ringbuf); 2743 } 2744 2745 static bool i915_context_is_banned(struct drm_i915_private *dev_priv, 2746 const struct intel_context *ctx) 2747 { 2748 unsigned long elapsed; 2749 2750 elapsed = get_seconds() - ctx->hang_stats.guilty_ts; 2751 2752 if (ctx->hang_stats.banned) 2753 return true; 2754 2755 if (ctx->hang_stats.ban_period_seconds && 2756 elapsed <= ctx->hang_stats.ban_period_seconds) { 2757 if (!i915_gem_context_is_default(ctx)) { 2758 DRM_DEBUG("context hanging too fast, banning!\n"); 2759 return true; 2760 } else if (i915_stop_ring_allow_ban(dev_priv)) { 2761 if (i915_stop_ring_allow_warn(dev_priv)) 2762 DRM_ERROR("gpu hanging too fast, banning!\n"); 2763 return true; 2764 } 2765 } 2766 2767 return false; 2768 } 2769 2770 static void i915_set_reset_status(struct drm_i915_private *dev_priv, 2771 struct intel_context *ctx, 2772 const bool guilty) 2773 { 2774 struct i915_ctx_hang_stats *hs; 2775 2776 if (WARN_ON(!ctx)) 2777 return; 2778 2779 hs = &ctx->hang_stats; 2780 2781 if (guilty) { 2782 hs->banned = i915_context_is_banned(dev_priv, ctx); 2783 hs->batch_active++; 2784 hs->guilty_ts = get_seconds(); 2785 } else { 2786 hs->batch_pending++; 2787 } 2788 } 2789 2790 void i915_gem_request_free(struct kref *req_ref) 2791 { 2792 struct drm_i915_gem_request *req = container_of(req_ref, 2793 typeof(*req), ref); 2794 struct intel_context *ctx = req->ctx; 2795 2796 if (req->file_priv) 2797 i915_gem_request_remove_from_client(req); 2798 2799 if (ctx) { 2800 if (i915.enable_execlists) { 2801 if (ctx != req->ring->default_context) 2802 intel_lr_context_unpin(req); 2803 } 2804 2805 i915_gem_context_unreference(ctx); 2806 } 2807 2808 kfree(req); 2809 } 2810 2811 int i915_gem_request_alloc(struct intel_engine_cs *ring,
2812 struct intel_context *ctx, 2813 struct drm_i915_gem_request **req_out) 2814 { 2815 struct drm_i915_private *dev_priv = to_i915(ring->dev); 2816 struct drm_i915_gem_request *req; 2817 int ret; 2818 2819 if (!req_out) 2820 return -EINVAL; 2821 2822 *req_out = NULL; 2823 2824 req = kzalloc(sizeof(*req), GFP_KERNEL); 2825 if (req == NULL) 2826 return -ENOMEM; 2827 2828 ret = i915_gem_get_seqno(ring->dev, &req->seqno); 2829 if (ret) 2830 goto err; 2831 2832 kref_init(&req->ref); 2833 req->i915 = dev_priv; 2834 req->ring = ring; 2835 req->ctx = ctx; 2836 i915_gem_context_reference(req->ctx); 2837 2838 if (i915.enable_execlists) 2839 ret = intel_logical_ring_alloc_request_extras(req); 2840 else 2841 ret = intel_ring_alloc_request_extras(req); 2842 if (ret) { 2843 i915_gem_context_unreference(req->ctx); 2844 goto err; 2845 } 2846 2847 /* 2848 * Reserve space in the ring buffer for all the commands required to 2849 * eventually emit this request. This is to guarantee that the 2850 * i915_add_request() call can't fail. Note that the reserve may need 2851 * to be redone if the request is not actually submitted straight 2852 * away, e.g. because a GPU scheduler has deferred it. 2853 */ 2854 if (i915.enable_execlists) 2855 ret = intel_logical_ring_reserve_space(req); 2856 else 2857 ret = intel_ring_reserve_space(req); 2858 if (ret) { 2859 /* 2860 * At this point, the request is fully allocated even if not 2861 * fully prepared. Thus it can be cleaned up using the proper 2862 * free code. 2863 */ 2864 i915_gem_request_cancel(req); 2865 return ret; 2866 } 2867 2868 *req_out = req; 2869 return 0; 2870 2871 err: 2872 kfree(req); 2873 return ret; 2874 } 2875 2876 void i915_gem_request_cancel(struct drm_i915_gem_request *req) 2877 { 2878 intel_ring_reserved_space_cancel(req->ringbuf); 2879 2880 i915_gem_request_unreference(req); 2881 } 2882 2883 struct drm_i915_gem_request * 2884 i915_gem_find_active_request(struct intel_engine_cs *ring) 2885 { 2886 struct drm_i915_gem_request *request; 2887 2888 list_for_each_entry(request, &ring->request_list, list) { 2889 if (i915_gem_request_completed(request, false)) 2890 continue; 2891 2892 return request; 2893 } 2894 2895 return NULL; 2896 } 2897 2898 static void i915_gem_reset_ring_status(struct drm_i915_private *dev_priv, 2899 struct intel_engine_cs *ring) 2900 { 2901 struct drm_i915_gem_request *request; 2902 bool ring_hung; 2903 2904 request = i915_gem_find_active_request(ring); 2905 2906 if (request == NULL) 2907 return; 2908 2909 ring_hung = ring->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG; 2910 2911 i915_set_reset_status(dev_priv, request->ctx, ring_hung); 2912 2913 list_for_each_entry_continue(request, &ring->request_list, list) 2914 i915_set_reset_status(dev_priv, request->ctx, false); 2915 } 2916 2917 static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv, 2918 struct intel_engine_cs *ring) 2919 { 2920 while (!list_empty(&ring->active_list)) { 2921 struct drm_i915_gem_object *obj; 2922 2923 obj = list_first_entry(&ring->active_list, 2924 struct drm_i915_gem_object, 2925 ring_list[ring->id]); 2926 2927 i915_gem_object_retire__read(obj, ring->id); 2928 } 2929 2930 /* 2931 * Clear the execlists queue up before freeing the requests, as those 2932 * are the ones that keep the context and ringbuffer backing objects 2933 * pinned in place. 
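 * Each request on the execlist queue holds a pin on its logical ring
 * context (except for the default context), which must be dropped before
 * the request itself is unreferenced.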
2934 */ 2935 while (!list_empty(&ring->execlist_queue)) { 2936 struct drm_i915_gem_request *submit_req; 2937 2938 submit_req = list_first_entry(&ring->execlist_queue, 2939 struct drm_i915_gem_request, 2940 execlist_link); 2941 list_del(&submit_req->execlist_link); 2942 2943 if (submit_req->ctx != ring->default_context) 2944 intel_lr_context_unpin(submit_req); 2945 2946 i915_gem_request_unreference(submit_req); 2947 } 2948 2949 /* 2950 * We must free the requests after all the corresponding objects have 2951 * been moved off active lists, which is the same order the normal 2952 * retire_requests function uses. This is important if objects hold 2953 * implicit references on things such as ppgtt address spaces through 2954 * the request. 2955 */ 2956 while (!list_empty(&ring->request_list)) { 2957 struct drm_i915_gem_request *request; 2958 2959 request = list_first_entry(&ring->request_list, 2960 struct drm_i915_gem_request, 2961 list); 2962 2963 i915_gem_request_retire(request); 2964 } 2965 } 2966 2967 void i915_gem_reset(struct drm_device *dev) 2968 { 2969 struct drm_i915_private *dev_priv = dev->dev_private; 2970 struct intel_engine_cs *ring; 2971 int i; 2972 2973 /* 2974 * Before we free the objects from the requests, we need to inspect 2975 * them for finding the guilty party. As the requests only borrow 2976 * their reference to the objects, the inspection must be done first. 2977 */ 2978 for_each_ring(ring, dev_priv, i) 2979 i915_gem_reset_ring_status(dev_priv, ring); 2980 2981 for_each_ring(ring, dev_priv, i) 2982 i915_gem_reset_ring_cleanup(dev_priv, ring); 2983 2984 i915_gem_context_reset(dev); 2985 2986 i915_gem_restore_fences(dev); 2987 2988 WARN_ON(i915_verify_lists(dev)); 2989 } 2990 2991 /** 2992 * This function clears the request list as sequence numbers are passed. 2993 */ 2994 void 2995 i915_gem_retire_requests_ring(struct intel_engine_cs *ring) 2996 { 2997 WARN_ON(i915_verify_lists(ring->dev)); 2998 2999 /* Retire requests first as we use it above for the early return. 3000 * If we retire requests last, we may use a later seqno and so clear 3001 * the requests lists without clearing the active list, leading to 3002 * confusion. 3003 */ 3004 while (!list_empty(&ring->request_list)) { 3005 struct drm_i915_gem_request *request; 3006 3007 request = list_first_entry(&ring->request_list, 3008 struct drm_i915_gem_request, 3009 list); 3010 3011 if (!i915_gem_request_completed(request, true)) 3012 break; 3013 3014 i915_gem_request_retire(request); 3015 } 3016 3017 /* Move any buffers on the active list that are no longer referenced 3018 * by the ringbuffer to the flushing/inactive lists as appropriate, 3019 * before we free the context associated with the requests.
3020 */ 3021 while (!list_empty(&ring->active_list)) { 3022 struct drm_i915_gem_object *obj; 3023 3024 obj = list_first_entry(&ring->active_list, 3025 struct drm_i915_gem_object, 3026 ring_list[ring->id]); 3027 3028 if (!list_empty(&obj->last_read_req[ring->id]->list)) 3029 break; 3030 3031 i915_gem_object_retire__read(obj, ring->id); 3032 } 3033 3034 if (unlikely(ring->trace_irq_req && 3035 i915_gem_request_completed(ring->trace_irq_req, true))) { 3036 ring->irq_put(ring); 3037 i915_gem_request_assign(&ring->trace_irq_req, NULL); 3038 } 3039 3040 WARN_ON(i915_verify_lists(ring->dev)); 3041 } 3042 3043 bool 3044 i915_gem_retire_requests(struct drm_device *dev) 3045 { 3046 struct drm_i915_private *dev_priv = dev->dev_private; 3047 struct intel_engine_cs *ring; 3048 bool idle = true; 3049 int i; 3050 3051 for_each_ring(ring, dev_priv, i) { 3052 i915_gem_retire_requests_ring(ring); 3053 idle &= list_empty(&ring->request_list); 3054 if (i915.enable_execlists) { 3055 unsigned long flags; 3056 3057 spin_lock_irqsave(&ring->execlist_lock, flags); 3058 idle &= list_empty(&ring->execlist_queue); 3059 spin_unlock_irqrestore(&ring->execlist_lock, flags); 3060 3061 intel_execlists_retire_requests(ring); 3062 } 3063 } 3064 3065 if (idle) 3066 mod_delayed_work(dev_priv->wq, 3067 &dev_priv->mm.idle_work, 3068 msecs_to_jiffies(100)); 3069 3070 return idle; 3071 } 3072 3073 static void 3074 i915_gem_retire_work_handler(struct work_struct *work) 3075 { 3076 struct drm_i915_private *dev_priv = 3077 container_of(work, typeof(*dev_priv), mm.retire_work.work); 3078 struct drm_device *dev = dev_priv->dev; 3079 bool idle; 3080 3081 /* Come back later if the device is busy... */ 3082 idle = false; 3083 if (mutex_trylock(&dev->struct_mutex)) { 3084 idle = i915_gem_retire_requests(dev); 3085 mutex_unlock(&dev->struct_mutex); 3086 } 3087 if (!idle) 3088 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 3089 round_jiffies_up_relative(HZ)); 3090 } 3091 3092 static void 3093 i915_gem_idle_work_handler(struct work_struct *work) 3094 { 3095 struct drm_i915_private *dev_priv = 3096 container_of(work, typeof(*dev_priv), mm.idle_work.work); 3097 struct drm_device *dev = dev_priv->dev; 3098 struct intel_engine_cs *ring; 3099 int i; 3100 3101 for_each_ring(ring, dev_priv, i) 3102 if (!list_empty(&ring->request_list)) 3103 return; 3104 3105 intel_mark_idle(dev); 3106 3107 if (mutex_trylock(&dev->struct_mutex)) { 3108 struct intel_engine_cs *ring; 3109 int i; 3110 3111 for_each_ring(ring, dev_priv, i) 3112 i915_gem_batch_pool_fini(&ring->batch_pool); 3113 3114 mutex_unlock(&dev->struct_mutex); 3115 } 3116 } 3117 3118 /** 3119 * Ensures that an object will eventually get non-busy by flushing any required 3120 * write domains, emitting any outstanding lazy request and retiring any 3121 * completed requests.
3122 */ 3123 static int 3124 i915_gem_object_flush_active(struct drm_i915_gem_object *obj) 3125 { 3126 int i; 3127 3128 if (!obj->active) 3129 return 0; 3130 3131 for (i = 0; i < I915_NUM_RINGS; i++) { 3132 struct drm_i915_gem_request *req; 3133 3134 req = obj->last_read_req[i]; 3135 if (req == NULL) 3136 continue; 3137 3138 if (list_empty(&req->list)) 3139 goto retire; 3140 3141 if (i915_gem_request_completed(req, true)) { 3142 __i915_gem_request_retire__upto(req); 3143 retire: 3144 i915_gem_object_retire__read(obj, i); 3145 } 3146 } 3147 3148 return 0; 3149 } 3150 3151 /** 3152 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT 3153 * @DRM_IOCTL_ARGS: standard ioctl arguments 3154 * 3155 * Returns 0 if successful, else an error is returned with the remaining time in 3156 * the timeout parameter. 3157 * -ETIME: object is still busy after timeout 3158 * -ERESTARTSYS: signal interrupted the wait 3159 * -ENOENT: object doesn't exist 3160 * Also possible, but rare: 3161 * -EAGAIN: GPU wedged 3162 * -ENOMEM: damn 3163 * -ENODEV: Internal IRQ fail 3164 * -E?: The add request failed 3165 * 3166 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any 3167 * non-zero timeout parameter the wait ioctl will wait for the given number of 3168 * nanoseconds on an object becoming unbusy. Since the wait itself does so 3169 * without holding struct_mutex the object may become re-busied before this 3170 * function completes. A similar but shorter race condition exists in the busy 3171 * ioctl. 3172 */ 3173 int 3174 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) 3175 { 3176 struct drm_i915_private *dev_priv = dev->dev_private; 3177 struct drm_i915_gem_wait *args = data; 3178 struct drm_i915_gem_object *obj; 3179 struct drm_i915_gem_request *req[I915_NUM_RINGS]; 3180 unsigned reset_counter; 3181 int i, n = 0; 3182 int ret; 3183 3184 if (args->flags != 0) 3185 return -EINVAL; 3186 3187 ret = i915_mutex_lock_interruptible(dev); 3188 if (ret) 3189 return ret; 3190 3191 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->bo_handle)); 3192 if (&obj->base == NULL) { 3193 mutex_unlock(&dev->struct_mutex); 3194 return -ENOENT; 3195 } 3196 3197 /* Need to make sure the object gets inactive eventually. */ 3198 ret = i915_gem_object_flush_active(obj); 3199 if (ret) 3200 goto out; 3201 3202 if (!obj->active) 3203 goto out; 3204 3205 /* Do this after OLR check to make sure we make forward progress polling 3206 * on this IOCTL with a timeout == 0 (like busy ioctl) 3207 */ 3208 if (args->timeout_ns == 0) { 3209 ret = -ETIME; 3210 goto out; 3211 } 3212 3213 drm_gem_object_unreference(&obj->base); 3214 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter); 3215 3216 for (i = 0; i < I915_NUM_RINGS; i++) { 3217 if (obj->last_read_req[i] == NULL) 3218 continue; 3219 3220 req[n++] = i915_gem_request_reference(obj->last_read_req[i]); 3221 } 3222 3223 mutex_unlock(&dev->struct_mutex); 3224 3225 for (i = 0; i < n; i++) { 3226 if (ret == 0) 3227 ret = __i915_wait_request(req[i], reset_counter, true, 3228 args->timeout_ns > 0 ?
&args->timeout_ns : NULL, 3229 file->driver_priv); 3230 i915_gem_request_unreference__unlocked(req[i]); 3231 } 3232 return ret; 3233 3234 out: 3235 drm_gem_object_unreference(&obj->base); 3236 mutex_unlock(&dev->struct_mutex); 3237 return ret; 3238 } 3239 3240 static int 3241 __i915_gem_object_sync(struct drm_i915_gem_object *obj, 3242 struct intel_engine_cs *to, 3243 struct drm_i915_gem_request *from_req, 3244 struct drm_i915_gem_request **to_req) 3245 { 3246 struct intel_engine_cs *from; 3247 int ret; 3248 3249 from = i915_gem_request_get_ring(from_req); 3250 if (to == from) 3251 return 0; 3252 3253 if (i915_gem_request_completed(from_req, true)) 3254 return 0; 3255 3256 if (!i915_semaphore_is_enabled(obj->base.dev)) { 3257 struct drm_i915_private *i915 = to_i915(obj->base.dev); 3258 ret = __i915_wait_request(from_req, 3259 atomic_read(&i915->gpu_error.reset_counter), 3260 i915->mm.interruptible, 3261 NULL, 3262 &i915->rps.semaphores); 3263 if (ret) 3264 return ret; 3265 3266 i915_gem_object_retire_request(obj, from_req); 3267 } else { 3268 int idx = intel_ring_sync_index(from, to); 3269 u32 seqno = i915_gem_request_get_seqno(from_req); 3270 3271 WARN_ON(!to_req); 3272 3273 if (seqno <= from->semaphore.sync_seqno[idx]) 3274 return 0; 3275 3276 if (*to_req == NULL) { 3277 ret = i915_gem_request_alloc(to, to->default_context, to_req); 3278 if (ret) 3279 return ret; 3280 } 3281 3282 trace_i915_gem_ring_sync_to(*to_req, from, from_req); 3283 ret = to->semaphore.sync_to(*to_req, from, seqno); 3284 if (ret) 3285 return ret; 3286 3287 /* We use last_read_req because sync_to() 3288 * might have just caused seqno wrap under 3289 * the radar. 3290 */ 3291 from->semaphore.sync_seqno[idx] = 3292 i915_gem_request_get_seqno(obj->last_read_req[from->id]); 3293 } 3294 3295 return 0; 3296 } 3297 3298 /** 3299 * i915_gem_object_sync - sync an object to a ring. 3300 * 3301 * @obj: object which may be in use on another ring. 3302 * @to: ring we wish to use the object on. May be NULL. 3303 * @to_req: request we wish to use the object for. See below. 3304 * This will be allocated and returned if a request is 3305 * required but not passed in. 3306 * 3307 * This code is meant to abstract object synchronization with the GPU. 3308 * Calling with NULL implies synchronizing the object with the CPU 3309 * rather than a particular GPU ring. Conceptually we serialise writes 3310 * between engines inside the GPU. We only allow one engine to write 3311 * into a buffer at any time, but multiple readers. To ensure each has 3312 * a coherent view of memory, we must: 3313 * 3314 * - If there is an outstanding write request to the object, the new 3315 * request must wait for it to complete (either CPU or in hw, requests 3316 * on the same ring will be naturally ordered). 3317 * 3318 * - If we are a write request (pending_write_domain is set), the new 3319 * request must wait for outstanding read requests to complete. 3320 * 3321 * For CPU synchronisation (NULL to) no request is required. For syncing with 3322 * rings to_req must be non-NULL. However, a request does not have to be 3323 * pre-allocated. If *to_req is NULL and sync commands will be emitted then a 3324 * request will be allocated automatically and returned through *to_req. Note 3325 * that it is not guaranteed that commands will be emitted (because the system 3326 * might already be idle). Hence there is no need to create a request that 3327 * might never have any work submitted. 
Note further that if a request is 3328 * returned in *to_req, it is the responsibility of the caller to submit 3329 * that request (after potentially adding more work to it). 3330 * 3331 * Returns 0 if successful, else propagates up the lower layer error. 3332 */ 3333 int 3334 i915_gem_object_sync(struct drm_i915_gem_object *obj, 3335 struct intel_engine_cs *to, 3336 struct drm_i915_gem_request **to_req) 3337 { 3338 const bool readonly = obj->base.pending_write_domain == 0; 3339 struct drm_i915_gem_request *req[I915_NUM_RINGS]; 3340 int ret, i, n; 3341 3342 if (!obj->active) 3343 return 0; 3344 3345 if (to == NULL) 3346 return i915_gem_object_wait_rendering(obj, readonly); 3347 3348 n = 0; 3349 if (readonly) { 3350 if (obj->last_write_req) 3351 req[n++] = obj->last_write_req; 3352 } else { 3353 for (i = 0; i < I915_NUM_RINGS; i++) 3354 if (obj->last_read_req[i]) 3355 req[n++] = obj->last_read_req[i]; 3356 } 3357 for (i = 0; i < n; i++) { 3358 ret = __i915_gem_object_sync(obj, to, req[i], to_req); 3359 if (ret) 3360 return ret; 3361 } 3362 3363 return 0; 3364 } 3365 3366 static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj) 3367 { 3368 u32 old_write_domain, old_read_domains; 3369 3370 /* Force a pagefault for domain tracking on next user access */ 3371 i915_gem_release_mmap(obj); 3372 3373 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) 3374 return; 3375 3376 /* Wait for any direct GTT access to complete */ 3377 mb(); 3378 3379 old_read_domains = obj->base.read_domains; 3380 old_write_domain = obj->base.write_domain; 3381 3382 obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT; 3383 obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT; 3384 3385 trace_i915_gem_object_change_domain(obj, 3386 old_read_domains, 3387 old_write_domain); 3388 } 3389 3390 static int __i915_vma_unbind(struct i915_vma *vma, bool wait) 3391 { 3392 struct drm_i915_gem_object *obj = vma->obj; 3393 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 3394 int ret; 3395 3396 if (list_empty(&vma->vma_link)) 3397 return 0; 3398 3399 if (!drm_mm_node_allocated(&vma->node)) { 3400 i915_gem_vma_destroy(vma); 3401 return 0; 3402 } 3403 3404 if (vma->pin_count) 3405 return -EBUSY; 3406 3407 BUG_ON(obj->pages == NULL); 3408 3409 if (wait) { 3410 ret = i915_gem_object_wait_rendering(obj, false); 3411 if (ret) 3412 return ret; 3413 } 3414 3415 if (i915_is_ggtt(vma->vm) && 3416 vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) { 3417 i915_gem_object_finish_gtt(obj); 3418 3419 /* release the fence reg _after_ flushing */ 3420 ret = i915_gem_object_put_fence(obj); 3421 if (ret) 3422 return ret; 3423 } 3424 3425 trace_i915_vma_unbind(vma); 3426 3427 vma->vm->unbind_vma(vma); 3428 vma->bound = 0; 3429 3430 list_del_init(&vma->mm_list); 3431 if (i915_is_ggtt(vma->vm)) { 3432 if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) { 3433 obj->map_and_fenceable = false; 3434 } else if (vma->ggtt_view.pages) { 3435 sg_free_table(vma->ggtt_view.pages); 3436 kfree(vma->ggtt_view.pages); 3437 } 3438 vma->ggtt_view.pages = NULL; 3439 } 3440 3441 drm_mm_remove_node(&vma->node); 3442 i915_gem_vma_destroy(vma); 3443 3444 /* Since the unbound list is global, only move to that list if 3445 * no more VMAs exist. */ 3446 if (list_empty(&obj->vma_list)) 3447 list_move_tail(&obj->global_list, &dev_priv->mm.unbound_list); 3448 3449 /* And finally now the object is completely decoupled from this vma, 3450 * we can drop its hold on the backing storage and allow it to be 3451 * reaped by the shrinker. 
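 * (This balances the i915_gem_object_pin_pages() call made when the VMA
 * was bound in i915_gem_object_bind_to_vm().)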
3452 */ 3453 i915_gem_object_unpin_pages(obj); 3454 3455 return 0; 3456 } 3457 3458 int i915_vma_unbind(struct i915_vma *vma) 3459 { 3460 return __i915_vma_unbind(vma, true); 3461 } 3462 3463 int __i915_vma_unbind_no_wait(struct i915_vma *vma) 3464 { 3465 return __i915_vma_unbind(vma, false); 3466 } 3467 3468 int i915_gpu_idle(struct drm_device *dev) 3469 { 3470 struct drm_i915_private *dev_priv = dev->dev_private; 3471 struct intel_engine_cs *ring; 3472 int ret, i; 3473 3474 /* Flush everything onto the inactive list. */ 3475 for_each_ring(ring, dev_priv, i) { 3476 if (!i915.enable_execlists) { 3477 struct drm_i915_gem_request *req; 3478 3479 ret = i915_gem_request_alloc(ring, ring->default_context, &req); 3480 if (ret) 3481 return ret; 3482 3483 ret = i915_switch_context(req); 3484 if (ret) { 3485 i915_gem_request_cancel(req); 3486 return ret; 3487 } 3488 3489 i915_add_request_no_flush(req); 3490 } 3491 3492 ret = intel_ring_idle(ring); 3493 if (ret) 3494 return ret; 3495 } 3496 3497 WARN_ON(i915_verify_lists(dev)); 3498 return 0; 3499 } 3500 3501 static bool i915_gem_valid_gtt_space(struct i915_vma *vma, 3502 unsigned long cache_level) 3503 { 3504 struct drm_mm_node *gtt_space = &vma->node; 3505 struct drm_mm_node *other; 3506 3507 /* 3508 * On some machines we have to be careful when putting differing types 3509 * of snoopable memory together to avoid the prefetcher crossing memory 3510 * domains and dying. During vm initialisation, we decide whether or not 3511 * these constraints apply and set the drm_mm.color_adjust 3512 * appropriately. 3513 */ 3514 if (vma->vm->mm.color_adjust == NULL) 3515 return true; 3516 3517 if (!drm_mm_node_allocated(gtt_space)) 3518 return true; 3519 3520 if (list_empty(&gtt_space->node_list)) 3521 return true; 3522 3523 other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list); 3524 if (other->allocated && !other->hole_follows && other->color != cache_level) 3525 return false; 3526 3527 other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list); 3528 if (other->allocated && !gtt_space->hole_follows && other->color != cache_level) 3529 return false; 3530 3531 return true; 3532 } 3533 3534 /** 3535 * Finds free space in the GTT aperture and binds the object or a view of it 3536 * there. 3537 */ 3538 static struct i915_vma * 3539 i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, 3540 struct i915_address_space *vm, 3541 const struct i915_ggtt_view *ggtt_view, 3542 unsigned alignment, 3543 uint64_t flags) 3544 { 3545 struct drm_device *dev = obj->base.dev; 3546 struct drm_i915_private *dev_priv = dev->dev_private; 3547 u32 fence_alignment, unfenced_alignment; 3548 u32 search_flag, alloc_flag; 3549 u64 start, end; 3550 u64 size, fence_size; 3551 struct i915_vma *vma; 3552 int ret; 3553 3554 if (i915_is_ggtt(vm)) { 3555 u32 view_size; 3556 3557 if (WARN_ON(!ggtt_view)) 3558 return ERR_PTR(-EINVAL); 3559 3560 view_size = i915_ggtt_view_size(obj, ggtt_view); 3561 3562 fence_size = i915_gem_get_gtt_size(dev, 3563 view_size, 3564 obj->tiling_mode); 3565 fence_alignment = i915_gem_get_gtt_alignment(dev, 3566 view_size, 3567 obj->tiling_mode, 3568 true); 3569 unfenced_alignment = i915_gem_get_gtt_alignment(dev, 3570 view_size, 3571 obj->tiling_mode, 3572 false); 3573 size = flags & PIN_MAPPABLE ?
fence_size : view_size; 3574 } else { 3575 fence_size = i915_gem_get_gtt_size(dev, 3576 obj->base.size, 3577 obj->tiling_mode); 3578 fence_alignment = i915_gem_get_gtt_alignment(dev, 3579 obj->base.size, 3580 obj->tiling_mode, 3581 true); 3582 unfenced_alignment = 3583 i915_gem_get_gtt_alignment(dev, 3584 obj->base.size, 3585 obj->tiling_mode, 3586 false); 3587 size = flags & PIN_MAPPABLE ? fence_size : obj->base.size; 3588 } 3589 3590 start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0; 3591 end = vm->total; 3592 if (flags & PIN_MAPPABLE) 3593 end = min_t(u64, end, dev_priv->gtt.mappable_end); 3594 if (flags & PIN_ZONE_4G) 3595 end = min_t(u64, end, (1ULL << 32)); 3596 3597 if (alignment == 0) 3598 alignment = flags & PIN_MAPPABLE ? fence_alignment : 3599 unfenced_alignment; 3600 if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) { 3601 DRM_DEBUG("Invalid object (view type=%u) alignment requested %u\n", 3602 ggtt_view ? ggtt_view->type : 0, 3603 alignment); 3604 return ERR_PTR(-EINVAL); 3605 } 3606 3607 /* If binding the object/GGTT view requires more space than the entire 3608 * aperture has, reject it early before evicting everything in a vain 3609 * attempt to find space. 3610 */ 3611 if (size > end) { 3612 DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: size=%lu > %s aperture=%lu\n", 3613 ggtt_view ? ggtt_view->type : 0, 3614 size, 3615 flags & PIN_MAPPABLE ? "mappable" : "total", 3616 end); 3617 return ERR_PTR(-E2BIG); 3618 } 3619 3620 ret = i915_gem_object_get_pages(obj); 3621 if (ret) 3622 return ERR_PTR(ret); 3623 3624 i915_gem_object_pin_pages(obj); 3625 3626 vma = ggtt_view ? i915_gem_obj_lookup_or_create_ggtt_vma(obj, ggtt_view) : 3627 i915_gem_obj_lookup_or_create_vma(obj, vm); 3628 3629 if (IS_ERR(vma)) 3630 goto err_unpin; 3631 3632 if (flags & PIN_HIGH) { 3633 search_flag = DRM_MM_SEARCH_BELOW; 3634 alloc_flag = DRM_MM_CREATE_TOP; 3635 } else { 3636 search_flag = DRM_MM_SEARCH_DEFAULT; 3637 alloc_flag = DRM_MM_CREATE_DEFAULT; 3638 } 3639 3640 search_free: 3641 ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node, 3642 size, alignment, 3643 obj->cache_level, 3644 start, end, 3645 search_flag, 3646 alloc_flag); 3647 if (ret) { 3648 ret = i915_gem_evict_something(dev, vm, size, alignment, 3649 obj->cache_level, 3650 start, end, 3651 flags); 3652 if (ret == 0) 3653 goto search_free; 3654 3655 goto err_free_vma; 3656 } 3657 if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) { 3658 ret = -EINVAL; 3659 goto err_remove_node; 3660 } 3661 3662 trace_i915_vma_bind(vma, flags); 3663 ret = i915_vma_bind(vma, obj->cache_level, flags); 3664 if (ret) 3665 goto err_remove_node; 3666 3667 list_move_tail(&obj->global_list, &dev_priv->mm.bound_list); 3668 list_add_tail(&vma->mm_list, &vm->inactive_list); 3669 3670 return vma; 3671 3672 err_remove_node: 3673 drm_mm_remove_node(&vma->node); 3674 err_free_vma: 3675 i915_gem_vma_destroy(vma); 3676 vma = ERR_PTR(ret); 3677 err_unpin: 3678 i915_gem_object_unpin_pages(obj); 3679 return vma; 3680 } 3681 3682 bool 3683 i915_gem_clflush_object(struct drm_i915_gem_object *obj, 3684 bool force) 3685 { 3686 /* If we don't have a page list set up, then we're not pinned 3687 * to GPU, and we can ignore the cache flush because it'll happen 3688 * again at bind time. 3689 */ 3690 if (obj->pages == NULL) 3691 return false; 3692 3693 /* 3694 * Stolen memory is always coherent with the GPU as it is explicitly 3695 * marked as wc by the system, or the system is cache-coherent. 
3696 */ 3697 if (obj->stolen || obj->phys_handle) 3698 return false; 3699 3700 /* If the GPU is snooping the contents of the CPU cache, 3701 * we do not need to manually clear the CPU cache lines. However, 3702 * the caches are only snooped when the render cache is 3703 * flushed/invalidated. As we always have to emit invalidations 3704 * and flushes when moving into and out of the RENDER domain, correct 3705 * snooping behaviour occurs naturally as the result of our domain 3706 * tracking. 3707 */ 3708 if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) { 3709 obj->cache_dirty = true; 3710 return false; 3711 } 3712 3713 trace_i915_gem_object_clflush(obj); 3714 drm_clflush_sg(obj->pages); 3715 obj->cache_dirty = false; 3716 3717 return true; 3718 } 3719 3720 /** Flushes the GTT write domain for the object if it's dirty. */ 3721 static void 3722 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) 3723 { 3724 uint32_t old_write_domain; 3725 3726 if (obj->base.write_domain != I915_GEM_DOMAIN_GTT) 3727 return; 3728 3729 /* No actual flushing is required for the GTT write domain. Writes 3730 * to it immediately go to main memory as far as we know, so there's 3731 * no chipset flush. It also doesn't land in render cache. 3732 * 3733 * However, we do have to enforce the order so that all writes through 3734 * the GTT land before any writes to the device, such as updates to 3735 * the GATT itself. 3736 */ 3737 wmb(); 3738 3739 old_write_domain = obj->base.write_domain; 3740 obj->base.write_domain = 0; 3741 3742 intel_fb_obj_flush(obj, false, ORIGIN_GTT); 3743 3744 trace_i915_gem_object_change_domain(obj, 3745 obj->base.read_domains, 3746 old_write_domain); 3747 } 3748 3749 /** Flushes the CPU write domain for the object if it's dirty. */ 3750 static void 3751 i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) 3752 { 3753 uint32_t old_write_domain; 3754 3755 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) 3756 return; 3757 3758 if (i915_gem_clflush_object(obj, obj->pin_display)) 3759 i915_gem_chipset_flush(obj->base.dev); 3760 3761 old_write_domain = obj->base.write_domain; 3762 obj->base.write_domain = 0; 3763 3764 intel_fb_obj_flush(obj, false, ORIGIN_CPU); 3765 3766 trace_i915_gem_object_change_domain(obj, 3767 obj->base.read_domains, 3768 old_write_domain); 3769 } 3770 3771 /** 3772 * Moves a single object to the GTT read, and possibly write domain. 3773 * 3774 * This function returns when the move is complete, including waiting on 3775 * flushes to occur. 3776 */ 3777 int 3778 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) 3779 { 3780 uint32_t old_write_domain, old_read_domains; 3781 struct i915_vma *vma; 3782 int ret; 3783 3784 if (obj->base.write_domain == I915_GEM_DOMAIN_GTT) 3785 return 0; 3786 3787 ret = i915_gem_object_wait_rendering(obj, !write); 3788 if (ret) 3789 return ret; 3790 3791 /* Flush and acquire obj->pages so that we are coherent through 3792 * direct access in memory with previous cached writes through 3793 * shmemfs and that our cache domain tracking remains valid. 3794 * For example, if the obj->filp was moved to swap without us 3795 * being notified and releasing the pages, we would mistakenly 3796 * continue to assume that the obj remained out of the CPU cached 3797 * domain. 
3798 */ 3799 ret = i915_gem_object_get_pages(obj); 3800 if (ret) 3801 return ret; 3802 3803 i915_gem_object_flush_cpu_write_domain(obj); 3804 3805 /* Serialise direct access to this object with the barriers for 3806 * coherent writes from the GPU, by effectively invalidating the 3807 * GTT domain upon first access. 3808 */ 3809 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) 3810 mb(); 3811 3812 old_write_domain = obj->base.write_domain; 3813 old_read_domains = obj->base.read_domains; 3814 3815 /* It should now be out of any other write domains, and we can update 3816 * the domain values for our changes. 3817 */ 3818 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0); 3819 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 3820 if (write) { 3821 obj->base.read_domains = I915_GEM_DOMAIN_GTT; 3822 obj->base.write_domain = I915_GEM_DOMAIN_GTT; 3823 obj->dirty = 1; 3824 } 3825 3826 trace_i915_gem_object_change_domain(obj, 3827 old_read_domains, 3828 old_write_domain); 3829 3830 /* And bump the LRU for this access */ 3831 vma = i915_gem_obj_to_ggtt(obj); 3832 if (vma && drm_mm_node_allocated(&vma->node) && !obj->active) 3833 list_move_tail(&vma->mm_list, 3834 &to_i915(obj->base.dev)->gtt.base.inactive_list); 3835 3836 return 0; 3837 } 3838 3839 /** 3840 * Changes the cache-level of an object across all VMA. 3841 * 3842 * After this function returns, the object will be in the new cache-level 3843 * across all GTT and the contents of the backing storage will be coherent, 3844 * with respect to the new cache-level. In order to keep the backing storage 3845 * coherent for all users, we only allow a single cache level to be set 3846 * globally on the object and prevent it from being changed whilst the 3847 * hardware is reading from the object. That is, if the object is currently 3848 * on the scanout it will be set to uncached (or equivalent display 3849 * cache coherency) and all non-MOCS GPU access will also be uncached so 3850 * that all direct access to the scanout remains coherent. 3851 */ 3852 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, 3853 enum i915_cache_level cache_level) 3854 { 3855 struct drm_device *dev = obj->base.dev; 3856 struct i915_vma *vma, *next; 3857 bool bound = false; 3858 int ret = 0; 3859 3860 if (obj->cache_level == cache_level) 3861 goto out; 3862 3863 /* Inspect the list of currently bound VMA and unbind any that would 3864 * be invalid given the new cache-level. This is principally to 3865 * catch the issue of the CS prefetch crossing page boundaries and 3866 * reading an invalid PTE on older architectures. 3867 */ 3868 list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) { 3869 if (!drm_mm_node_allocated(&vma->node)) 3870 continue; 3871 3872 if (vma->pin_count) { 3873 DRM_DEBUG("can not change the cache level of pinned objects\n"); 3874 return -EBUSY; 3875 } 3876 3877 if (!i915_gem_valid_gtt_space(vma, cache_level)) { 3878 ret = i915_vma_unbind(vma); 3879 if (ret) 3880 return ret; 3881 } else 3882 bound = true; 3883 } 3884 3885 /* We can reuse the existing drm_mm nodes but need to change the 3886 * cache-level on the PTE. We could simply unbind them all and 3887 * rebind with the correct cache-level on next use. However since 3888 * we already have a valid slot, dma mapping, pages etc, we may as well 3889 * rewrite the PTE in the belief that doing so tramples upon less 3890 * state and so involves less work. 3891 */ 3892 if (bound) { 3893 /* Before we change the PTE, the GPU must not be accessing it.
3894 * If we wait upon the object, we know that all the bound 3895 * VMA are no longer active. 3896 */ 3897 ret = i915_gem_object_wait_rendering(obj, false); 3898 if (ret) 3899 return ret; 3900 3901 if (!HAS_LLC(dev) && cache_level != I915_CACHE_NONE) { 3902 /* Access to snoopable pages through the GTT is 3903 * incoherent and on some machines causes a hard 3904 * lockup. Relinquish the CPU mmapping to force 3905 * userspace to refault in the pages and we can 3906 * then double check if the GTT mapping is still 3907 * valid for that pointer access. 3908 */ 3909 i915_gem_release_mmap(obj); 3910 3911 /* As we no longer need a fence for GTT access, 3912 * we can relinquish it now (and so prevent having 3913 * to steal a fence from someone else on the next 3914 * fence request). Note GPU activity would have 3915 * dropped the fence as all snoopable access is 3916 * supposed to be linear. 3917 */ 3918 ret = i915_gem_object_put_fence(obj); 3919 if (ret) 3920 return ret; 3921 } else { 3922 /* We either have incoherent backing store and 3923 * so no GTT access or the architecture is fully 3924 * coherent. In such cases, existing GTT mmaps 3925 * ignore the cache bit in the PTE and we can 3926 * rewrite it without confusing the GPU or having 3927 * to force userspace to fault back in its mmaps. 3928 */ 3929 } 3930 3931 list_for_each_entry(vma, &obj->vma_list, vma_link) { 3932 if (!drm_mm_node_allocated(&vma->node)) 3933 continue; 3934 3935 ret = i915_vma_bind(vma, cache_level, PIN_UPDATE); 3936 if (ret) 3937 return ret; 3938 } 3939 } 3940 3941 list_for_each_entry(vma, &obj->vma_list, vma_link) 3942 vma->node.color = cache_level; 3943 obj->cache_level = cache_level; 3944 3945 out: 3946 /* Flush the dirty CPU caches to the backing storage so that the 3947 * object is now coherent at its new cache level (with respect 3948 * to the access domain). 3949 */ 3950 if (obj->cache_dirty && 3951 obj->base.write_domain != I915_GEM_DOMAIN_CPU && 3952 cpu_write_needs_clflush(obj)) { 3953 if (i915_gem_clflush_object(obj, true)) 3954 i915_gem_chipset_flush(obj->base.dev); 3955 } 3956 3957 return 0; 3958 } 3959 3960 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, 3961 struct drm_file *file) 3962 { 3963 struct drm_i915_gem_caching *args = data; 3964 struct drm_i915_gem_object *obj; 3965 3966 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 3967 if (&obj->base == NULL) 3968 return -ENOENT; 3969 3970 switch (obj->cache_level) { 3971 case I915_CACHE_LLC: 3972 case I915_CACHE_L3_LLC: 3973 args->caching = I915_CACHING_CACHED; 3974 break; 3975 3976 case I915_CACHE_WT: 3977 args->caching = I915_CACHING_DISPLAY; 3978 break; 3979 3980 default: 3981 args->caching = I915_CACHING_NONE; 3982 break; 3983 } 3984 3985 drm_gem_object_unreference_unlocked(&obj->base); 3986 return 0; 3987 } 3988 3989 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, 3990 struct drm_file *file) 3991 { 3992 struct drm_i915_private *dev_priv = dev->dev_private; 3993 struct drm_i915_gem_caching *args = data; 3994 struct drm_i915_gem_object *obj; 3995 enum i915_cache_level level; 3996 int ret; 3997 3998 switch (args->caching) { 3999 case I915_CACHING_NONE: 4000 level = I915_CACHE_NONE; 4001 break; 4002 case I915_CACHING_CACHED: 4003 /* 4004 * Due to a HW issue on BXT A stepping, GPU stores via a 4005 * snooped mapping may leave stale data in a corresponding CPU 4006 * cacheline, whereas normally such cachelines would get 4007 * invalidated.
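 * We therefore reject I915_CACHING_CACHED with -ENODEV on those
 * steppings (the IS_BROXTON()/BXT_REVID_B0 check below).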
4008 */ 4009 if (IS_BROXTON(dev) && INTEL_REVID(dev) < BXT_REVID_B0) 4010 return -ENODEV; 4011 4012 level = I915_CACHE_LLC; 4013 break; 4014 case I915_CACHING_DISPLAY: 4015 level = HAS_WT(dev) ? I915_CACHE_WT : I915_CACHE_NONE; 4016 break; 4017 default: 4018 return -EINVAL; 4019 } 4020 4021 intel_runtime_pm_get(dev_priv); 4022 4023 ret = i915_mutex_lock_interruptible(dev); 4024 if (ret) 4025 goto rpm_put; 4026 4027 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 4028 if (&obj->base == NULL) { 4029 ret = -ENOENT; 4030 goto unlock; 4031 } 4032 4033 ret = i915_gem_object_set_cache_level(obj, level); 4034 4035 drm_gem_object_unreference(&obj->base); 4036 unlock: 4037 mutex_unlock(&dev->struct_mutex); 4038 rpm_put: 4039 intel_runtime_pm_put(dev_priv); 4040 4041 return ret; 4042 } 4043 4044 /* 4045 * Prepare buffer for display plane (scanout, cursors, etc). 4046 * Can be called from an uninterruptible phase (modesetting) and allows 4047 * any flushes to be pipelined (for pageflips). 4048 */ 4049 int 4050 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, 4051 u32 alignment, 4052 struct intel_engine_cs *pipelined, 4053 struct drm_i915_gem_request **pipelined_request, 4054 const struct i915_ggtt_view *view) 4055 { 4056 u32 old_read_domains, old_write_domain; 4057 int ret; 4058 4059 ret = i915_gem_object_sync(obj, pipelined, pipelined_request); 4060 if (ret) 4061 return ret; 4062 4063 /* Mark the pin_display early so that we account for the 4064 * display coherency whilst setting up the cache domains. 4065 */ 4066 obj->pin_display++; 4067 4068 /* The display engine is not coherent with the LLC cache on gen6. As 4069 * a result, we make sure that the pinning that is about to occur is 4070 * done with uncached PTEs. This is lowest common denominator for all 4071 * chipsets. 4072 * 4073 * However for gen6+, we could do better by using the GFDT bit instead 4074 * of uncaching, which would allow us to flush all the LLC-cached data 4075 * with that bit in the PTE to main memory with just one PIPE_CONTROL. 4076 */ 4077 ret = i915_gem_object_set_cache_level(obj, 4078 HAS_WT(obj->base.dev) ? I915_CACHE_WT : I915_CACHE_NONE); 4079 if (ret) 4080 goto err_unpin_display; 4081 4082 /* As the user may map the buffer once pinned in the display plane 4083 * (e.g. libkms for the bootup splash), we have to ensure that we 4084 * always use map_and_fenceable for all scanout buffers. 4085 */ 4086 ret = i915_gem_object_ggtt_pin(obj, view, alignment, 4087 view->type == I915_GGTT_VIEW_NORMAL ? 4088 PIN_MAPPABLE : 0); 4089 if (ret) 4090 goto err_unpin_display; 4091 4092 i915_gem_object_flush_cpu_write_domain(obj); 4093 4094 old_write_domain = obj->base.write_domain; 4095 old_read_domains = obj->base.read_domains; 4096 4097 /* It should now be out of any other write domains, and we can update 4098 * the domain values for our changes. 4099 */ 4100 obj->base.write_domain = 0; 4101 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 4102 4103 trace_i915_gem_object_change_domain(obj, 4104 old_read_domains, 4105 old_write_domain); 4106 4107 return 0; 4108 4109 err_unpin_display: 4110 obj->pin_display--; 4111 return ret; 4112 } 4113 4114 void 4115 i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj, 4116 const struct i915_ggtt_view *view) 4117 { 4118 if (WARN_ON(obj->pin_display == 0)) 4119 return; 4120 4121 i915_gem_object_ggtt_unpin_view(obj, view); 4122 4123 obj->pin_display--; 4124 } 4125 4126 /** 4127 * Moves a single object to the CPU read, and possibly write domain. 
4128 * 4129 * This function returns when the move is complete, including waiting on 4130 * flushes to occur. 4131 */ 4132 int 4133 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) 4134 { 4135 uint32_t old_write_domain, old_read_domains; 4136 int ret; 4137 4138 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) 4139 return 0; 4140 4141 ret = i915_gem_object_wait_rendering(obj, !write); 4142 if (ret) 4143 return ret; 4144 4145 i915_gem_object_flush_gtt_write_domain(obj); 4146 4147 old_write_domain = obj->base.write_domain; 4148 old_read_domains = obj->base.read_domains; 4149 4150 /* Flush the CPU cache if it's still invalid. */ 4151 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) { 4152 i915_gem_clflush_object(obj, false); 4153 4154 obj->base.read_domains |= I915_GEM_DOMAIN_CPU; 4155 } 4156 4157 /* It should now be out of any other write domains, and we can update 4158 * the domain values for our changes. 4159 */ 4160 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0); 4161 4162 /* If we're writing through the CPU, then the GPU read domains will 4163 * need to be invalidated at next use. 4164 */ 4165 if (write) { 4166 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 4167 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 4168 } 4169 4170 trace_i915_gem_object_change_domain(obj, 4171 old_read_domains, 4172 old_write_domain); 4173 4174 return 0; 4175 } 4176 4177 /* Throttle our rendering by waiting until the ring has completed our requests 4178 * emitted over 20 msec ago. 4179 * 4180 * Note that if we were to use the current jiffies each time around the loop, 4181 * we wouldn't escape the function with any frames outstanding if the time to 4182 * render a frame was over 20ms. 4183 * 4184 * This should get us reasonable parallelism between CPU and GPU but also 4185 * relatively low latency when blocking on a particular request to finish. 4186 */ 4187 static int 4188 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) 4189 { 4190 struct drm_i915_private *dev_priv = dev->dev_private; 4191 struct drm_i915_file_private *file_priv = file->driver_priv; 4192 unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES; 4193 struct drm_i915_gem_request *request, *target = NULL; 4194 unsigned reset_counter; 4195 int ret; 4196 4197 ret = i915_gem_wait_for_error(&dev_priv->gpu_error); 4198 if (ret) 4199 return ret; 4200 4201 ret = i915_gem_check_wedge(&dev_priv->gpu_error, false); 4202 if (ret) 4203 return ret; 4204 4205 spin_lock(&file_priv->mm.lock); 4206 list_for_each_entry(request, &file_priv->mm.request_list, client_list) { 4207 if (time_after_eq(request->emitted_jiffies, recent_enough)) 4208 break; 4209 4210 /* 4211 * Note that the request might not have been submitted yet. 4212 * In which case emitted_jiffies will be zero. 
4213 */ 4214 if (!request->emitted_jiffies) 4215 continue; 4216 4217 target = request; 4218 } 4219 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter); 4220 if (target) 4221 i915_gem_request_reference(target); 4222 spin_unlock(&file_priv->mm.lock); 4223 4224 if (target == NULL) 4225 return 0; 4226 4227 ret = __i915_wait_request(target, reset_counter, true, NULL, NULL); 4228 if (ret == 0) 4229 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0); 4230 4231 i915_gem_request_unreference__unlocked(target); 4232 4233 return ret; 4234 } 4235 4236 static bool 4237 i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags) 4238 { 4239 struct drm_i915_gem_object *obj = vma->obj; 4240 4241 if (alignment && 4242 vma->node.start & (alignment - 1)) 4243 return true; 4244 4245 if (flags & PIN_MAPPABLE && !obj->map_and_fenceable) 4246 return true; 4247 4248 if (flags & PIN_OFFSET_BIAS && 4249 vma->node.start < (flags & PIN_OFFSET_MASK)) 4250 return true; 4251 4252 return false; 4253 } 4254 4255 void __i915_vma_set_map_and_fenceable(struct i915_vma *vma) 4256 { 4257 struct drm_i915_gem_object *obj = vma->obj; 4258 bool mappable, fenceable; 4259 u32 fence_size, fence_alignment; 4260 4261 fence_size = i915_gem_get_gtt_size(obj->base.dev, 4262 obj->base.size, 4263 obj->tiling_mode); 4264 fence_alignment = i915_gem_get_gtt_alignment(obj->base.dev, 4265 obj->base.size, 4266 obj->tiling_mode, 4267 true); 4268 4269 fenceable = (vma->node.size == fence_size && 4270 (vma->node.start & (fence_alignment - 1)) == 0); 4271 4272 mappable = (vma->node.start + fence_size <= 4273 to_i915(obj->base.dev)->gtt.mappable_end); 4274 4275 obj->map_and_fenceable = mappable && fenceable; 4276 } 4277 4278 static int 4279 i915_gem_object_do_pin(struct drm_i915_gem_object *obj, 4280 struct i915_address_space *vm, 4281 const struct i915_ggtt_view *ggtt_view, 4282 uint32_t alignment, 4283 uint64_t flags) 4284 { 4285 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 4286 struct i915_vma *vma; 4287 unsigned bound; 4288 int ret; 4289 4290 if (WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base)) 4291 return -ENODEV; 4292 4293 if (WARN_ON(flags & (PIN_GLOBAL | PIN_MAPPABLE) && !i915_is_ggtt(vm))) 4294 return -EINVAL; 4295 4296 if (WARN_ON((flags & (PIN_MAPPABLE | PIN_GLOBAL)) == PIN_MAPPABLE)) 4297 return -EINVAL; 4298 4299 if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view)) 4300 return -EINVAL; 4301 4302 vma = ggtt_view ? i915_gem_obj_to_ggtt_view(obj, ggtt_view) : 4303 i915_gem_obj_to_vma(obj, vm); 4304 4305 if (IS_ERR(vma)) 4306 return PTR_ERR(vma); 4307 4308 if (vma) { 4309 if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT)) 4310 return -EBUSY; 4311 4312 if (i915_vma_misplaced(vma, alignment, flags)) { 4313 WARN(vma->pin_count, 4314 "bo is already pinned in %s with incorrect alignment:" 4315 " offset=%08x %08x, req.alignment=%x, req.map_and_fenceable=%d," 4316 " obj->map_and_fenceable=%d\n", 4317 ggtt_view ? "ggtt" : "ppgtt", 4318 upper_32_bits(vma->node.start), 4319 lower_32_bits(vma->node.start), 4320 alignment, 4321 !!(flags & PIN_MAPPABLE), 4322 obj->map_and_fenceable); 4323 ret = i915_vma_unbind(vma); 4324 if (ret) 4325 return ret; 4326 4327 vma = NULL; 4328 } 4329 } 4330 4331 bound = vma ? 
vma->bound : 0; 4332 if (vma == NULL || !drm_mm_node_allocated(&vma->node)) { 4333 vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view, alignment, 4334 flags); 4335 if (IS_ERR(vma)) 4336 return PTR_ERR(vma); 4337 } else { 4338 ret = i915_vma_bind(vma, obj->cache_level, flags); 4339 if (ret) 4340 return ret; 4341 } 4342 4343 if (ggtt_view && ggtt_view->type == I915_GGTT_VIEW_NORMAL && 4344 (bound ^ vma->bound) & GLOBAL_BIND) { 4345 __i915_vma_set_map_and_fenceable(vma); 4346 WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable); 4347 } 4348 4349 vma->pin_count++; 4350 return 0; 4351 } 4352 4353 int 4354 i915_gem_object_pin(struct drm_i915_gem_object *obj, 4355 struct i915_address_space *vm, 4356 uint32_t alignment, 4357 uint64_t flags) 4358 { 4359 return i915_gem_object_do_pin(obj, vm, 4360 i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL, 4361 alignment, flags); 4362 } 4363 4364 int 4365 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, 4366 const struct i915_ggtt_view *view, 4367 uint32_t alignment, 4368 uint64_t flags) 4369 { 4370 if (WARN_ONCE(!view, "no view specified")) 4371 return -EINVAL; 4372 4373 return i915_gem_object_do_pin(obj, i915_obj_to_ggtt(obj), view, 4374 alignment, flags | PIN_GLOBAL); 4375 } 4376 4377 void 4378 i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj, 4379 const struct i915_ggtt_view *view) 4380 { 4381 struct i915_vma *vma = i915_gem_obj_to_ggtt_view(obj, view); 4382 4383 BUG_ON(!vma); 4384 WARN_ON(vma->pin_count == 0); 4385 WARN_ON(!i915_gem_obj_ggtt_bound_view(obj, view)); 4386 4387 --vma->pin_count; 4388 } 4389 4390 int 4391 i915_gem_busy_ioctl(struct drm_device *dev, void *data, 4392 struct drm_file *file) 4393 { 4394 struct drm_i915_gem_busy *args = data; 4395 struct drm_i915_gem_object *obj; 4396 int ret; 4397 4398 ret = i915_mutex_lock_interruptible(dev); 4399 if (ret) 4400 return ret; 4401 4402 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 4403 if (&obj->base == NULL) { 4404 ret = -ENOENT; 4405 goto unlock; 4406 } 4407 4408 /* Count all active objects as busy, even if they are currently not used 4409 * by the gpu. Users of this interface expect objects to eventually 4410 * become non-busy without any further actions, therefore emit any 4411 * necessary flushes here. 
4412 */ 4413 ret = i915_gem_object_flush_active(obj); 4414 if (ret) 4415 goto unref; 4416 4417 BUILD_BUG_ON(I915_NUM_RINGS > 16); 4418 args->busy = obj->active << 16; 4419 if (obj->last_write_req) 4420 args->busy |= obj->last_write_req->ring->id; 4421 4422 unref: 4423 drm_gem_object_unreference(&obj->base); 4424 unlock: 4425 mutex_unlock(&dev->struct_mutex); 4426 return ret; 4427 } 4428 4429 int 4430 i915_gem_throttle_ioctl(struct drm_device *dev, void *data, 4431 struct drm_file *file_priv) 4432 { 4433 return i915_gem_ring_throttle(dev, file_priv); 4434 } 4435 4436 int 4437 i915_gem_madvise_ioctl(struct drm_device *dev, void *data, 4438 struct drm_file *file_priv) 4439 { 4440 struct drm_i915_private *dev_priv = dev->dev_private; 4441 struct drm_i915_gem_madvise *args = data; 4442 struct drm_i915_gem_object *obj; 4443 int ret; 4444 4445 switch (args->madv) { 4446 case I915_MADV_DONTNEED: 4447 case I915_MADV_WILLNEED: 4448 break; 4449 default: 4450 return -EINVAL; 4451 } 4452 4453 ret = i915_mutex_lock_interruptible(dev); 4454 if (ret) 4455 return ret; 4456 4457 obj = to_intel_bo(drm_gem_object_lookup(dev, file_priv, args->handle)); 4458 if (&obj->base == NULL) { 4459 ret = -ENOENT; 4460 goto unlock; 4461 } 4462 4463 if (i915_gem_obj_is_pinned(obj)) { 4464 ret = -EINVAL; 4465 goto out; 4466 } 4467 4468 if (obj->pages && 4469 obj->tiling_mode != I915_TILING_NONE && 4470 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) { 4471 if (obj->madv == I915_MADV_WILLNEED) 4472 i915_gem_object_unpin_pages(obj); 4473 if (args->madv == I915_MADV_WILLNEED) 4474 i915_gem_object_pin_pages(obj); 4475 } 4476 4477 if (obj->madv != __I915_MADV_PURGED) 4478 obj->madv = args->madv; 4479 4480 /* if the object is no longer attached, discard its backing storage */ 4481 if (obj->madv == I915_MADV_DONTNEED && obj->pages == NULL) 4482 i915_gem_object_truncate(obj); 4483 4484 args->retained = obj->madv != __I915_MADV_PURGED; 4485 4486 out: 4487 drm_gem_object_unreference(&obj->base); 4488 unlock: 4489 mutex_unlock(&dev->struct_mutex); 4490 return ret; 4491 } 4492 4493 void i915_gem_object_init(struct drm_i915_gem_object *obj, 4494 const struct drm_i915_gem_object_ops *ops) 4495 { 4496 int i; 4497 4498 INIT_LIST_HEAD(&obj->global_list); 4499 for (i = 0; i < I915_NUM_RINGS; i++) 4500 INIT_LIST_HEAD(&obj->ring_list[i]); 4501 INIT_LIST_HEAD(&obj->obj_exec_link); 4502 INIT_LIST_HEAD(&obj->vma_list); 4503 INIT_LIST_HEAD(&obj->batch_pool_link); 4504 4505 obj->ops = ops; 4506 4507 obj->fence_reg = I915_FENCE_REG_NONE; 4508 obj->madv = I915_MADV_WILLNEED; 4509 4510 i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size); 4511 } 4512 4513 static const struct drm_i915_gem_object_ops i915_gem_object_ops = { 4514 .get_pages = i915_gem_object_get_pages_gtt, 4515 .put_pages = i915_gem_object_put_pages_gtt, 4516 }; 4517 4518 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev, 4519 size_t size) 4520 { 4521 struct drm_i915_gem_object *obj; 4522 #if 0 4523 struct address_space *mapping; 4524 gfp_t mask; 4525 #endif 4526 4527 obj = i915_gem_object_alloc(dev); 4528 if (obj == NULL) 4529 return NULL; 4530 4531 if (drm_gem_object_init(dev, &obj->base, size) != 0) { 4532 i915_gem_object_free(obj); 4533 return NULL; 4534 } 4535 4536 #if 0 4537 mask = GFP_HIGHUSER | __GFP_RECLAIMABLE; 4538 if (IS_CRESTLINE(dev) || IS_BROADWATER(dev)) { 4539 /* 965gm cannot relocate objects above 4GiB. 
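		 * Keep the shmem backing pages out of highmem and within the
		 * 32-bit DMA zone by adjusting the mapping's gfp mask below.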
*/ 4540 mask &= ~__GFP_HIGHMEM; 4541 mask |= __GFP_DMA32; 4542 } 4543 4544 mapping = file_inode(obj->base.filp)->i_mapping; 4545 mapping_set_gfp_mask(mapping, mask); 4546 #endif 4547 4548 i915_gem_object_init(obj, &i915_gem_object_ops); 4549 4550 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 4551 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 4552 4553 if (HAS_LLC(dev)) { 4554 /* On some devices, we can have the GPU use the LLC (the CPU 4555 * cache) for about a 10% performance improvement 4556 * compared to uncached. Graphics requests other than 4557 * display scanout are coherent with the CPU in 4558 * accessing this cache. This means in this mode we 4559 * don't need to clflush on the CPU side, and on the 4560 * GPU side we only need to flush internal caches to 4561 * get data visible to the CPU. 4562 * 4563 * However, we maintain the display planes as UC, and so 4564 * need to rebind when first used as such. 4565 */ 4566 obj->cache_level = I915_CACHE_LLC; 4567 } else 4568 obj->cache_level = I915_CACHE_NONE; 4569 4570 trace_i915_gem_object_create(obj); 4571 4572 return obj; 4573 } 4574 4575 static bool discard_backing_storage(struct drm_i915_gem_object *obj) 4576 { 4577 /* If we are the last user of the backing storage (be it shmemfs 4578 * pages or stolen etc), we know that the pages are going to be 4579 * immediately released. In this case, we can then skip copying 4580 * back the contents from the GPU. 4581 */ 4582 4583 if (obj->madv != I915_MADV_WILLNEED) 4584 return false; 4585 4586 if (obj->base.vm_obj == NULL) 4587 return true; 4588 4589 /* At first glance, this looks racy, but then again so would be 4590 * userspace racing mmap against close. However, the first external 4591 * reference to the filp can only be obtained through the 4592 * i915_gem_mmap_ioctl() which safeguards us against the user 4593 * acquiring such a reference whilst we are in the middle of 4594 * freeing the object. 4595 */ 4596 #if 0 4597 return atomic_long_read(&obj->base.filp->f_count) == 1; 4598 #else 4599 return false; 4600 #endif 4601 } 4602 4603 void i915_gem_free_object(struct drm_gem_object *gem_obj) 4604 { 4605 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); 4606 struct drm_device *dev = obj->base.dev; 4607 struct drm_i915_private *dev_priv = dev->dev_private; 4608 struct i915_vma *vma, *next; 4609 4610 intel_runtime_pm_get(dev_priv); 4611 4612 trace_i915_gem_object_destroy(obj); 4613 4614 list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) { 4615 int ret; 4616 4617 vma->pin_count = 0; 4618 ret = i915_vma_unbind(vma); 4619 if (WARN_ON(ret == -ERESTARTSYS)) { 4620 bool was_interruptible; 4621 4622 was_interruptible = dev_priv->mm.interruptible; 4623 dev_priv->mm.interruptible = false; 4624 4625 WARN_ON(i915_vma_unbind(vma)); 4626 4627 dev_priv->mm.interruptible = was_interruptible; 4628 } 4629 } 4630 4631 /* Stolen objects don't hold a ref, but do hold pin count. Fix that up 4632 * before progressing. 
 */
	if (obj->stolen)
		i915_gem_object_unpin_pages(obj);

	WARN_ON(obj->frontbuffer_bits);

	if (obj->pages && obj->madv == I915_MADV_WILLNEED &&
	    dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES &&
	    obj->tiling_mode != I915_TILING_NONE)
		i915_gem_object_unpin_pages(obj);

	if (WARN_ON(obj->pages_pin_count))
		obj->pages_pin_count = 0;
	if (discard_backing_storage(obj))
		obj->madv = I915_MADV_DONTNEED;
	i915_gem_object_put_pages(obj);
	i915_gem_object_free_mmap_offset(obj);

	BUG_ON(obj->pages);

#if 0
	if (obj->base.import_attach)
		drm_prime_gem_destroy(&obj->base, NULL);
#endif

	if (obj->ops->release)
		obj->ops->release(obj);

	drm_gem_object_release(&obj->base);
	i915_gem_info_remove_obj(dev_priv, obj->base.size);

	kfree(obj->bit_17);
	i915_gem_object_free(obj);

	intel_runtime_pm_put(dev_priv);
}

struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
				     struct i915_address_space *vm)
{
	struct i915_vma *vma;
	list_for_each_entry(vma, &obj->vma_list, vma_link) {
		if (i915_is_ggtt(vma->vm) &&
		    vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)
			continue;
		if (vma->vm == vm)
			return vma;
	}
	return NULL;
}

struct i915_vma *i915_gem_obj_to_ggtt_view(struct drm_i915_gem_object *obj,
					   const struct i915_ggtt_view *view)
{
	struct i915_address_space *ggtt = i915_obj_to_ggtt(obj);
	struct i915_vma *vma;

	if (WARN_ONCE(!view, "no view specified"))
		return ERR_PTR(-EINVAL);

	list_for_each_entry(vma, &obj->vma_list, vma_link)
		if (vma->vm == ggtt &&
		    i915_ggtt_view_equal(&vma->ggtt_view, view))
			return vma;
	return NULL;
}

void i915_gem_vma_destroy(struct i915_vma *vma)
{
	struct i915_address_space *vm = NULL;
	WARN_ON(vma->node.allocated);

	/* Keep the vma as a placeholder in the execbuffer reservation lists */
	if (!list_empty(&vma->exec_list))
		return;

	vm = vma->vm;

	if (!i915_is_ggtt(vm))
		i915_ppgtt_put(i915_vm_to_ppgtt(vm));

	list_del(&vma->vma_link);

	kfree(vma);
}

static void
i915_gem_stop_ringbuffers(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *ring;
	int i;

	for_each_ring(ring, dev_priv, i)
		dev_priv->gt.stop_ring(ring);
}

int
i915_gem_suspend(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret = 0;

	mutex_lock(&dev->struct_mutex);
	ret = i915_gpu_idle(dev);
	if (ret)
		goto err;

	i915_gem_retire_requests(dev);

	i915_gem_stop_ringbuffers(dev);
	mutex_unlock(&dev->struct_mutex);

	cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
	cancel_delayed_work_sync(&dev_priv->mm.retire_work);
#if 0
	flush_delayed_work(&dev_priv->mm.idle_work);
#endif

	/* Assert that we successfully flushed all the work and
	 * reset the GPU back to its idle, low power state.
4753 */ 4754 WARN_ON(dev_priv->mm.busy); 4755 4756 return 0; 4757 4758 err: 4759 mutex_unlock(&dev->struct_mutex); 4760 return ret; 4761 } 4762 4763 int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice) 4764 { 4765 struct intel_engine_cs *ring = req->ring; 4766 struct drm_device *dev = ring->dev; 4767 struct drm_i915_private *dev_priv = dev->dev_private; 4768 u32 reg_base = GEN7_L3LOG_BASE + (slice * 0x200); 4769 u32 *remap_info = dev_priv->l3_parity.remap_info[slice]; 4770 int i, ret; 4771 4772 if (!HAS_L3_DPF(dev) || !remap_info) 4773 return 0; 4774 4775 ret = intel_ring_begin(req, GEN7_L3LOG_SIZE / 4 * 3); 4776 if (ret) 4777 return ret; 4778 4779 /* 4780 * Note: We do not worry about the concurrent register cacheline hang 4781 * here because no other code should access these registers other than 4782 * at initialization time. 4783 */ 4784 for (i = 0; i < GEN7_L3LOG_SIZE; i += 4) { 4785 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); 4786 intel_ring_emit(ring, reg_base + i); 4787 intel_ring_emit(ring, remap_info[i/4]); 4788 } 4789 4790 intel_ring_advance(ring); 4791 4792 return ret; 4793 } 4794 4795 void i915_gem_init_swizzling(struct drm_device *dev) 4796 { 4797 struct drm_i915_private *dev_priv = dev->dev_private; 4798 4799 if (INTEL_INFO(dev)->gen < 5 || 4800 dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE) 4801 return; 4802 4803 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) | 4804 DISP_TILE_SURFACE_SWIZZLING); 4805 4806 if (IS_GEN5(dev)) 4807 return; 4808 4809 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL); 4810 if (IS_GEN6(dev)) 4811 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB)); 4812 else if (IS_GEN7(dev)) 4813 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB)); 4814 else if (IS_GEN8(dev)) 4815 I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW)); 4816 else 4817 BUG(); 4818 } 4819 4820 static void init_unused_ring(struct drm_device *dev, u32 base) 4821 { 4822 struct drm_i915_private *dev_priv = dev->dev_private; 4823 4824 I915_WRITE(RING_CTL(base), 0); 4825 I915_WRITE(RING_HEAD(base), 0); 4826 I915_WRITE(RING_TAIL(base), 0); 4827 I915_WRITE(RING_START(base), 0); 4828 } 4829 4830 static void init_unused_rings(struct drm_device *dev) 4831 { 4832 if (IS_I830(dev)) { 4833 init_unused_ring(dev, PRB1_BASE); 4834 init_unused_ring(dev, SRB0_BASE); 4835 init_unused_ring(dev, SRB1_BASE); 4836 init_unused_ring(dev, SRB2_BASE); 4837 init_unused_ring(dev, SRB3_BASE); 4838 } else if (IS_GEN2(dev)) { 4839 init_unused_ring(dev, SRB0_BASE); 4840 init_unused_ring(dev, SRB1_BASE); 4841 } else if (IS_GEN3(dev)) { 4842 init_unused_ring(dev, PRB1_BASE); 4843 init_unused_ring(dev, PRB2_BASE); 4844 } 4845 } 4846 4847 int i915_gem_init_rings(struct drm_device *dev) 4848 { 4849 struct drm_i915_private *dev_priv = dev->dev_private; 4850 int ret; 4851 4852 ret = intel_init_render_ring_buffer(dev); 4853 if (ret) 4854 return ret; 4855 4856 if (HAS_BSD(dev)) { 4857 ret = intel_init_bsd_ring_buffer(dev); 4858 if (ret) 4859 goto cleanup_render_ring; 4860 } 4861 4862 if (HAS_BLT(dev)) { 4863 ret = intel_init_blt_ring_buffer(dev); 4864 if (ret) 4865 goto cleanup_bsd_ring; 4866 } 4867 4868 if (HAS_VEBOX(dev)) { 4869 ret = intel_init_vebox_ring_buffer(dev); 4870 if (ret) 4871 goto cleanup_blt_ring; 4872 } 4873 4874 if (HAS_BSD2(dev)) { 4875 ret = intel_init_bsd2_ring_buffer(dev); 4876 if (ret) 4877 goto cleanup_vebox_ring; 4878 } 4879 4880 return 0; 4881 4882 cleanup_vebox_ring: 4883 intel_cleanup_ring_buffer(&dev_priv->ring[VECS]); 4884 
cleanup_blt_ring: 4885 intel_cleanup_ring_buffer(&dev_priv->ring[BCS]); 4886 cleanup_bsd_ring: 4887 intel_cleanup_ring_buffer(&dev_priv->ring[VCS]); 4888 cleanup_render_ring: 4889 intel_cleanup_ring_buffer(&dev_priv->ring[RCS]); 4890 4891 return ret; 4892 } 4893 4894 int 4895 i915_gem_init_hw(struct drm_device *dev) 4896 { 4897 struct drm_i915_private *dev_priv = dev->dev_private; 4898 struct intel_engine_cs *ring; 4899 int ret, i, j; 4900 4901 #if 0 4902 if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt()) 4903 return -EIO; 4904 #endif 4905 4906 /* Double layer security blanket, see i915_gem_init() */ 4907 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 4908 4909 if (dev_priv->ellc_size) 4910 I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf)); 4911 4912 if (IS_HASWELL(dev)) 4913 I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev) ? 4914 LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED); 4915 4916 if (HAS_PCH_NOP(dev)) { 4917 if (IS_IVYBRIDGE(dev)) { 4918 u32 temp = I915_READ(GEN7_MSG_CTL); 4919 temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK); 4920 I915_WRITE(GEN7_MSG_CTL, temp); 4921 } else if (INTEL_INFO(dev)->gen >= 7) { 4922 u32 temp = I915_READ(HSW_NDE_RSTWRN_OPT); 4923 temp &= ~RESET_PCH_HANDSHAKE_ENABLE; 4924 I915_WRITE(HSW_NDE_RSTWRN_OPT, temp); 4925 } 4926 } 4927 4928 i915_gem_init_swizzling(dev); 4929 4930 /* 4931 * At least 830 can leave some of the unused rings 4932 * "active" (ie. head != tail) after resume which 4933 * will prevent c3 entry. Makes sure all unused rings 4934 * are totally idle. 4935 */ 4936 init_unused_rings(dev); 4937 4938 BUG_ON(!dev_priv->ring[RCS].default_context); 4939 4940 ret = i915_ppgtt_init_hw(dev); 4941 if (ret) { 4942 DRM_ERROR("PPGTT enable HW failed %d\n", ret); 4943 goto out; 4944 } 4945 4946 /* Need to do basic initialisation of all rings first: */ 4947 for_each_ring(ring, dev_priv, i) { 4948 ret = ring->init_hw(ring); 4949 if (ret) 4950 goto out; 4951 } 4952 4953 /* We can't enable contexts until all firmware is loaded */ 4954 if (HAS_GUC_UCODE(dev)) { 4955 #ifndef __DragonFly__ 4956 ret = intel_guc_ucode_load(dev); 4957 #else 4958 ret = -ENOEXEC; 4959 #endif 4960 if (ret) { 4961 /* 4962 * If we got an error and GuC submission is enabled, map 4963 * the error to -EIO so the GPU will be declared wedged. 4964 * OTOH, if we didn't intend to use the GuC anyway, just 4965 * discard the error and carry on. 4966 */ 4967 DRM_ERROR("Failed to initialize GuC, error %d%s\n", ret, 4968 i915.enable_guc_submission ? "" : 4969 " (ignored)"); 4970 ret = i915.enable_guc_submission ? 
-EIO : 0; 4971 if (ret) 4972 goto out; 4973 } 4974 } 4975 4976 /* 4977 * Increment the next seqno by 0x100 so we have a visible break 4978 * on re-initialisation 4979 */ 4980 ret = i915_gem_set_seqno(dev, dev_priv->next_seqno+0x100); 4981 if (ret) 4982 goto out; 4983 4984 /* Now it is safe to go back round and do everything else: */ 4985 for_each_ring(ring, dev_priv, i) { 4986 struct drm_i915_gem_request *req; 4987 4988 WARN_ON(!ring->default_context); 4989 4990 ret = i915_gem_request_alloc(ring, ring->default_context, &req); 4991 if (ret) { 4992 i915_gem_cleanup_ringbuffer(dev); 4993 goto out; 4994 } 4995 4996 if (ring->id == RCS) { 4997 for (j = 0; j < NUM_L3_SLICES(dev); j++) 4998 i915_gem_l3_remap(req, j); 4999 } 5000 5001 ret = i915_ppgtt_init_ring(req); 5002 if (ret && ret != -EIO) { 5003 DRM_ERROR("PPGTT enable ring #%d failed %d\n", i, ret); 5004 i915_gem_request_cancel(req); 5005 i915_gem_cleanup_ringbuffer(dev); 5006 goto out; 5007 } 5008 5009 ret = i915_gem_context_enable(req); 5010 if (ret && ret != -EIO) { 5011 DRM_ERROR("Context enable ring #%d failed %d\n", i, ret); 5012 i915_gem_request_cancel(req); 5013 i915_gem_cleanup_ringbuffer(dev); 5014 goto out; 5015 } 5016 5017 i915_add_request_no_flush(req); 5018 } 5019 5020 out: 5021 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5022 return ret; 5023 } 5024 5025 int i915_gem_init(struct drm_device *dev) 5026 { 5027 struct drm_i915_private *dev_priv = dev->dev_private; 5028 int ret; 5029 5030 i915.enable_execlists = intel_sanitize_enable_execlists(dev, 5031 i915.enable_execlists); 5032 5033 mutex_lock(&dev->struct_mutex); 5034 5035 if (IS_VALLEYVIEW(dev)) { 5036 /* VLVA0 (potential hack), BIOS isn't actually waking us */ 5037 I915_WRITE(VLV_GTLC_WAKE_CTRL, VLV_GTLC_ALLOWWAKEREQ); 5038 if (wait_for((I915_READ(VLV_GTLC_PW_STATUS) & 5039 VLV_GTLC_ALLOWWAKEACK), 10)) 5040 DRM_DEBUG_DRIVER("allow wake ack timed out\n"); 5041 } 5042 5043 if (!i915.enable_execlists) { 5044 dev_priv->gt.execbuf_submit = i915_gem_ringbuffer_submission; 5045 dev_priv->gt.init_rings = i915_gem_init_rings; 5046 dev_priv->gt.cleanup_ring = intel_cleanup_ring_buffer; 5047 dev_priv->gt.stop_ring = intel_stop_ring_buffer; 5048 } else { 5049 dev_priv->gt.execbuf_submit = intel_execlists_submission; 5050 dev_priv->gt.init_rings = intel_logical_rings_init; 5051 dev_priv->gt.cleanup_ring = intel_logical_ring_cleanup; 5052 dev_priv->gt.stop_ring = intel_logical_ring_stop; 5053 } 5054 5055 /* This is just a security blanket to placate dragons. 5056 * On some systems, we very sporadically observe that the first TLBs 5057 * used by the CS may be stale, despite us poking the TLB reset. If 5058 * we hold the forcewake during initialisation these problems 5059 * just magically go away. 5060 */ 5061 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 5062 5063 ret = i915_gem_init_userptr(dev); 5064 if (ret) 5065 goto out_unlock; 5066 5067 i915_gem_init_global_gtt(dev); 5068 5069 ret = i915_gem_context_init(dev); 5070 if (ret) 5071 goto out_unlock; 5072 5073 ret = dev_priv->gt.init_rings(dev); 5074 if (ret) 5075 goto out_unlock; 5076 5077 ret = i915_gem_init_hw(dev); 5078 if (ret == -EIO) { 5079 /* Allow ring initialisation to fail by marking the GPU as 5080 * wedged. But we only want to do this where the GPU is angry, 5081 * for all other failure, such as an allocation failure, bail. 
5082 */ 5083 DRM_ERROR("Failed to initialize GPU, declaring it wedged\n"); 5084 atomic_or(I915_WEDGED, &dev_priv->gpu_error.reset_counter); 5085 ret = 0; 5086 } 5087 5088 out_unlock: 5089 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5090 mutex_unlock(&dev->struct_mutex); 5091 5092 return ret; 5093 } 5094 5095 void 5096 i915_gem_cleanup_ringbuffer(struct drm_device *dev) 5097 { 5098 struct drm_i915_private *dev_priv = dev->dev_private; 5099 struct intel_engine_cs *ring; 5100 int i; 5101 5102 for_each_ring(ring, dev_priv, i) 5103 dev_priv->gt.cleanup_ring(ring); 5104 5105 if (i915.enable_execlists) 5106 /* 5107 * Neither the BIOS, ourselves or any other kernel 5108 * expects the system to be in execlists mode on startup, 5109 * so we need to reset the GPU back to legacy mode. 5110 */ 5111 intel_gpu_reset(dev); 5112 } 5113 5114 static void 5115 init_ring_lists(struct intel_engine_cs *ring) 5116 { 5117 INIT_LIST_HEAD(&ring->active_list); 5118 INIT_LIST_HEAD(&ring->request_list); 5119 } 5120 5121 void 5122 i915_gem_load(struct drm_device *dev) 5123 { 5124 struct drm_i915_private *dev_priv = dev->dev_private; 5125 int i; 5126 5127 INIT_LIST_HEAD(&dev_priv->vm_list); 5128 INIT_LIST_HEAD(&dev_priv->context_list); 5129 INIT_LIST_HEAD(&dev_priv->mm.unbound_list); 5130 INIT_LIST_HEAD(&dev_priv->mm.bound_list); 5131 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 5132 for (i = 0; i < I915_NUM_RINGS; i++) 5133 init_ring_lists(&dev_priv->ring[i]); 5134 for (i = 0; i < I915_MAX_NUM_FENCES; i++) 5135 INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list); 5136 INIT_DELAYED_WORK(&dev_priv->mm.retire_work, 5137 i915_gem_retire_work_handler); 5138 INIT_DELAYED_WORK(&dev_priv->mm.idle_work, 5139 i915_gem_idle_work_handler); 5140 init_waitqueue_head(&dev_priv->gpu_error.reset_queue); 5141 5142 dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL; 5143 5144 if (INTEL_INFO(dev)->gen >= 7 && !IS_VALLEYVIEW(dev)) 5145 dev_priv->num_fence_regs = 32; 5146 else if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev)) 5147 dev_priv->num_fence_regs = 16; 5148 else 5149 dev_priv->num_fence_regs = 8; 5150 5151 if (intel_vgpu_active(dev)) 5152 dev_priv->num_fence_regs = 5153 I915_READ(vgtif_reg(avail_rs.fence_num)); 5154 5155 /* 5156 * Set initial sequence number for requests. 5157 * Using this number allows the wraparound to happen early, 5158 * catching any obvious problems. 5159 */ 5160 dev_priv->next_seqno = ((u32)~0 - 0x1100); 5161 dev_priv->last_seqno = ((u32)~0 - 0x1101); 5162 5163 /* Initialize fence registers to zero */ 5164 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 5165 i915_gem_restore_fences(dev); 5166 5167 i915_gem_detect_bit_6_swizzle(dev); 5168 init_waitqueue_head(&dev_priv->pending_flip_queue); 5169 5170 dev_priv->mm.interruptible = true; 5171 5172 i915_gem_shrinker_init(dev_priv); 5173 5174 lockinit(&dev_priv->fb_tracking.lock, "drmftl", 0, LK_CANRECURSE); 5175 } 5176 5177 void i915_gem_release(struct drm_device *dev, struct drm_file *file) 5178 { 5179 struct drm_i915_file_private *file_priv = file->driver_priv; 5180 5181 /* Clean up our request list when the client is going away, so that 5182 * later retire_requests won't dereference our soon-to-be-gone 5183 * file_priv. 
5184 */ 5185 spin_lock(&file_priv->mm.lock); 5186 while (!list_empty(&file_priv->mm.request_list)) { 5187 struct drm_i915_gem_request *request; 5188 5189 request = list_first_entry(&file_priv->mm.request_list, 5190 struct drm_i915_gem_request, 5191 client_list); 5192 list_del(&request->client_list); 5193 request->file_priv = NULL; 5194 } 5195 spin_unlock(&file_priv->mm.lock); 5196 5197 if (!list_empty(&file_priv->rps.link)) { 5198 lockmgr(&to_i915(dev)->rps.client_lock, LK_EXCLUSIVE); 5199 list_del(&file_priv->rps.link); 5200 lockmgr(&to_i915(dev)->rps.client_lock, LK_RELEASE); 5201 } 5202 } 5203 5204 int 5205 i915_gem_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot, 5206 vm_ooffset_t foff, struct ucred *cred, u_short *color) 5207 { 5208 *color = 0; /* XXXKIB */ 5209 return (0); 5210 } 5211 5212 void 5213 i915_gem_pager_dtor(void *handle) 5214 { 5215 struct drm_gem_object *obj; 5216 struct drm_device *dev; 5217 5218 obj = handle; 5219 dev = obj->dev; 5220 5221 mutex_lock(&dev->struct_mutex); 5222 drm_gem_free_mmap_offset(obj); 5223 i915_gem_release_mmap(to_intel_bo(obj)); 5224 drm_gem_object_unreference(obj); 5225 mutex_unlock(&dev->struct_mutex); 5226 } 5227 5228 int i915_gem_open(struct drm_device *dev, struct drm_file *file) 5229 { 5230 struct drm_i915_file_private *file_priv; 5231 int ret; 5232 5233 DRM_DEBUG_DRIVER("\n"); 5234 5235 file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL); 5236 if (!file_priv) 5237 return -ENOMEM; 5238 5239 file->driver_priv = file_priv; 5240 file_priv->dev_priv = dev->dev_private; 5241 file_priv->file = file; 5242 INIT_LIST_HEAD(&file_priv->rps.link); 5243 5244 spin_init(&file_priv->mm.lock, "i915_priv"); 5245 INIT_LIST_HEAD(&file_priv->mm.request_list); 5246 5247 ret = i915_gem_context_open(dev, file); 5248 if (ret) 5249 kfree(file_priv); 5250 5251 return ret; 5252 } 5253 5254 /** 5255 * i915_gem_track_fb - update frontbuffer tracking 5256 * @old: current GEM buffer for the frontbuffer slots 5257 * @new: new GEM buffer for the frontbuffer slots 5258 * @frontbuffer_bits: bitmask of frontbuffer slots 5259 * 5260 * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them 5261 * from @old and setting them in @new. Both @old and @new can be NULL. 5262 */ 5263 void i915_gem_track_fb(struct drm_i915_gem_object *old, 5264 struct drm_i915_gem_object *new, 5265 unsigned frontbuffer_bits) 5266 { 5267 if (old) { 5268 WARN_ON(!mutex_is_locked(&old->base.dev->struct_mutex)); 5269 WARN_ON(!(old->frontbuffer_bits & frontbuffer_bits)); 5270 old->frontbuffer_bits &= ~frontbuffer_bits; 5271 } 5272 5273 if (new) { 5274 WARN_ON(!mutex_is_locked(&new->base.dev->struct_mutex)); 5275 WARN_ON(new->frontbuffer_bits & frontbuffer_bits); 5276 new->frontbuffer_bits |= frontbuffer_bits; 5277 } 5278 } 5279 5280 /* All the new VM stuff */ 5281 u64 i915_gem_obj_offset(struct drm_i915_gem_object *o, 5282 struct i915_address_space *vm) 5283 { 5284 struct drm_i915_private *dev_priv = o->base.dev->dev_private; 5285 struct i915_vma *vma; 5286 5287 WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base); 5288 5289 list_for_each_entry(vma, &o->vma_list, vma_link) { 5290 if (i915_is_ggtt(vma->vm) && 5291 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) 5292 continue; 5293 if (vma->vm == vm) 5294 return vma->node.start; 5295 } 5296 5297 WARN(1, "%s vma for this object not found.\n", 5298 i915_is_ggtt(vm) ? 
"global" : "ppgtt"); 5299 return -1; 5300 } 5301 5302 u64 i915_gem_obj_ggtt_offset_view(struct drm_i915_gem_object *o, 5303 const struct i915_ggtt_view *view) 5304 { 5305 struct i915_address_space *ggtt = i915_obj_to_ggtt(o); 5306 struct i915_vma *vma; 5307 5308 list_for_each_entry(vma, &o->vma_list, vma_link) 5309 if (vma->vm == ggtt && 5310 i915_ggtt_view_equal(&vma->ggtt_view, view)) 5311 return vma->node.start; 5312 5313 WARN(1, "global vma for this object not found. (view=%u)\n", view->type); 5314 return -1; 5315 } 5316 5317 bool i915_gem_obj_bound(struct drm_i915_gem_object *o, 5318 struct i915_address_space *vm) 5319 { 5320 struct i915_vma *vma; 5321 5322 list_for_each_entry(vma, &o->vma_list, vma_link) { 5323 if (i915_is_ggtt(vma->vm) && 5324 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) 5325 continue; 5326 if (vma->vm == vm && drm_mm_node_allocated(&vma->node)) 5327 return true; 5328 } 5329 5330 return false; 5331 } 5332 5333 bool i915_gem_obj_ggtt_bound_view(struct drm_i915_gem_object *o, 5334 const struct i915_ggtt_view *view) 5335 { 5336 struct i915_address_space *ggtt = i915_obj_to_ggtt(o); 5337 struct i915_vma *vma; 5338 5339 list_for_each_entry(vma, &o->vma_list, vma_link) 5340 if (vma->vm == ggtt && 5341 i915_ggtt_view_equal(&vma->ggtt_view, view) && 5342 drm_mm_node_allocated(&vma->node)) 5343 return true; 5344 5345 return false; 5346 } 5347 5348 bool i915_gem_obj_bound_any(struct drm_i915_gem_object *o) 5349 { 5350 struct i915_vma *vma; 5351 5352 list_for_each_entry(vma, &o->vma_list, vma_link) 5353 if (drm_mm_node_allocated(&vma->node)) 5354 return true; 5355 5356 return false; 5357 } 5358 5359 unsigned long i915_gem_obj_size(struct drm_i915_gem_object *o, 5360 struct i915_address_space *vm) 5361 { 5362 struct drm_i915_private *dev_priv = o->base.dev->dev_private; 5363 struct i915_vma *vma; 5364 5365 WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base); 5366 5367 BUG_ON(list_empty(&o->vma_list)); 5368 5369 list_for_each_entry(vma, &o->vma_list, vma_link) { 5370 if (i915_is_ggtt(vma->vm) && 5371 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) 5372 continue; 5373 if (vma->vm == vm) 5374 return vma->node.size; 5375 } 5376 return 0; 5377 } 5378 5379 bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj) 5380 { 5381 struct i915_vma *vma; 5382 list_for_each_entry(vma, &obj->vma_list, vma_link) 5383 if (vma->pin_count > 0) 5384 return true; 5385 5386 return false; 5387 } 5388 5389 #if 0 5390 /* Allocate a new GEM object and fill it with the supplied data */ 5391 struct drm_i915_gem_object * 5392 i915_gem_object_create_from_data(struct drm_device *dev, 5393 const void *data, size_t size) 5394 { 5395 struct drm_i915_gem_object *obj; 5396 struct sg_table *sg; 5397 size_t bytes; 5398 int ret; 5399 5400 obj = i915_gem_alloc_object(dev, round_up(size, PAGE_SIZE)); 5401 if (IS_ERR_OR_NULL(obj)) 5402 return obj; 5403 5404 ret = i915_gem_object_set_to_cpu_domain(obj, true); 5405 if (ret) 5406 goto fail; 5407 5408 ret = i915_gem_object_get_pages(obj); 5409 if (ret) 5410 goto fail; 5411 5412 i915_gem_object_pin_pages(obj); 5413 sg = obj->pages; 5414 bytes = sg_copy_from_buffer(sg->sgl, sg->nents, (void *)data, size); 5415 i915_gem_object_unpin_pages(obj); 5416 5417 if (WARN_ON(bytes != size)) { 5418 DRM_ERROR("Incomplete copy, wrote %zu of %zu", bytes, size); 5419 ret = -EFAULT; 5420 goto fail; 5421 } 5422 5423 return obj; 5424 5425 fail: 5426 drm_gem_object_unreference(&obj->base); 5427 return ERR_PTR(ret); 5428 } 5429 #endif 5430