1 /* 2 * Copyright © 2008-2015 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 * Authors: 24 * Eric Anholt <eric@anholt.net> 25 * 26 */ 27 28 #include <drm/drmP.h> 29 #include <drm/drm_vma_manager.h> 30 #include <drm/i915_drm.h> 31 #include "i915_drv.h" 32 #include "i915_vgpu.h" 33 #include "i915_trace.h" 34 #include "intel_drv.h" 35 #include "intel_mocs.h" 36 #include <linux/shmem_fs.h> 37 #include <linux/slab.h> 38 #include <linux/swap.h> 39 #include <linux/pci.h> 40 #include <linux/dma-buf.h> 41 42 #include <sys/mman.h> 43 #include <vm/vm_map.h> 44 #include <vm/vm_param.h> 45 46 static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj); 47 static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj); 48 static void 49 i915_gem_object_retire__write(struct drm_i915_gem_object *obj); 50 static void 51 i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring); 52 53 static bool cpu_cache_is_coherent(struct drm_device *dev, 54 enum i915_cache_level level) 55 { 56 return HAS_LLC(dev) || level != I915_CACHE_NONE; 57 } 58 59 static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) 60 { 61 if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) 62 return true; 63 64 return obj->pin_display; 65 } 66 67 /* some bookkeeping */ 68 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv, 69 size_t size) 70 { 71 spin_lock(&dev_priv->mm.object_stat_lock); 72 dev_priv->mm.object_count++; 73 dev_priv->mm.object_memory += size; 74 spin_unlock(&dev_priv->mm.object_stat_lock); 75 } 76 77 static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv, 78 size_t size) 79 { 80 spin_lock(&dev_priv->mm.object_stat_lock); 81 dev_priv->mm.object_count--; 82 dev_priv->mm.object_memory -= size; 83 spin_unlock(&dev_priv->mm.object_stat_lock); 84 } 85 86 static int 87 i915_gem_wait_for_error(struct i915_gpu_error *error) 88 { 89 int ret; 90 91 if (!i915_reset_in_progress(error)) 92 return 0; 93 94 /* 95 * Only wait 10 seconds for the gpu reset to complete to avoid hanging 96 * userspace. If it takes that long something really bad is going on and 97 * we should simply try to bail out and fail as gracefully as possible. 
98 */ 99 ret = wait_event_interruptible_timeout(error->reset_queue, 100 !i915_reset_in_progress(error), 101 10*HZ); 102 if (ret == 0) { 103 DRM_ERROR("Timed out waiting for the gpu reset to complete\n"); 104 return -EIO; 105 } else if (ret < 0) { 106 return ret; 107 } else { 108 return 0; 109 } 110 } 111 112 int i915_mutex_lock_interruptible(struct drm_device *dev) 113 { 114 struct drm_i915_private *dev_priv = dev->dev_private; 115 int ret; 116 117 ret = i915_gem_wait_for_error(&dev_priv->gpu_error); 118 if (ret) 119 return ret; 120 121 ret = mutex_lock_interruptible(&dev->struct_mutex); 122 if (ret) 123 return ret; 124 125 WARN_ON(i915_verify_lists(dev)); 126 return 0; 127 } 128 129 int 130 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, 131 struct drm_file *file) 132 { 133 struct drm_i915_private *dev_priv = to_i915(dev); 134 struct i915_ggtt *ggtt = &dev_priv->ggtt; 135 struct drm_i915_gem_get_aperture *args = data; 136 struct i915_vma *vma; 137 size_t pinned; 138 139 pinned = 0; 140 mutex_lock(&dev->struct_mutex); 141 list_for_each_entry(vma, &ggtt->base.active_list, vm_link) 142 if (vma->pin_count) 143 pinned += vma->node.size; 144 list_for_each_entry(vma, &ggtt->base.inactive_list, vm_link) 145 if (vma->pin_count) 146 pinned += vma->node.size; 147 mutex_unlock(&dev->struct_mutex); 148 149 args->aper_size = ggtt->base.total; 150 args->aper_available_size = args->aper_size - pinned; 151 152 return 0; 153 } 154 155 #if 0 156 static int 157 i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) 158 { 159 struct address_space *mapping = file_inode(obj->base.filp)->i_mapping; 160 char *vaddr = obj->phys_handle->vaddr; 161 struct sg_table *st; 162 struct scatterlist *sg; 163 int i; 164 165 if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj))) 166 return -EINVAL; 167 168 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) { 169 struct page *page; 170 char *src; 171 172 page = shmem_read_mapping_page(mapping, i); 173 if (IS_ERR(page)) 174 return PTR_ERR(page); 175 176 src = kmap_atomic(page); 177 memcpy(vaddr, src, PAGE_SIZE); 178 drm_clflush_virt_range(vaddr, PAGE_SIZE); 179 kunmap_atomic(src); 180 181 put_page(page); 182 vaddr += PAGE_SIZE; 183 } 184 185 i915_gem_chipset_flush(obj->base.dev); 186 187 st = kmalloc(sizeof(*st), GFP_KERNEL); 188 if (st == NULL) 189 return -ENOMEM; 190 191 if (sg_alloc_table(st, 1, GFP_KERNEL)) { 192 kfree(st); 193 return -ENOMEM; 194 } 195 196 sg = st->sgl; 197 sg->offset = 0; 198 sg->length = obj->base.size; 199 200 sg_dma_address(sg) = obj->phys_handle->busaddr; 201 sg_dma_len(sg) = obj->base.size; 202 203 obj->pages = st; 204 return 0; 205 } 206 207 static void 208 i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj) 209 { 210 int ret; 211 212 BUG_ON(obj->madv == __I915_MADV_PURGED); 213 214 ret = i915_gem_object_set_to_cpu_domain(obj, true); 215 if (WARN_ON(ret)) { 216 /* In the event of a disaster, abandon all caches and 217 * hope for the best. 
218 */ 219 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU; 220 } 221 222 if (obj->madv == I915_MADV_DONTNEED) 223 obj->dirty = 0; 224 225 if (obj->dirty) { 226 struct address_space *mapping = file_inode(obj->base.filp)->i_mapping; 227 char *vaddr = obj->phys_handle->vaddr; 228 int i; 229 230 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) { 231 struct page *page; 232 char *dst; 233 234 page = shmem_read_mapping_page(mapping, i); 235 if (IS_ERR(page)) 236 continue; 237 238 dst = kmap_atomic(page); 239 drm_clflush_virt_range(vaddr, PAGE_SIZE); 240 memcpy(dst, vaddr, PAGE_SIZE); 241 kunmap_atomic(dst); 242 243 set_page_dirty(page); 244 if (obj->madv == I915_MADV_WILLNEED) 245 mark_page_accessed(page); 246 put_page(page); 247 vaddr += PAGE_SIZE; 248 } 249 obj->dirty = 0; 250 } 251 252 sg_free_table(obj->pages); 253 kfree(obj->pages); 254 } 255 256 static void 257 i915_gem_object_release_phys(struct drm_i915_gem_object *obj) 258 { 259 drm_pci_free(obj->base.dev, obj->phys_handle); 260 } 261 262 static const struct drm_i915_gem_object_ops i915_gem_phys_ops = { 263 .get_pages = i915_gem_object_get_pages_phys, 264 .put_pages = i915_gem_object_put_pages_phys, 265 .release = i915_gem_object_release_phys, 266 }; 267 #endif 268 269 static int 270 drop_pages(struct drm_i915_gem_object *obj) 271 { 272 struct i915_vma *vma, *next; 273 int ret; 274 275 drm_gem_object_reference(&obj->base); 276 list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) 277 if (i915_vma_unbind(vma)) 278 break; 279 280 ret = i915_gem_object_put_pages(obj); 281 drm_gem_object_unreference(&obj->base); 282 283 return ret; 284 } 285 286 int 287 i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, 288 int align) 289 { 290 drm_dma_handle_t *phys; 291 int ret; 292 293 if (obj->phys_handle) { 294 if ((unsigned long)obj->phys_handle->vaddr & (align -1)) 295 return -EBUSY; 296 297 return 0; 298 } 299 300 if (obj->madv != I915_MADV_WILLNEED) 301 return -EFAULT; 302 303 if (obj->base.filp == NULL) 304 return -EINVAL; 305 306 ret = drop_pages(obj); 307 if (ret) 308 return ret; 309 310 /* create a new object */ 311 phys = drm_pci_alloc(obj->base.dev, obj->base.size, align); 312 if (!phys) 313 return -ENOMEM; 314 315 obj->phys_handle = phys; 316 #if 0 317 obj->ops = &i915_gem_phys_ops; 318 #endif 319 320 return i915_gem_object_get_pages(obj); 321 } 322 323 static int 324 i915_gem_phys_pwrite(struct drm_i915_gem_object *obj, 325 struct drm_i915_gem_pwrite *args, 326 struct drm_file *file_priv) 327 { 328 struct drm_device *dev = obj->base.dev; 329 void *vaddr = (char *)obj->phys_handle->vaddr + args->offset; 330 char __user *user_data = u64_to_user_ptr(args->data_ptr); 331 int ret = 0; 332 333 /* We manually control the domain here and pretend that it 334 * remains coherent i.e. in the GTT domain, like shmem_pwrite. 335 */ 336 ret = i915_gem_object_wait_rendering(obj, false); 337 if (ret) 338 return ret; 339 340 intel_fb_obj_invalidate(obj, ORIGIN_CPU); 341 if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) { 342 unsigned long unwritten; 343 344 /* The physical object once assigned is fixed for the lifetime 345 * of the obj, so we can safely drop the lock and continue 346 * to access vaddr. 
347 */ 348 mutex_unlock(&dev->struct_mutex); 349 unwritten = copy_from_user(vaddr, user_data, args->size); 350 mutex_lock(&dev->struct_mutex); 351 if (unwritten) { 352 ret = -EFAULT; 353 goto out; 354 } 355 } 356 357 drm_clflush_virt_range(vaddr, args->size); 358 i915_gem_chipset_flush(dev); 359 360 out: 361 intel_fb_obj_flush(obj, false, ORIGIN_CPU); 362 return ret; 363 } 364 365 void *i915_gem_object_alloc(struct drm_device *dev) 366 { 367 return kmalloc(sizeof(struct drm_i915_gem_object), 368 M_DRM, M_WAITOK | M_ZERO); 369 } 370 371 void i915_gem_object_free(struct drm_i915_gem_object *obj) 372 { 373 kfree(obj); 374 } 375 376 static int 377 i915_gem_create(struct drm_file *file, 378 struct drm_device *dev, 379 uint64_t size, 380 uint32_t *handle_p) 381 { 382 struct drm_i915_gem_object *obj; 383 int ret; 384 u32 handle; 385 386 size = roundup(size, PAGE_SIZE); 387 if (size == 0) 388 return -EINVAL; 389 390 /* Allocate the new object */ 391 obj = i915_gem_alloc_object(dev, size); 392 if (obj == NULL) 393 return -ENOMEM; 394 395 ret = drm_gem_handle_create(file, &obj->base, &handle); 396 /* drop reference from allocate - handle holds it now */ 397 drm_gem_object_unreference_unlocked(&obj->base); 398 if (ret) 399 return ret; 400 401 *handle_p = handle; 402 return 0; 403 } 404 405 int 406 i915_gem_dumb_create(struct drm_file *file, 407 struct drm_device *dev, 408 struct drm_mode_create_dumb *args) 409 { 410 /* have to work out size/pitch and return them */ 411 args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64); 412 args->size = args->pitch * args->height; 413 return i915_gem_create(file, dev, 414 args->size, &args->handle); 415 } 416 417 /** 418 * Creates a new mm object and returns a handle to it. 419 */ 420 int 421 i915_gem_create_ioctl(struct drm_device *dev, void *data, 422 struct drm_file *file) 423 { 424 struct drm_i915_gem_create *args = data; 425 426 return i915_gem_create(file, dev, 427 args->size, &args->handle); 428 } 429 430 static inline int 431 __copy_to_user_swizzled(char __user *cpu_vaddr, 432 const char *gpu_vaddr, int gpu_offset, 433 int length) 434 { 435 int ret, cpu_offset = 0; 436 437 while (length > 0) { 438 int cacheline_end = ALIGN(gpu_offset + 1, 64); 439 int this_length = min(cacheline_end - gpu_offset, length); 440 int swizzled_gpu_offset = gpu_offset ^ 64; 441 442 ret = __copy_to_user(cpu_vaddr + cpu_offset, 443 gpu_vaddr + swizzled_gpu_offset, 444 this_length); 445 if (ret) 446 return ret + length; 447 448 cpu_offset += this_length; 449 gpu_offset += this_length; 450 length -= this_length; 451 } 452 453 return 0; 454 } 455 456 static inline int 457 __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset, 458 const char __user *cpu_vaddr, 459 int length) 460 { 461 int ret, cpu_offset = 0; 462 463 while (length > 0) { 464 int cacheline_end = ALIGN(gpu_offset + 1, 64); 465 int this_length = min(cacheline_end - gpu_offset, length); 466 int swizzled_gpu_offset = gpu_offset ^ 64; 467 468 ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset, 469 cpu_vaddr + cpu_offset, 470 this_length); 471 if (ret) 472 return ret + length; 473 474 cpu_offset += this_length; 475 gpu_offset += this_length; 476 length -= this_length; 477 } 478 479 return 0; 480 } 481 482 /* 483 * Pins the specified object's pages and synchronizes the object with 484 * GPU accesses. Sets needs_clflush to non-zero if the caller should 485 * flush the object from the CPU cache. 
486 */ 487 int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, 488 int *needs_clflush) 489 { 490 int ret; 491 492 *needs_clflush = 0; 493 494 #if 0 495 if (WARN_ON((obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE) == 0)) 496 return -EINVAL; 497 #endif 498 499 if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) { 500 /* If we're not in the cpu read domain, set ourself into the gtt 501 * read domain and manually flush cachelines (if required). This 502 * optimizes for the case when the gpu will dirty the data 503 * anyway again before the next pread happens. */ 504 *needs_clflush = !cpu_cache_is_coherent(obj->base.dev, 505 obj->cache_level); 506 ret = i915_gem_object_wait_rendering(obj, true); 507 if (ret) 508 return ret; 509 } 510 511 ret = i915_gem_object_get_pages(obj); 512 if (ret) 513 return ret; 514 515 i915_gem_object_pin_pages(obj); 516 517 return ret; 518 } 519 520 /* Per-page copy function for the shmem pread fastpath. 521 * Flushes invalid cachelines before reading the target if 522 * needs_clflush is set. */ 523 static int 524 shmem_pread_fast(struct page *page, int shmem_page_offset, int page_length, 525 char __user *user_data, 526 bool page_do_bit17_swizzling, bool needs_clflush) 527 { 528 char *vaddr; 529 int ret; 530 531 if (unlikely(page_do_bit17_swizzling)) 532 return -EINVAL; 533 534 vaddr = kmap_atomic(page); 535 if (needs_clflush) 536 drm_clflush_virt_range(vaddr + shmem_page_offset, 537 page_length); 538 ret = __copy_to_user_inatomic(user_data, 539 vaddr + shmem_page_offset, 540 page_length); 541 kunmap_atomic(vaddr); 542 543 return ret ? -EFAULT : 0; 544 } 545 546 static void 547 shmem_clflush_swizzled_range(char *addr, unsigned long length, 548 bool swizzled) 549 { 550 if (unlikely(swizzled)) { 551 unsigned long start = (unsigned long) addr; 552 unsigned long end = (unsigned long) addr + length; 553 554 /* For swizzling simply ensure that we always flush both 555 * channels. Lame, but simple and it works. Swizzled 556 * pwrite/pread is far from a hotpath - current userspace 557 * doesn't use it at all. */ 558 start = round_down(start, 128); 559 end = round_up(end, 128); 560 561 drm_clflush_virt_range((void *)start, end - start); 562 } else { 563 drm_clflush_virt_range(addr, length); 564 } 565 566 } 567 568 /* Only difference to the fast-path function is that this can handle bit17 569 * and uses non-atomic copy and kmap functions. */ 570 static int 571 shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length, 572 char __user *user_data, 573 bool page_do_bit17_swizzling, bool needs_clflush) 574 { 575 char *vaddr; 576 int ret; 577 578 vaddr = kmap(page); 579 if (needs_clflush) 580 shmem_clflush_swizzled_range(vaddr + shmem_page_offset, 581 page_length, 582 page_do_bit17_swizzling); 583 584 if (page_do_bit17_swizzling) 585 ret = __copy_to_user_swizzled(user_data, 586 vaddr, shmem_page_offset, 587 page_length); 588 else 589 ret = __copy_to_user(user_data, 590 vaddr + shmem_page_offset, 591 page_length); 592 kunmap(page); 593 594 return ret ? 
- EFAULT : 0; 595 } 596 597 static int 598 i915_gem_shmem_pread(struct drm_device *dev, 599 struct drm_i915_gem_object *obj, 600 struct drm_i915_gem_pread *args, 601 struct drm_file *file) 602 { 603 char __user *user_data; 604 ssize_t remain; 605 loff_t offset; 606 int shmem_page_offset, page_length, ret = 0; 607 int obj_do_bit17_swizzling, page_do_bit17_swizzling; 608 int prefaulted = 0; 609 int needs_clflush = 0; 610 struct sg_page_iter sg_iter; 611 612 user_data = u64_to_user_ptr(args->data_ptr); 613 remain = args->size; 614 615 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); 616 617 ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush); 618 if (ret) 619 return ret; 620 621 offset = args->offset; 622 623 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 624 offset >> PAGE_SHIFT) { 625 struct page *page = sg_page_iter_page(&sg_iter); 626 627 if (remain <= 0) 628 break; 629 630 /* Operation in this page 631 * 632 * shmem_page_offset = offset within page in shmem file 633 * page_length = bytes to copy for this page 634 */ 635 shmem_page_offset = offset_in_page(offset); 636 page_length = remain; 637 if ((shmem_page_offset + page_length) > PAGE_SIZE) 638 page_length = PAGE_SIZE - shmem_page_offset; 639 640 page_do_bit17_swizzling = obj_do_bit17_swizzling && 641 (page_to_phys(page) & (1 << 17)) != 0; 642 643 ret = shmem_pread_fast(page, shmem_page_offset, page_length, 644 user_data, page_do_bit17_swizzling, 645 needs_clflush); 646 if (ret == 0) 647 goto next_page; 648 649 mutex_unlock(&dev->struct_mutex); 650 651 if (likely(!i915.prefault_disable) && !prefaulted) { 652 ret = fault_in_multipages_writeable(user_data, remain); 653 /* Userspace is tricking us, but we've already clobbered 654 * its pages with the prefault and promised to write the 655 * data up to the first fault. Hence ignore any errors 656 * and just continue. */ 657 (void)ret; 658 prefaulted = 1; 659 } 660 661 ret = shmem_pread_slow(page, shmem_page_offset, page_length, 662 user_data, page_do_bit17_swizzling, 663 needs_clflush); 664 665 mutex_lock(&dev->struct_mutex); 666 667 if (ret) 668 goto out; 669 670 next_page: 671 remain -= page_length; 672 user_data += page_length; 673 offset += page_length; 674 } 675 676 out: 677 i915_gem_object_unpin_pages(obj); 678 679 return ret; 680 } 681 682 /** 683 * Reads data from the object referenced by handle. 684 * 685 * On error, the contents of *data are undefined. 686 */ 687 int 688 i915_gem_pread_ioctl(struct drm_device *dev, void *data, 689 struct drm_file *file) 690 { 691 struct drm_i915_gem_pread *args = data; 692 struct drm_i915_gem_object *obj; 693 int ret = 0; 694 695 if (args->size == 0) 696 return 0; 697 698 #if 0 699 if (!access_ok(VERIFY_WRITE, 700 u64_to_user_ptr(args->data_ptr), 701 args->size)) 702 return -EFAULT; 703 #endif 704 705 ret = i915_mutex_lock_interruptible(dev); 706 if (ret) 707 return ret; 708 709 obj = to_intel_bo(drm_gem_object_lookup(file, args->handle)); 710 if (&obj->base == NULL) { 711 ret = -ENOENT; 712 goto unlock; 713 } 714 715 /* Bounds check source. */ 716 if (args->offset > obj->base.size || 717 args->size > obj->base.size - args->offset) { 718 ret = -EINVAL; 719 goto out; 720 } 721 722 /* prime objects have no backing filp to GEM pread/pwrite 723 * pages from. 
724 */ 725 726 trace_i915_gem_object_pread(obj, args->offset, args->size); 727 728 ret = i915_gem_shmem_pread(dev, obj, args, file); 729 730 out: 731 drm_gem_object_unreference(&obj->base); 732 unlock: 733 mutex_unlock(&dev->struct_mutex); 734 return ret; 735 } 736 737 /* This is the fast write path which cannot handle 738 * page faults in the source data 739 */ 740 741 static inline int 742 fast_user_write(struct io_mapping *mapping, 743 loff_t page_base, int page_offset, 744 char __user *user_data, 745 int length) 746 { 747 void __iomem *vaddr_atomic; 748 void *vaddr; 749 unsigned long unwritten; 750 751 vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base); 752 /* We can use the cpu mem copy function because this is X86. */ 753 vaddr = (char __force*)vaddr_atomic + page_offset; 754 unwritten = __copy_from_user_inatomic_nocache(vaddr, 755 user_data, length); 756 io_mapping_unmap_atomic(vaddr_atomic); 757 return unwritten; 758 } 759 760 /** 761 * This is the fast pwrite path, where we copy the data directly from the 762 * user into the GTT, uncached. 763 */ 764 static int 765 i915_gem_gtt_pwrite_fast(struct drm_device *dev, 766 struct drm_i915_gem_object *obj, 767 struct drm_i915_gem_pwrite *args, 768 struct drm_file *file) 769 { 770 struct drm_i915_private *dev_priv = to_i915(dev); 771 struct i915_ggtt *ggtt = &dev_priv->ggtt; 772 ssize_t remain; 773 loff_t offset, page_base; 774 char __user *user_data; 775 int page_offset, page_length, ret; 776 777 ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK); 778 if (ret) 779 goto out; 780 781 ret = i915_gem_object_set_to_gtt_domain(obj, true); 782 if (ret) 783 goto out_unpin; 784 785 ret = i915_gem_object_put_fence(obj); 786 if (ret) 787 goto out_unpin; 788 789 user_data = u64_to_user_ptr(args->data_ptr); 790 remain = args->size; 791 792 offset = i915_gem_obj_ggtt_offset(obj) + args->offset; 793 794 intel_fb_obj_invalidate(obj, ORIGIN_GTT); 795 796 while (remain > 0) { 797 /* Operation in this page 798 * 799 * page_base = page offset within aperture 800 * page_offset = offset within page 801 * page_length = bytes to copy for this page 802 */ 803 page_base = offset & LINUX_PAGE_MASK; 804 page_offset = offset_in_page(offset); 805 page_length = remain; 806 if ((page_offset + remain) > PAGE_SIZE) 807 page_length = PAGE_SIZE - page_offset; 808 809 /* If we get a fault while copying data, then (presumably) our 810 * source page isn't available. Return the error and we'll 811 * retry in the slow path. 812 */ 813 if (fast_user_write(ggtt->mappable, page_base, 814 page_offset, user_data, page_length)) { 815 ret = -EFAULT; 816 goto out_flush; 817 } 818 819 remain -= page_length; 820 user_data += page_length; 821 offset += page_length; 822 } 823 824 out_flush: 825 intel_fb_obj_flush(obj, false, ORIGIN_GTT); 826 out_unpin: 827 i915_gem_object_ggtt_unpin(obj); 828 out: 829 return ret; 830 } 831 832 /* Per-page copy function for the shmem pwrite fastpath. 833 * Flushes invalid cachelines before writing to the target if 834 * needs_clflush_before is set and flushes out any written cachelines after 835 * writing if needs_clflush is set. 
*/ 836 static int 837 shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length, 838 char __user *user_data, 839 bool page_do_bit17_swizzling, 840 bool needs_clflush_before, 841 bool needs_clflush_after) 842 { 843 char *vaddr; 844 int ret; 845 846 if (unlikely(page_do_bit17_swizzling)) 847 return -EINVAL; 848 849 vaddr = kmap_atomic(page); 850 if (needs_clflush_before) 851 drm_clflush_virt_range(vaddr + shmem_page_offset, 852 page_length); 853 ret = __copy_from_user_inatomic(vaddr + shmem_page_offset, 854 user_data, page_length); 855 if (needs_clflush_after) 856 drm_clflush_virt_range(vaddr + shmem_page_offset, 857 page_length); 858 kunmap_atomic(vaddr); 859 860 return ret ? -EFAULT : 0; 861 } 862 863 /* Only difference to the fast-path function is that this can handle bit17 864 * and uses non-atomic copy and kmap functions. */ 865 static int 866 shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length, 867 char __user *user_data, 868 bool page_do_bit17_swizzling, 869 bool needs_clflush_before, 870 bool needs_clflush_after) 871 { 872 char *vaddr; 873 int ret; 874 875 vaddr = kmap(page); 876 if (unlikely(needs_clflush_before || page_do_bit17_swizzling)) 877 shmem_clflush_swizzled_range(vaddr + shmem_page_offset, 878 page_length, 879 page_do_bit17_swizzling); 880 if (page_do_bit17_swizzling) 881 ret = __copy_from_user_swizzled(vaddr, shmem_page_offset, 882 user_data, 883 page_length); 884 else 885 ret = __copy_from_user(vaddr + shmem_page_offset, 886 user_data, 887 page_length); 888 if (needs_clflush_after) 889 shmem_clflush_swizzled_range(vaddr + shmem_page_offset, 890 page_length, 891 page_do_bit17_swizzling); 892 kunmap(page); 893 894 return ret ? -EFAULT : 0; 895 } 896 897 static int 898 i915_gem_shmem_pwrite(struct drm_device *dev, 899 struct drm_i915_gem_object *obj, 900 struct drm_i915_gem_pwrite *args, 901 struct drm_file *file) 902 { 903 ssize_t remain; 904 loff_t offset; 905 char __user *user_data; 906 int shmem_page_offset, page_length, ret = 0; 907 int obj_do_bit17_swizzling, page_do_bit17_swizzling; 908 int hit_slowpath = 0; 909 int needs_clflush_after = 0; 910 int needs_clflush_before = 0; 911 struct sg_page_iter sg_iter; 912 913 user_data = u64_to_user_ptr(args->data_ptr); 914 remain = args->size; 915 916 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); 917 918 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) { 919 /* If we're not in the cpu write domain, set ourself into the gtt 920 * write domain and manually flush cachelines (if required). This 921 * optimizes for the case when the gpu will use the data 922 * right away and we therefore have to clflush anyway. */ 923 needs_clflush_after = cpu_write_needs_clflush(obj); 924 ret = i915_gem_object_wait_rendering(obj, false); 925 if (ret) 926 return ret; 927 } 928 /* Same trick applies to invalidate partially written cachelines read 929 * before writing. 
*/ 930 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) 931 needs_clflush_before = 932 !cpu_cache_is_coherent(dev, obj->cache_level); 933 934 ret = i915_gem_object_get_pages(obj); 935 if (ret) 936 return ret; 937 938 intel_fb_obj_invalidate(obj, ORIGIN_CPU); 939 940 i915_gem_object_pin_pages(obj); 941 942 offset = args->offset; 943 obj->dirty = 1; 944 945 VM_OBJECT_LOCK(obj->base.filp); 946 vm_object_pip_add(obj->base.filp, 1); 947 948 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 949 offset >> PAGE_SHIFT) { 950 struct page *page = sg_page_iter_page(&sg_iter); 951 int partial_cacheline_write; 952 953 if (remain <= 0) 954 break; 955 956 /* Operation in this page 957 * 958 * shmem_page_offset = offset within page in shmem file 959 * page_length = bytes to copy for this page 960 */ 961 shmem_page_offset = offset_in_page(offset); 962 963 page_length = remain; 964 if ((shmem_page_offset + page_length) > PAGE_SIZE) 965 page_length = PAGE_SIZE - shmem_page_offset; 966 967 /* If we don't overwrite a cacheline completely we need to be 968 * careful to have up-to-date data by first clflushing. Don't 969 * overcomplicate things and flush the entire patch. */ 970 partial_cacheline_write = needs_clflush_before && 971 ((shmem_page_offset | page_length) 972 & (cpu_clflush_line_size - 1)); 973 974 page_do_bit17_swizzling = obj_do_bit17_swizzling && 975 (page_to_phys(page) & (1 << 17)) != 0; 976 977 ret = shmem_pwrite_fast(page, shmem_page_offset, page_length, 978 user_data, page_do_bit17_swizzling, 979 partial_cacheline_write, 980 needs_clflush_after); 981 if (ret == 0) 982 goto next_page; 983 984 hit_slowpath = 1; 985 mutex_unlock(&dev->struct_mutex); 986 ret = shmem_pwrite_slow(page, shmem_page_offset, page_length, 987 user_data, page_do_bit17_swizzling, 988 partial_cacheline_write, 989 needs_clflush_after); 990 991 mutex_lock(&dev->struct_mutex); 992 993 if (ret) 994 goto out; 995 996 next_page: 997 remain -= page_length; 998 user_data += page_length; 999 offset += page_length; 1000 } 1001 vm_object_pip_wakeup(obj->base.filp); 1002 VM_OBJECT_UNLOCK(obj->base.filp); 1003 1004 out: 1005 i915_gem_object_unpin_pages(obj); 1006 1007 if (hit_slowpath) { 1008 /* 1009 * Fixup: Flush cpu caches in case we didn't flush the dirty 1010 * cachelines in-line while writing and the object moved 1011 * out of the cpu write domain while we've dropped the lock. 1012 */ 1013 if (!needs_clflush_after && 1014 obj->base.write_domain != I915_GEM_DOMAIN_CPU) { 1015 if (i915_gem_clflush_object(obj, obj->pin_display)) 1016 needs_clflush_after = true; 1017 } 1018 } 1019 1020 if (needs_clflush_after) 1021 i915_gem_chipset_flush(dev); 1022 else 1023 obj->cache_dirty = true; 1024 1025 intel_fb_obj_flush(obj, false, ORIGIN_CPU); 1026 return ret; 1027 } 1028 1029 /** 1030 * Writes data to the object referenced by handle. 1031 * 1032 * On error, the contents of the buffer that were to be modified are undefined. 
1033 */ 1034 int 1035 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, 1036 struct drm_file *file) 1037 { 1038 struct drm_i915_private *dev_priv = dev->dev_private; 1039 struct drm_i915_gem_pwrite *args = data; 1040 struct drm_i915_gem_object *obj; 1041 int ret; 1042 1043 if (args->size == 0) 1044 return 0; 1045 1046 #if 0 1047 if (!access_ok(VERIFY_READ, 1048 u64_to_user_ptr(args->data_ptr), 1049 args->size)) 1050 return -EFAULT; 1051 #endif 1052 1053 if (likely(!i915.prefault_disable)) { 1054 ret = fault_in_multipages_readable(u64_to_user_ptr(args->data_ptr), 1055 args->size); 1056 if (ret) 1057 return -EFAULT; 1058 } 1059 1060 intel_runtime_pm_get(dev_priv); 1061 1062 ret = i915_mutex_lock_interruptible(dev); 1063 if (ret) 1064 goto put_rpm; 1065 1066 obj = to_intel_bo(drm_gem_object_lookup(file, args->handle)); 1067 if (&obj->base == NULL) { 1068 ret = -ENOENT; 1069 goto unlock; 1070 } 1071 1072 /* Bounds check destination. */ 1073 if (args->offset > obj->base.size || 1074 args->size > obj->base.size - args->offset) { 1075 ret = -EINVAL; 1076 goto out; 1077 } 1078 1079 /* prime objects have no backing filp to GEM pread/pwrite 1080 * pages from. 1081 */ 1082 1083 trace_i915_gem_object_pwrite(obj, args->offset, args->size); 1084 1085 ret = -EFAULT; 1086 /* We can only do the GTT pwrite on untiled buffers, as otherwise 1087 * it would end up going through the fenced access, and we'll get 1088 * different detiling behavior between reading and writing. 1089 * pread/pwrite currently are reading and writing from the CPU 1090 * perspective, requiring manual detiling by the client. 1091 */ 1092 if (obj->tiling_mode == I915_TILING_NONE && 1093 obj->base.write_domain != I915_GEM_DOMAIN_CPU && 1094 cpu_write_needs_clflush(obj)) { 1095 ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file); 1096 /* Note that the gtt paths might fail with non-page-backed user 1097 * pointers (e.g. gtt mappings when moving data between 1098 * textures). Fallback to the shmem path in that case. */ 1099 } 1100 1101 if (ret == -EFAULT || ret == -ENOSPC) { 1102 if (obj->phys_handle) 1103 ret = i915_gem_phys_pwrite(obj, args, file); 1104 else 1105 ret = i915_gem_shmem_pwrite(dev, obj, args, file); 1106 } 1107 1108 out: 1109 drm_gem_object_unreference(&obj->base); 1110 unlock: 1111 mutex_unlock(&dev->struct_mutex); 1112 put_rpm: 1113 intel_runtime_pm_put(dev_priv); 1114 1115 return ret; 1116 } 1117 1118 static int 1119 i915_gem_check_wedge(unsigned reset_counter, bool interruptible) 1120 { 1121 if (__i915_terminally_wedged(reset_counter)) 1122 return -EIO; 1123 1124 if (__i915_reset_in_progress(reset_counter)) { 1125 /* Non-interruptible callers can't handle -EAGAIN, hence return 1126 * -EIO unconditionally for these. */ 1127 if (!interruptible) 1128 return -EIO; 1129 1130 return -EAGAIN; 1131 } 1132 1133 return 0; 1134 } 1135 1136 static void fake_irq(unsigned long data) 1137 { 1138 wakeup_one((void *)data); 1139 } 1140 1141 static bool missed_irq(struct drm_i915_private *dev_priv, 1142 struct intel_engine_cs *engine) 1143 { 1144 return test_bit(engine->id, &dev_priv->gpu_error.missed_irq_rings); 1145 } 1146 1147 #if 0 1148 static int __i915_spin_request(struct drm_i915_gem_request *req, int state) 1149 { 1150 unsigned long timeout; 1151 unsigned cpu; 1152 1153 /* When waiting for high frequency requests, e.g. during synchronous 1154 * rendering split between the CPU and GPU, the finite amount of time 1155 * required to set up the irq and wait upon it limits the response 1156 * rate. 
By busywaiting on the request completion for a short while we 1157 * can service the high frequency waits as quick as possible. However, 1158 * if it is a slow request, we want to sleep as quickly as possible. 1159 * The tradeoff between waiting and sleeping is roughly the time it 1160 * takes to sleep on a request, on the order of a microsecond. 1161 */ 1162 1163 if (req->engine->irq_refcount) 1164 return -EBUSY; 1165 1166 /* Only spin if we know the GPU is processing this request */ 1167 if (!i915_gem_request_started(req, true)) 1168 return -EAGAIN; 1169 1170 timeout = local_clock_us(&cpu) + 5; 1171 while (!need_resched()) { 1172 if (i915_gem_request_completed(req, true)) 1173 return 0; 1174 1175 if (signal_pending_state(state, current)) 1176 break; 1177 1178 if (busywait_stop(timeout, cpu)) 1179 break; 1180 1181 cpu_relax_lowlatency(); 1182 } 1183 1184 if (i915_gem_request_completed(req, false)) 1185 return 0; 1186 1187 return -EAGAIN; 1188 } 1189 #endif 1190 1191 /** 1192 * __i915_wait_request - wait until execution of request has finished 1193 * @req: duh! 1194 * @interruptible: do an interruptible wait (normally yes) 1195 * @timeout: in - how long to wait (NULL forever); out - how much time remaining 1196 * 1197 * Note: It is of utmost importance that the passed in seqno and reset_counter 1198 * values have been read by the caller in an smp safe manner. Where read-side 1199 * locks are involved, it is sufficient to read the reset_counter before 1200 * unlocking the lock that protects the seqno. For lockless tricks, the 1201 * reset_counter _must_ be read before, and an appropriate smp_rmb must be 1202 * inserted. 1203 * 1204 * Returns 0 if the request was found within the alloted time. Else returns the 1205 * errno with remaining time filled in timeout argument. 1206 */ 1207 int __i915_wait_request(struct drm_i915_gem_request *req, 1208 bool interruptible, 1209 s64 *timeout, 1210 struct intel_rps_client *rps) 1211 { 1212 struct intel_engine_cs *engine = i915_gem_request_get_engine(req); 1213 struct drm_device *dev = engine->dev; 1214 struct drm_i915_private *dev_priv = dev->dev_private; 1215 const bool irq_test_in_progress = 1216 ACCESS_ONCE(dev_priv->gpu_error.test_irq_rings) & intel_engine_flag(engine); 1217 int state = interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; 1218 unsigned long timeout_expire; 1219 s64 before = 0; /* Only to silence a compiler warning. */ 1220 int ret, sl_timeout = 1; 1221 1222 WARN(!intel_irqs_enabled(dev_priv), "IRQs disabled"); 1223 1224 if (list_empty(&req->list)) 1225 return 0; 1226 1227 if (i915_gem_request_completed(req, true)) 1228 return 0; 1229 1230 timeout_expire = 0; 1231 if (timeout) { 1232 if (WARN_ON(*timeout < 0)) 1233 return -EINVAL; 1234 1235 if (*timeout == 0) 1236 return -ETIME; 1237 1238 timeout_expire = jiffies + nsecs_to_jiffies_timeout(*timeout); 1239 1240 /* 1241 * Record current time in case interrupted by signal, or wedged. 
1242 */ 1243 before = ktime_get_raw_ns(); 1244 } 1245 1246 if (INTEL_INFO(dev_priv)->gen >= 6) 1247 gen6_rps_boost(dev_priv, rps, req->emitted_jiffies); 1248 1249 trace_i915_gem_request_wait_begin(req); 1250 1251 /* Optimistic spin for the next jiffie before touching IRQs */ 1252 #if 0 1253 ret = __i915_spin_request(req); 1254 if (ret == 0) 1255 goto out; 1256 #endif 1257 1258 if (!irq_test_in_progress && WARN_ON(!engine->irq_get(engine))) { 1259 ret = -ENODEV; 1260 goto out; 1261 } 1262 1263 lockmgr(&engine->irq_queue.lock, LK_EXCLUSIVE); 1264 for (;;) { 1265 struct timer_list timer; 1266 1267 /* We need to check whether any gpu reset happened in between 1268 * the request being submitted and now. If a reset has occurred, 1269 * the request is effectively complete (we either are in the 1270 * process of or have discarded the rendering and completely 1271 * reset the GPU. The results of the request are lost and we 1272 * are free to continue on with the original operation. 1273 */ 1274 if (req->reset_counter != i915_reset_counter(&dev_priv->gpu_error)) { 1275 ret = 0; 1276 break; 1277 } 1278 1279 if (i915_gem_request_completed(req, false)) { 1280 ret = 0; 1281 break; 1282 } 1283 1284 if (signal_pending_state(state, current)) { 1285 ret = -ERESTARTSYS; 1286 break; 1287 } 1288 1289 if (timeout && time_after_eq(jiffies, timeout_expire)) { 1290 ret = -ETIME; 1291 break; 1292 } 1293 1294 timer.function = NULL; 1295 if (timeout || missed_irq(dev_priv, engine)) { 1296 unsigned long expire; 1297 1298 setup_timer_on_stack(&timer, fake_irq, (unsigned long)&engine->irq_queue); 1299 expire = missed_irq(dev_priv, engine) ? jiffies + 1 : timeout_expire; 1300 sl_timeout = expire - jiffies; 1301 if (sl_timeout < 1) 1302 sl_timeout = 1; 1303 mod_timer(&timer, expire); 1304 } 1305 1306 #if 0 1307 io_schedule(); 1308 #endif 1309 1310 if (timer.function) { 1311 del_singleshot_timer_sync(&timer); 1312 destroy_timer_on_stack(&timer); 1313 } 1314 1315 lksleep(&engine->irq_queue, &engine->irq_queue.lock, 1316 interruptible ? PCATCH : 0, "lwe", sl_timeout); 1317 } 1318 lockmgr(&engine->irq_queue.lock, LK_RELEASE); 1319 if (!irq_test_in_progress) 1320 engine->irq_put(engine); 1321 1322 out: 1323 trace_i915_gem_request_wait_end(req); 1324 1325 if (timeout) { 1326 s64 tres = *timeout - (ktime_get_raw_ns() - before); 1327 1328 *timeout = tres < 0 ? 0 : tres; 1329 1330 /* 1331 * Apparently ktime isn't accurate enough and occasionally has a 1332 * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch 1333 * things up to make the test happy. We allow up to 1 jiffy. 1334 * 1335 * This is a regrssion from the timespec->ktime conversion. 
1336 */ 1337 if (ret == -ETIME && *timeout < jiffies_to_usecs(1)*1000) 1338 *timeout = 0; 1339 } 1340 1341 return ret; 1342 } 1343 1344 int i915_gem_request_add_to_client(struct drm_i915_gem_request *req, 1345 struct drm_file *file) 1346 { 1347 struct drm_i915_file_private *file_priv; 1348 1349 WARN_ON(!req || !file || req->file_priv); 1350 1351 if (!req || !file) 1352 return -EINVAL; 1353 1354 if (req->file_priv) 1355 return -EINVAL; 1356 1357 file_priv = file->driver_priv; 1358 1359 spin_lock(&file_priv->mm.lock); 1360 req->file_priv = file_priv; 1361 list_add_tail(&req->client_list, &file_priv->mm.request_list); 1362 spin_unlock(&file_priv->mm.lock); 1363 1364 req->pid = curproc->p_pid; 1365 1366 return 0; 1367 } 1368 1369 static inline void 1370 i915_gem_request_remove_from_client(struct drm_i915_gem_request *request) 1371 { 1372 struct drm_i915_file_private *file_priv = request->file_priv; 1373 1374 if (!file_priv) 1375 return; 1376 1377 spin_lock(&file_priv->mm.lock); 1378 list_del(&request->client_list); 1379 request->file_priv = NULL; 1380 spin_unlock(&file_priv->mm.lock); 1381 1382 #if 0 1383 put_pid(request->pid); 1384 request->pid = NULL; 1385 #endif 1386 } 1387 1388 static void i915_gem_request_retire(struct drm_i915_gem_request *request) 1389 { 1390 trace_i915_gem_request_retire(request); 1391 1392 /* We know the GPU must have read the request to have 1393 * sent us the seqno + interrupt, so use the position 1394 * of tail of the request to update the last known position 1395 * of the GPU head. 1396 * 1397 * Note this requires that we are always called in request 1398 * completion order. 1399 */ 1400 request->ringbuf->last_retired_head = request->postfix; 1401 1402 list_del_init(&request->list); 1403 i915_gem_request_remove_from_client(request); 1404 1405 i915_gem_request_unreference(request); 1406 } 1407 1408 static void 1409 __i915_gem_request_retire__upto(struct drm_i915_gem_request *req) 1410 { 1411 struct intel_engine_cs *engine = req->engine; 1412 struct drm_i915_gem_request *tmp; 1413 1414 lockdep_assert_held(&engine->dev->struct_mutex); 1415 1416 if (list_empty(&req->list)) 1417 return; 1418 1419 do { 1420 tmp = list_first_entry(&engine->request_list, 1421 typeof(*tmp), list); 1422 1423 i915_gem_request_retire(tmp); 1424 } while (tmp != req); 1425 1426 WARN_ON(i915_verify_lists(engine->dev)); 1427 } 1428 1429 /** 1430 * Waits for a request to be signaled, and cleans up the 1431 * request and object lists appropriately for that event. 1432 */ 1433 int 1434 i915_wait_request(struct drm_i915_gem_request *req) 1435 { 1436 struct drm_i915_private *dev_priv = req->i915; 1437 bool interruptible; 1438 int ret; 1439 1440 interruptible = dev_priv->mm.interruptible; 1441 1442 BUG_ON(!mutex_is_locked(&dev_priv->dev->struct_mutex)); 1443 1444 ret = __i915_wait_request(req, interruptible, NULL, NULL); 1445 if (ret) 1446 return ret; 1447 1448 /* If the GPU hung, we want to keep the requests to find the guilty. */ 1449 if (req->reset_counter == i915_reset_counter(&dev_priv->gpu_error)) 1450 __i915_gem_request_retire__upto(req); 1451 1452 return 0; 1453 } 1454 1455 /** 1456 * Ensures that all rendering to the object has completed and the object is 1457 * safe to unbind from the GTT or access from the CPU. 
1458 */ 1459 int 1460 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, 1461 bool readonly) 1462 { 1463 int ret, i; 1464 1465 if (!obj->active) 1466 return 0; 1467 1468 if (readonly) { 1469 if (obj->last_write_req != NULL) { 1470 ret = i915_wait_request(obj->last_write_req); 1471 if (ret) 1472 return ret; 1473 1474 i = obj->last_write_req->engine->id; 1475 if (obj->last_read_req[i] == obj->last_write_req) 1476 i915_gem_object_retire__read(obj, i); 1477 else 1478 i915_gem_object_retire__write(obj); 1479 } 1480 } else { 1481 for (i = 0; i < I915_NUM_ENGINES; i++) { 1482 if (obj->last_read_req[i] == NULL) 1483 continue; 1484 1485 ret = i915_wait_request(obj->last_read_req[i]); 1486 if (ret) 1487 return ret; 1488 1489 i915_gem_object_retire__read(obj, i); 1490 } 1491 GEM_BUG_ON(obj->active); 1492 } 1493 1494 return 0; 1495 } 1496 1497 static void 1498 i915_gem_object_retire_request(struct drm_i915_gem_object *obj, 1499 struct drm_i915_gem_request *req) 1500 { 1501 int ring = req->engine->id; 1502 1503 if (obj->last_read_req[ring] == req) 1504 i915_gem_object_retire__read(obj, ring); 1505 else if (obj->last_write_req == req) 1506 i915_gem_object_retire__write(obj); 1507 1508 if (req->reset_counter == i915_reset_counter(&req->i915->gpu_error)) 1509 __i915_gem_request_retire__upto(req); 1510 } 1511 1512 /* A nonblocking variant of the above wait. This is a highly dangerous routine 1513 * as the object state may change during this call. 1514 */ 1515 static __must_check int 1516 i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj, 1517 struct intel_rps_client *rps, 1518 bool readonly) 1519 { 1520 struct drm_device *dev = obj->base.dev; 1521 struct drm_i915_private *dev_priv = dev->dev_private; 1522 struct drm_i915_gem_request *requests[I915_NUM_ENGINES]; 1523 int ret, i, n = 0; 1524 1525 BUG_ON(!mutex_is_locked(&dev->struct_mutex)); 1526 BUG_ON(!dev_priv->mm.interruptible); 1527 1528 if (!obj->active) 1529 return 0; 1530 1531 if (readonly) { 1532 struct drm_i915_gem_request *req; 1533 1534 req = obj->last_write_req; 1535 if (req == NULL) 1536 return 0; 1537 1538 requests[n++] = i915_gem_request_reference(req); 1539 } else { 1540 for (i = 0; i < I915_NUM_ENGINES; i++) { 1541 struct drm_i915_gem_request *req; 1542 1543 req = obj->last_read_req[i]; 1544 if (req == NULL) 1545 continue; 1546 1547 requests[n++] = i915_gem_request_reference(req); 1548 } 1549 } 1550 1551 mutex_unlock(&dev->struct_mutex); 1552 ret = 0; 1553 for (i = 0; ret == 0 && i < n; i++) 1554 ret = __i915_wait_request(requests[i], true, NULL, rps); 1555 mutex_lock(&dev->struct_mutex); 1556 1557 for (i = 0; i < n; i++) { 1558 if (ret == 0) 1559 i915_gem_object_retire_request(obj, requests[i]); 1560 i915_gem_request_unreference(requests[i]); 1561 } 1562 1563 return ret; 1564 } 1565 1566 static struct intel_rps_client *to_rps_client(struct drm_file *file) 1567 { 1568 struct drm_i915_file_private *fpriv = file->driver_priv; 1569 return &fpriv->rps; 1570 } 1571 1572 /** 1573 * Called when user space prepares to use an object with the CPU, either 1574 * through the mmap ioctl's mapping or a GTT mapping. 1575 */ 1576 int 1577 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, 1578 struct drm_file *file) 1579 { 1580 struct drm_i915_gem_set_domain *args = data; 1581 struct drm_i915_gem_object *obj; 1582 uint32_t read_domains = args->read_domains; 1583 uint32_t write_domain = args->write_domain; 1584 int ret; 1585 1586 /* Only handle setting domains to types used by the CPU. 
*/ 1587 if (write_domain & I915_GEM_GPU_DOMAINS) 1588 return -EINVAL; 1589 1590 if (read_domains & I915_GEM_GPU_DOMAINS) 1591 return -EINVAL; 1592 1593 /* Having something in the write domain implies it's in the read 1594 * domain, and only that read domain. Enforce that in the request. 1595 */ 1596 if (write_domain != 0 && read_domains != write_domain) 1597 return -EINVAL; 1598 1599 ret = i915_mutex_lock_interruptible(dev); 1600 if (ret) 1601 return ret; 1602 1603 obj = to_intel_bo(drm_gem_object_lookup(file, args->handle)); 1604 if (&obj->base == NULL) { 1605 ret = -ENOENT; 1606 goto unlock; 1607 } 1608 1609 /* Try to flush the object off the GPU without holding the lock. 1610 * We will repeat the flush holding the lock in the normal manner 1611 * to catch cases where we are gazumped. 1612 */ 1613 ret = i915_gem_object_wait_rendering__nonblocking(obj, 1614 to_rps_client(file), 1615 !write_domain); 1616 if (ret) 1617 goto unref; 1618 1619 if (read_domains & I915_GEM_DOMAIN_GTT) 1620 ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0); 1621 else 1622 ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0); 1623 1624 if (write_domain != 0) 1625 intel_fb_obj_invalidate(obj, 1626 write_domain == I915_GEM_DOMAIN_GTT ? 1627 ORIGIN_GTT : ORIGIN_CPU); 1628 1629 unref: 1630 drm_gem_object_unreference(&obj->base); 1631 unlock: 1632 mutex_unlock(&dev->struct_mutex); 1633 return ret; 1634 } 1635 1636 /** 1637 * Called when user space has done writes to this buffer 1638 */ 1639 int 1640 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, 1641 struct drm_file *file) 1642 { 1643 struct drm_i915_gem_sw_finish *args = data; 1644 struct drm_i915_gem_object *obj; 1645 int ret = 0; 1646 1647 ret = i915_mutex_lock_interruptible(dev); 1648 if (ret) 1649 return ret; 1650 1651 obj = to_intel_bo(drm_gem_object_lookup(file, args->handle)); 1652 if (&obj->base == NULL) { 1653 ret = -ENOENT; 1654 goto unlock; 1655 } 1656 1657 /* Pinned buffers may be scanout, so flush the cache */ 1658 if (obj->pin_display) 1659 i915_gem_object_flush_cpu_write_domain(obj); 1660 1661 drm_gem_object_unreference(&obj->base); 1662 unlock: 1663 mutex_unlock(&dev->struct_mutex); 1664 return ret; 1665 } 1666 1667 /** 1668 * Maps the contents of an object, returning the address it is mapped 1669 * into. 1670 * 1671 * While the mapping holds a reference on the contents of the object, it doesn't 1672 * imply a ref on the object itself. 1673 * 1674 * IMPORTANT: 1675 * 1676 * DRM driver writers who look a this function as an example for how to do GEM 1677 * mmap support, please don't implement mmap support like here. The modern way 1678 * to implement DRM mmap support is with an mmap offset ioctl (like 1679 * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly. 1680 * That way debug tooling like valgrind will understand what's going on, hiding 1681 * the mmap call in a driver private ioctl will break that. The i915 driver only 1682 * does cpu mmaps this way because we didn't know better. 
1683 */ 1684 int 1685 i915_gem_mmap_ioctl(struct drm_device *dev, void *data, 1686 struct drm_file *file) 1687 { 1688 struct drm_i915_gem_mmap *args = data; 1689 struct drm_gem_object *obj; 1690 unsigned long addr; 1691 1692 struct proc *p = curproc; 1693 vm_map_t map = &p->p_vmspace->vm_map; 1694 vm_size_t size; 1695 int error = 0, rv; 1696 1697 if (args->flags & ~(I915_MMAP_WC)) 1698 return -EINVAL; 1699 1700 obj = drm_gem_object_lookup(file, args->handle); 1701 if (obj == NULL) 1702 return -ENOENT; 1703 1704 if (args->size == 0) 1705 goto out; 1706 1707 size = round_page(args->size); 1708 if (map->size + size > p->p_rlimit[RLIMIT_VMEM].rlim_cur) { 1709 error = -ENOMEM; 1710 goto out; 1711 } 1712 1713 /* prime objects have no backing filp to GEM mmap 1714 * pages from. 1715 */ 1716 if (!obj->filp) { 1717 drm_gem_object_unreference_unlocked(obj); 1718 return -EINVAL; 1719 } 1720 1721 /* 1722 * Call hint to ensure that NULL is not returned as a valid address 1723 * and to reduce vm_map traversals. XXX causes instability, use a 1724 * fixed low address as the start point instead to avoid the NULL 1725 * return issue. 1726 */ 1727 1728 addr = PAGE_SIZE; 1729 1730 /* 1731 * Use 256KB alignment. It is unclear why this matters for a 1732 * virtual address but it appears to fix a number of application/X 1733 * crashes and kms console switching is much faster. 1734 */ 1735 vm_object_hold(obj->filp); 1736 vm_object_reference_locked(obj->filp); 1737 vm_object_drop(obj->filp); 1738 1739 /* Something gets wrong here: fails to mmap 4096 */ 1740 rv = vm_map_find(map, obj->filp, NULL, 1741 args->offset, &addr, args->size, 1742 256 * 1024, /* align */ 1743 TRUE, /* fitit */ 1744 VM_MAPTYPE_NORMAL, VM_SUBSYS_DRM_GEM, 1745 VM_PROT_READ | VM_PROT_WRITE, /* prot */ 1746 VM_PROT_READ | VM_PROT_WRITE, /* max */ 1747 MAP_SHARED /* cow */); 1748 if (rv != KERN_SUCCESS) { 1749 vm_object_deallocate(obj->filp); 1750 error = -vm_mmap_to_errno(rv); 1751 } else { 1752 args->addr_ptr = (uint64_t)addr; 1753 } 1754 out: 1755 drm_gem_object_unreference(obj); 1756 return (error); 1757 } 1758 1759 /** 1760 * i915_gem_fault - fault a page into the GTT 1761 * 1762 * vm_obj is locked on entry and expected to be locked on return. 1763 * 1764 * The vm_pager has placemarked the object with an anonymous memory page 1765 * which we must replace atomically to avoid races against concurrent faults 1766 * on the same page. XXX we currently are unable to do this atomically. 1767 * 1768 * If we are to return an error we should not touch the anonymous page, 1769 * the caller will deallocate it. 1770 * 1771 * XXX Most GEM calls appear to be interruptable, but we can't hard loop 1772 * in that case. Release all resources and wait 1 tick before retrying. 1773 * This is a huge problem which needs to be fixed by getting rid of most 1774 * of the interruptability. The linux code does not retry but does appear 1775 * to have some sort of mechanism (VM_FAULT_NOPAGE ?) for the higher level 1776 * to be able to retry. 1777 * 1778 * -- 1779 * @vma: VMA in question 1780 * @vmf: fault info 1781 * 1782 * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped 1783 * from userspace. The fault handler takes care of binding the object to 1784 * the GTT (if needed), allocating and programming a fence register (again, 1785 * only if needed based on whether the old reg is still valid or the object 1786 * is tiled) and inserting a new PTE into the faulting process. 
1787 * 1788 * Note that the faulting process may involve evicting existing objects 1789 * from the GTT and/or fence registers to make room. So performance may 1790 * suffer if the GTT working set is large or there are few fence registers 1791 * left. 1792 * 1793 * vm_obj is locked on entry and expected to be locked on return. The VM 1794 * pager has placed an anonymous memory page at (obj,offset) which we have 1795 * to replace. 1796 */ 1797 int i915_gem_fault(vm_object_t vm_obj, vm_ooffset_t offset, int prot, vm_page_t *mres) 1798 { 1799 struct drm_i915_gem_object *obj = to_intel_bo(vm_obj->handle); 1800 struct drm_device *dev = obj->base.dev; 1801 struct drm_i915_private *dev_priv = to_i915(dev); 1802 struct i915_ggtt *ggtt = &dev_priv->ggtt; 1803 struct i915_ggtt_view view = i915_ggtt_view_normal; 1804 unsigned long page_offset; 1805 vm_page_t m; 1806 int ret = 0; 1807 bool write = !!(prot & VM_PROT_WRITE); 1808 1809 intel_runtime_pm_get(dev_priv); 1810 1811 /* We don't use vmf->pgoff since that has the fake offset */ 1812 page_offset = (unsigned long)offset; 1813 1814 /* 1815 * vm_fault() has supplied us with a busied page placeholding 1816 * the operation. This presents a lock order reversal issue 1817 * again i915_gem_release_mmap() for our device mutex. 1818 * 1819 * Deal with the problem by getting rid of the placeholder now, 1820 * and then dealing with the potential for a new placeholder when 1821 * we try to insert later. 1822 */ 1823 if (*mres != NULL) { 1824 m = *mres; 1825 *mres = NULL; 1826 if ((m->busy_count & PBUSY_LOCKED) == 0) 1827 kprintf("i915_gem_fault: Page was not busy\n"); 1828 else 1829 vm_page_remove(m); 1830 vm_page_free(m); 1831 } 1832 1833 m = NULL; 1834 1835 retry: 1836 ret = i915_mutex_lock_interruptible(dev); 1837 if (ret) 1838 goto out; 1839 1840 trace_i915_gem_object_fault(obj, page_offset, true, write); 1841 1842 /* Try to flush the object off the GPU first without holding the lock. 1843 * Upon reacquiring the lock, we will perform our sanity checks and then 1844 * repeat the flush holding the lock in the normal manner to catch cases 1845 * where we are gazumped. 1846 */ 1847 ret = i915_gem_object_wait_rendering__nonblocking(obj, NULL, !write); 1848 if (ret) 1849 goto unlock; 1850 1851 /* Access to snoopable pages through the GTT is incoherent. */ 1852 if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) { 1853 ret = -EFAULT; 1854 goto unlock; 1855 } 1856 1857 /* Use a partial view if the object is bigger than the aperture. */ 1858 if (obj->base.size >= ggtt->mappable_end && 1859 obj->tiling_mode == I915_TILING_NONE) { 1860 #if 0 1861 static const unsigned int chunk_size = 256; // 1 MiB 1862 1863 memset(&view, 0, sizeof(view)); 1864 view.type = I915_GGTT_VIEW_PARTIAL; 1865 view.params.partial.offset = rounddown(page_offset, chunk_size); 1866 view.params.partial.size = 1867 min_t(unsigned int, 1868 chunk_size, 1869 (vma->vm_end - vma->vm_start)/PAGE_SIZE - 1870 view.params.partial.offset); 1871 #endif 1872 } 1873 1874 /* Now pin it into the GTT if needed */ 1875 ret = i915_gem_object_ggtt_pin(obj, &view, 0, PIN_MAPPABLE); 1876 if (ret) 1877 goto unlock; 1878 1879 ret = i915_gem_object_set_to_gtt_domain(obj, write); 1880 if (ret) 1881 goto unpin; 1882 1883 ret = i915_gem_object_get_fence(obj); 1884 if (ret) 1885 goto unpin; 1886 1887 /* 1888 * START FREEBSD MAGIC 1889 * 1890 * Add a pip count to avoid destruction and certain other 1891 * complex operations (such as collapses?) while unlocked. 
1892 */ 1893 vm_object_pip_add(vm_obj, 1); 1894 1895 ret = 0; 1896 m = NULL; 1897 1898 /* 1899 * Since the object lock was dropped, another thread might have 1900 * faulted on the same GTT address and instantiated the mapping. 1901 * Recheck. 1902 */ 1903 m = vm_page_lookup(vm_obj, OFF_TO_IDX(offset)); 1904 if (m != NULL) { 1905 /* 1906 * Try to busy the page, retry on failure (non-zero ret). 1907 */ 1908 if (vm_page_busy_try(m, false)) { 1909 kprintf("i915_gem_fault: BUSY\n"); 1910 ret = -EINTR; 1911 goto unlock; 1912 } 1913 goto have_page; 1914 } 1915 /* 1916 * END FREEBSD MAGIC 1917 */ 1918 1919 obj->fault_mappable = true; 1920 1921 /* Finally, remap it using the new GTT offset */ 1922 m = vm_phys_fictitious_to_vm_page(ggtt->mappable_base + 1923 i915_gem_obj_ggtt_offset_view(obj, &view) + offset); 1924 if (m == NULL) { 1925 ret = -EFAULT; 1926 goto unpin; 1927 } 1928 KASSERT((m->flags & PG_FICTITIOUS) != 0, ("not fictitious %p", m)); 1929 KASSERT(m->wire_count == 1, ("wire_count not 1 %p", m)); 1930 1931 /* 1932 * Try to busy the page. Fails on non-zero return. 1933 */ 1934 if (vm_page_busy_try(m, false)) { 1935 kprintf("i915_gem_fault: BUSY(2)\n"); 1936 ret = -EINTR; 1937 goto unpin; 1938 } 1939 m->valid = VM_PAGE_BITS_ALL; 1940 1941 #if 1 1942 /* 1943 * This should always work since we already checked via a lookup 1944 * above. 1945 */ 1946 if (vm_page_insert(m, vm_obj, OFF_TO_IDX(offset)) == FALSE) { 1947 kprintf("i915:gem_fault: page %p,%jd already in object\n", 1948 vm_obj, 1949 OFF_TO_IDX(offset)); 1950 vm_page_wakeup(m); 1951 ret = -EINTR; 1952 goto unpin; 1953 } 1954 #else 1955 /* NOT COMPILED ATM */ 1956 if (unlikely(view.type == I915_GGTT_VIEW_PARTIAL)) { 1957 /* Overriding existing pages in partial view does not cause 1958 * us any trouble as TLBs are still valid because the fault 1959 * is due to userspace losing part of the mapping or never 1960 * having accessed it before (at this partials' range). 1961 */ 1962 unsigned long base = vma->vm_start + 1963 (view.params.partial.offset << PAGE_SHIFT); 1964 unsigned int i; 1965 1966 for (i = 0; i < view.params.partial.size; i++) { 1967 ret = vm_insert_pfn(vma, base + i * PAGE_SIZE, pfn + i); 1968 if (ret) 1969 break; 1970 } 1971 1972 obj->fault_mappable = true; 1973 } else { 1974 if (!obj->fault_mappable) { 1975 unsigned long size = min_t(unsigned long, 1976 vma->vm_end - vma->vm_start, 1977 obj->base.size); 1978 int i; 1979 1980 for (i = 0; i < size >> PAGE_SHIFT; i++) { 1981 ret = vm_insert_pfn(vma, 1982 (unsigned long)vma->vm_start + i * PAGE_SIZE, 1983 pfn + i); 1984 if (ret) 1985 break; 1986 } 1987 1988 obj->fault_mappable = true; 1989 } else 1990 ret = vm_insert_pfn(vma, 1991 (unsigned long)vmf->virtual_address, 1992 pfn + page_offset); 1993 } 1994 #endif 1995 1996 have_page: 1997 *mres = m; 1998 1999 i915_gem_object_ggtt_unpin_view(obj, &view); 2000 mutex_unlock(&dev->struct_mutex); 2001 ret = VM_PAGER_OK; 2002 goto done; 2003 2004 /* 2005 * ALTERNATIVE ERROR RETURN. 2006 * 2007 * OBJECT EXPECTED TO BE LOCKED. 2008 */ 2009 unpin: 2010 i915_gem_object_ggtt_unpin_view(obj, &view); 2011 unlock: 2012 mutex_unlock(&dev->struct_mutex); 2013 out: 2014 switch (ret) { 2015 case -EIO: 2016 /* 2017 * We eat errors when the gpu is terminally wedged to avoid 2018 * userspace unduly crashing (gl has no provisions for mmaps to 2019 * fail). But any other -EIO isn't ours (e.g. swap in failure) 2020 * and so needs to be reported. 
		 */
		if (!i915_terminally_wedged(&dev_priv->gpu_error)) {
//			ret = VM_FAULT_SIGBUS;
			break;
		}
		/* fall through */
	case -EAGAIN:
		/*
		 * EAGAIN means the gpu is hung and we'll wait for the error
		 * handler to reset everything when re-faulting in
		 * i915_mutex_lock_interruptible.
		 */
	case -ERESTARTSYS:
	case -EINTR:
		VM_OBJECT_UNLOCK(vm_obj);
		int dummy;
		tsleep(&dummy, 0, "delay", 1); /* XXX */
		VM_OBJECT_LOCK(vm_obj);
		goto retry;
	default:
		WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret);
		ret = VM_PAGER_ERROR;
		break;
	}

done:
	vm_object_pip_wakeup(vm_obj);

	intel_runtime_pm_put(dev_priv);
	return ret;
}

/**
 * i915_gem_release_mmap - remove physical page mappings
 * @obj: obj in question
 *
 * Preserve the reservation of the mmapping with the DRM core code, but
 * relinquish ownership of the pages back to the system.
 *
 * It is vital that we remove the page mapping if we have mapped a tiled
 * object through the GTT and then lose the fence register due to
 * resource pressure. Similarly if the object has been moved out of the
 * aperture, then pages mapped into userspace must be revoked. Removing the
 * mapping will then trigger a page fault on the next user access, allowing
 * fixup by i915_gem_fault().
 */
void
i915_gem_release_mmap(struct drm_i915_gem_object *obj)
{
	vm_object_t devobj;
	vm_page_t m;
	int i, page_count;

	/* Serialisation between user GTT access and our code depends upon
	 * revoking the CPU's PTE whilst the mutex is held. The next user
	 * pagefault then has to wait until we release the mutex.
	 */
	lockdep_assert_held(&obj->base.dev->struct_mutex);

	if (!obj->fault_mappable)
		return;

	devobj = cdev_pager_lookup(obj);
	if (devobj != NULL) {
		page_count = OFF_TO_IDX(obj->base.size);

		VM_OBJECT_LOCK(devobj);
		for (i = 0; i < page_count; i++) {
			m = vm_page_lookup_busy_wait(devobj, i, TRUE, "915unm");
			if (m == NULL)
				continue;
			cdev_pager_free_page(devobj, m);
		}
		VM_OBJECT_UNLOCK(devobj);
		vm_object_deallocate(devobj);
	}

	/* Ensure that the CPU's PTE are revoked and there are not outstanding
	 * memory transactions from userspace before we return. The TLB
	 * flushing implied above by changing the PTE above *should* be
	 * sufficient, an extra barrier here just provides us with a bit
	 * of paranoid documentation about our requirement to serialise
	 * memory writes before touching registers / GSM.
2103 */ 2104 wmb(); 2105 2106 obj->fault_mappable = false; 2107 } 2108 2109 void 2110 i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv) 2111 { 2112 struct drm_i915_gem_object *obj; 2113 2114 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) 2115 i915_gem_release_mmap(obj); 2116 } 2117 2118 uint32_t 2119 i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode) 2120 { 2121 uint32_t gtt_size; 2122 2123 if (INTEL_INFO(dev)->gen >= 4 || 2124 tiling_mode == I915_TILING_NONE) 2125 return size; 2126 2127 /* Previous chips need a power-of-two fence region when tiling */ 2128 if (INTEL_INFO(dev)->gen == 3) 2129 gtt_size = 1024*1024; 2130 else 2131 gtt_size = 512*1024; 2132 2133 while (gtt_size < size) 2134 gtt_size <<= 1; 2135 2136 return gtt_size; 2137 } 2138 2139 /** 2140 * i915_gem_get_gtt_alignment - return required GTT alignment for an object 2141 * @obj: object to check 2142 * 2143 * Return the required GTT alignment for an object, taking into account 2144 * potential fence register mapping. 2145 */ 2146 uint32_t 2147 i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size, 2148 int tiling_mode, bool fenced) 2149 { 2150 /* 2151 * Minimum alignment is 4k (GTT page size), but might be greater 2152 * if a fence register is needed for the object. 2153 */ 2154 if (INTEL_INFO(dev)->gen >= 4 || (!fenced && IS_G33(dev)) || 2155 tiling_mode == I915_TILING_NONE) 2156 return 4096; 2157 2158 /* 2159 * Previous chips need to be aligned to the size of the smallest 2160 * fence register that can contain the object. 2161 */ 2162 return i915_gem_get_gtt_size(dev, size, tiling_mode); 2163 } 2164 2165 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj) 2166 { 2167 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2168 int ret; 2169 2170 #if 0 2171 if (drm_vma_node_has_offset(&obj->base.vma_node)) 2172 return 0; 2173 #endif 2174 2175 dev_priv->mm.shrinker_no_lock_stealing = true; 2176 2177 ret = drm_gem_create_mmap_offset(&obj->base); 2178 if (ret != -ENOSPC) 2179 goto out; 2180 2181 /* Badly fragmented mmap space? The only way we can recover 2182 * space is by destroying unwanted objects. We can't randomly release 2183 * mmap_offsets as userspace expects them to be persistent for the 2184 * lifetime of the objects. The closest we can is to release the 2185 * offsets on purgeable objects by truncating it and marking it purged, 2186 * which prevents userspace from ever using that object again. 
2187 */ 2188 i915_gem_shrink(dev_priv, 2189 obj->base.size >> PAGE_SHIFT, 2190 I915_SHRINK_BOUND | 2191 I915_SHRINK_UNBOUND | 2192 I915_SHRINK_PURGEABLE); 2193 ret = drm_gem_create_mmap_offset(&obj->base); 2194 if (ret != -ENOSPC) 2195 goto out; 2196 2197 i915_gem_shrink_all(dev_priv); 2198 ret = drm_gem_create_mmap_offset(&obj->base); 2199 out: 2200 dev_priv->mm.shrinker_no_lock_stealing = false; 2201 2202 return ret; 2203 } 2204 2205 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj) 2206 { 2207 drm_gem_free_mmap_offset(&obj->base); 2208 } 2209 2210 int 2211 i915_gem_mmap_gtt(struct drm_file *file, 2212 struct drm_device *dev, 2213 uint32_t handle, 2214 uint64_t *offset) 2215 { 2216 struct drm_i915_gem_object *obj; 2217 int ret; 2218 2219 ret = i915_mutex_lock_interruptible(dev); 2220 if (ret) 2221 return ret; 2222 2223 obj = to_intel_bo(drm_gem_object_lookup(file, handle)); 2224 if (&obj->base == NULL) { 2225 ret = -ENOENT; 2226 goto unlock; 2227 } 2228 2229 if (obj->madv != I915_MADV_WILLNEED) { 2230 DRM_DEBUG("Attempting to mmap a purgeable buffer\n"); 2231 ret = -EFAULT; 2232 goto out; 2233 } 2234 2235 ret = i915_gem_object_create_mmap_offset(obj); 2236 if (ret) 2237 goto out; 2238 2239 *offset = DRM_GEM_MAPPING_OFF(obj->base.map_list.key) | 2240 DRM_GEM_MAPPING_KEY; 2241 2242 out: 2243 drm_gem_object_unreference(&obj->base); 2244 unlock: 2245 mutex_unlock(&dev->struct_mutex); 2246 return ret; 2247 } 2248 2249 /** 2250 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing 2251 * @dev: DRM device 2252 * @data: GTT mapping ioctl data 2253 * @file: GEM object info 2254 * 2255 * Simply returns the fake offset to userspace so it can mmap it. 2256 * The mmap call will end up in drm_gem_mmap(), which will set things 2257 * up so we can get faults in the handler above. 2258 * 2259 * The fault handler will take care of binding the object into the GTT 2260 * (since it may have been evicted to make room for something), allocating 2261 * a fence register, and mapping the appropriate aperture address into 2262 * userspace. 
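 *
 * Purely as an illustration (this example is not part of the original
 * comment, and the userspace variables fd/bo_handle/bo_size/ptr are
 * hypothetical): assuming the stock i915_drm.h definitions of
 * struct drm_i915_gem_mmap_gtt and DRM_IOCTL_I915_GEM_MMAP_GTT, plus
 * libdrm's drmIoctl() wrapper, the usual userspace sequence is roughly
 *
 *	struct drm_i915_gem_mmap_gtt arg = { .handle = bo_handle };
 *	void *ptr = MAP_FAILED;
 *
 *	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &arg) == 0)
 *		ptr = mmap(NULL, bo_size, PROT_READ | PROT_WRITE,
 *			   MAP_SHARED, fd, arg.offset);
 *
 * i.e. the fake offset returned by this ioctl is only meaningful as an
 * mmap() offset on the same DRM file descriptor; every subsequent access
 * then goes through the fault handler above.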
2263 */ 2264 int 2265 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, 2266 struct drm_file *file) 2267 { 2268 struct drm_i915_gem_mmap_gtt *args = data; 2269 2270 return i915_gem_mmap_gtt(file, dev, args->handle, (uint64_t *)&args->offset); 2271 } 2272 2273 /* Immediately discard the backing storage */ 2274 static void 2275 i915_gem_object_truncate(struct drm_i915_gem_object *obj) 2276 { 2277 vm_object_t vm_obj; 2278 2279 vm_obj = obj->base.filp; 2280 VM_OBJECT_LOCK(vm_obj); 2281 vm_object_page_remove(vm_obj, 0, 0, false); 2282 VM_OBJECT_UNLOCK(vm_obj); 2283 2284 obj->madv = __I915_MADV_PURGED; 2285 } 2286 2287 /* Try to discard unwanted pages */ 2288 static void 2289 i915_gem_object_invalidate(struct drm_i915_gem_object *obj) 2290 { 2291 #if 0 2292 struct address_space *mapping; 2293 #endif 2294 2295 switch (obj->madv) { 2296 case I915_MADV_DONTNEED: 2297 i915_gem_object_truncate(obj); 2298 case __I915_MADV_PURGED: 2299 return; 2300 } 2301 2302 if (obj->base.filp == NULL) 2303 return; 2304 2305 #if 0 2306 mapping = file_inode(obj->base.filp)->i_mapping, 2307 #endif 2308 invalidate_mapping_pages(obj->base.filp, 0, (loff_t)-1); 2309 } 2310 2311 static void 2312 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj) 2313 { 2314 struct sg_page_iter sg_iter; 2315 int ret; 2316 2317 BUG_ON(obj->madv == __I915_MADV_PURGED); 2318 2319 ret = i915_gem_object_set_to_cpu_domain(obj, true); 2320 if (WARN_ON(ret)) { 2321 /* In the event of a disaster, abandon all caches and 2322 * hope for the best. 2323 */ 2324 i915_gem_clflush_object(obj, true); 2325 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU; 2326 } 2327 2328 i915_gem_gtt_finish_object(obj); 2329 2330 if (i915_gem_object_needs_bit17_swizzle(obj)) 2331 i915_gem_object_save_bit_17_swizzle(obj); 2332 2333 if (obj->madv == I915_MADV_DONTNEED) 2334 obj->dirty = 0; 2335 2336 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) { 2337 struct page *page = sg_page_iter_page(&sg_iter); 2338 2339 if (obj->dirty) 2340 set_page_dirty(page); 2341 2342 if (obj->madv == I915_MADV_WILLNEED) 2343 mark_page_accessed(page); 2344 2345 vm_page_busy_wait((struct vm_page *)page, FALSE, "i915gem"); 2346 vm_page_unwire((struct vm_page *)page, 1); 2347 vm_page_wakeup((struct vm_page *)page); 2348 } 2349 obj->dirty = 0; 2350 2351 sg_free_table(obj->pages); 2352 kfree(obj->pages); 2353 } 2354 2355 int 2356 i915_gem_object_put_pages(struct drm_i915_gem_object *obj) 2357 { 2358 const struct drm_i915_gem_object_ops *ops = obj->ops; 2359 2360 if (obj->pages == NULL) 2361 return 0; 2362 2363 if (obj->pages_pin_count) 2364 return -EBUSY; 2365 2366 BUG_ON(i915_gem_obj_bound_any(obj)); 2367 2368 /* ->put_pages might need to allocate memory for the bit17 swizzle 2369 * array, hence protect them from being reaped by removing them from gtt 2370 * lists early. 
*/ 2371 list_del(&obj->global_list); 2372 2373 if (obj->mapping) { 2374 if (is_vmalloc_addr(obj->mapping)) 2375 vunmap(obj->mapping); 2376 else 2377 kunmap(kmap_to_page(obj->mapping)); 2378 obj->mapping = NULL; 2379 } 2380 2381 ops->put_pages(obj); 2382 obj->pages = NULL; 2383 2384 i915_gem_object_invalidate(obj); 2385 2386 return 0; 2387 } 2388 2389 static int 2390 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) 2391 { 2392 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2393 int page_count, i; 2394 vm_object_t vm_obj; 2395 struct sg_table *st; 2396 struct scatterlist *sg; 2397 struct sg_page_iter sg_iter; 2398 struct page *page; 2399 unsigned long last_pfn = 0; /* suppress gcc warning */ 2400 int ret; 2401 2402 /* Assert that the object is not currently in any GPU domain. As it 2403 * wasn't in the GTT, there shouldn't be any way it could have been in 2404 * a GPU cache 2405 */ 2406 BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS); 2407 BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS); 2408 2409 st = kmalloc(sizeof(*st), M_DRM, M_WAITOK); 2410 if (st == NULL) 2411 return -ENOMEM; 2412 2413 page_count = obj->base.size / PAGE_SIZE; 2414 if (sg_alloc_table(st, page_count, GFP_KERNEL)) { 2415 kfree(st); 2416 return -ENOMEM; 2417 } 2418 2419 /* Get the list of pages out of our struct file. They'll be pinned 2420 * at this point until we release them. 2421 * 2422 * Fail silently without starting the shrinker 2423 */ 2424 vm_obj = obj->base.filp; 2425 VM_OBJECT_LOCK(vm_obj); 2426 sg = st->sgl; 2427 st->nents = 0; 2428 for (i = 0; i < page_count; i++) { 2429 page = shmem_read_mapping_page(vm_obj, i); 2430 if (IS_ERR(page)) { 2431 i915_gem_shrink(dev_priv, 2432 page_count, 2433 I915_SHRINK_BOUND | 2434 I915_SHRINK_UNBOUND | 2435 I915_SHRINK_PURGEABLE); 2436 page = shmem_read_mapping_page(vm_obj, i); 2437 } 2438 if (IS_ERR(page)) { 2439 /* We've tried hard to allocate the memory by reaping 2440 * our own buffer, now let the real VM do its job and 2441 * go down in flames if truly OOM. 2442 */ 2443 i915_gem_shrink_all(dev_priv); 2444 page = shmem_read_mapping_page(vm_obj, i); 2445 if (IS_ERR(page)) { 2446 ret = PTR_ERR(page); 2447 goto err_pages; 2448 } 2449 } 2450 #ifdef CONFIG_SWIOTLB 2451 if (swiotlb_nr_tbl()) { 2452 st->nents++; 2453 sg_set_page(sg, page, PAGE_SIZE, 0); 2454 sg = sg_next(sg); 2455 continue; 2456 } 2457 #endif 2458 if (!i || page_to_pfn(page) != last_pfn + 1) { 2459 if (i) 2460 sg = sg_next(sg); 2461 st->nents++; 2462 sg_set_page(sg, page, PAGE_SIZE, 0); 2463 } else { 2464 sg->length += PAGE_SIZE; 2465 } 2466 last_pfn = page_to_pfn(page); 2467 2468 /* Check that the i965g/gm workaround works. 
*/ 2469 } 2470 #ifdef CONFIG_SWIOTLB 2471 if (!swiotlb_nr_tbl()) 2472 #endif 2473 sg_mark_end(sg); 2474 obj->pages = st; 2475 VM_OBJECT_UNLOCK(vm_obj); 2476 2477 ret = i915_gem_gtt_prepare_object(obj); 2478 if (ret) 2479 goto err_pages; 2480 2481 if (i915_gem_object_needs_bit17_swizzle(obj)) 2482 i915_gem_object_do_bit_17_swizzle(obj); 2483 2484 if (obj->tiling_mode != I915_TILING_NONE && 2485 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) 2486 i915_gem_object_pin_pages(obj); 2487 2488 return 0; 2489 2490 err_pages: 2491 sg_mark_end(sg); 2492 for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) { 2493 struct vm_page *vmp = (struct vm_page *)sg_page_iter_page(&sg_iter); 2494 vm_page_busy_wait(vmp, FALSE, "i915gem"); 2495 vm_page_unwire(vmp, 0); 2496 vm_page_wakeup(vmp); 2497 } 2498 VM_OBJECT_UNLOCK(vm_obj); 2499 sg_free_table(st); 2500 kfree(st); 2501 2502 /* shmemfs first checks if there is enough memory to allocate the page 2503 * and reports ENOSPC should there be insufficient, along with the usual 2504 * ENOMEM for a genuine allocation failure. 2505 * 2506 * We use ENOSPC in our driver to mean that we have run out of aperture 2507 * space and so want to translate the error from shmemfs back to our 2508 * usual understanding of ENOMEM. 2509 */ 2510 if (ret == -ENOSPC) 2511 ret = -ENOMEM; 2512 2513 return ret; 2514 } 2515 2516 /* Ensure that the associated pages are gathered from the backing storage 2517 * and pinned into our object. i915_gem_object_get_pages() may be called 2518 * multiple times before they are released by a single call to 2519 * i915_gem_object_put_pages() - once the pages are no longer referenced 2520 * either as a result of memory pressure (reaping pages under the shrinker) 2521 * or as the object is itself released. 2522 */ 2523 int 2524 i915_gem_object_get_pages(struct drm_i915_gem_object *obj) 2525 { 2526 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2527 const struct drm_i915_gem_object_ops *ops = obj->ops; 2528 int ret; 2529 2530 if (obj->pages) 2531 return 0; 2532 2533 if (obj->madv != I915_MADV_WILLNEED) { 2534 DRM_DEBUG("Attempting to obtain a purgeable object\n"); 2535 return -EFAULT; 2536 } 2537 2538 BUG_ON(obj->pages_pin_count); 2539 2540 ret = ops->get_pages(obj); 2541 if (ret) 2542 return ret; 2543 2544 list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list); 2545 2546 obj->get_page.sg = obj->pages->sgl; 2547 obj->get_page.last = 0; 2548 2549 return 0; 2550 } 2551 2552 void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj) 2553 { 2554 int ret; 2555 2556 lockdep_assert_held(&obj->base.dev->struct_mutex); 2557 2558 ret = i915_gem_object_get_pages(obj); 2559 if (ret) 2560 return ERR_PTR(ret); 2561 2562 i915_gem_object_pin_pages(obj); 2563 2564 if (obj->mapping == NULL) { 2565 struct page **pages; 2566 2567 pages = NULL; 2568 if (obj->base.size == PAGE_SIZE) 2569 obj->mapping = kmap(sg_page(obj->pages->sgl)); 2570 else 2571 pages = drm_malloc_gfp(obj->base.size >> PAGE_SHIFT, 2572 sizeof(*pages), 2573 GFP_TEMPORARY); 2574 if (pages != NULL) { 2575 struct sg_page_iter sg_iter; 2576 int n; 2577 2578 n = 0; 2579 for_each_sg_page(obj->pages->sgl, &sg_iter, 2580 obj->pages->nents, 0) 2581 pages[n++] = sg_page_iter_page(&sg_iter); 2582 2583 obj->mapping = vmap(pages, n, 0, PAGE_KERNEL); 2584 drm_free_large(pages); 2585 } 2586 if (obj->mapping == NULL) { 2587 i915_gem_object_unpin_pages(obj); 2588 return ERR_PTR(-ENOMEM); 2589 } 2590 } 2591 2592 return obj->mapping; 2593 } 2594 2595 void i915_vma_move_to_active(struct i915_vma *vma, 2596 
struct drm_i915_gem_request *req) 2597 { 2598 struct drm_i915_gem_object *obj = vma->obj; 2599 struct intel_engine_cs *engine; 2600 2601 engine = i915_gem_request_get_engine(req); 2602 2603 /* Add a reference if we're newly entering the active list. */ 2604 if (obj->active == 0) 2605 drm_gem_object_reference(&obj->base); 2606 obj->active |= intel_engine_flag(engine); 2607 2608 list_move_tail(&obj->engine_list[engine->id], &engine->active_list); 2609 i915_gem_request_assign(&obj->last_read_req[engine->id], req); 2610 2611 list_move_tail(&vma->vm_link, &vma->vm->active_list); 2612 } 2613 2614 static void 2615 i915_gem_object_retire__write(struct drm_i915_gem_object *obj) 2616 { 2617 GEM_BUG_ON(obj->last_write_req == NULL); 2618 GEM_BUG_ON(!(obj->active & intel_engine_flag(obj->last_write_req->engine))); 2619 2620 i915_gem_request_assign(&obj->last_write_req, NULL); 2621 intel_fb_obj_flush(obj, true, ORIGIN_CS); 2622 } 2623 2624 static void 2625 i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring) 2626 { 2627 struct i915_vma *vma; 2628 2629 GEM_BUG_ON(obj->last_read_req[ring] == NULL); 2630 GEM_BUG_ON(!(obj->active & (1 << ring))); 2631 2632 list_del_init(&obj->engine_list[ring]); 2633 i915_gem_request_assign(&obj->last_read_req[ring], NULL); 2634 2635 if (obj->last_write_req && obj->last_write_req->engine->id == ring) 2636 i915_gem_object_retire__write(obj); 2637 2638 obj->active &= ~(1 << ring); 2639 if (obj->active) 2640 return; 2641 2642 /* Bump our place on the bound list to keep it roughly in LRU order 2643 * so that we don't steal from recently used but inactive objects 2644 * (unless we are forced to ofc!) 2645 */ 2646 list_move_tail(&obj->global_list, 2647 &to_i915(obj->base.dev)->mm.bound_list); 2648 2649 list_for_each_entry(vma, &obj->vma_list, obj_link) { 2650 if (!list_empty(&vma->vm_link)) 2651 list_move_tail(&vma->vm_link, &vma->vm->inactive_list); 2652 } 2653 2654 i915_gem_request_assign(&obj->last_fenced_req, NULL); 2655 drm_gem_object_unreference(&obj->base); 2656 } 2657 2658 static int 2659 i915_gem_init_seqno(struct drm_device *dev, u32 seqno) 2660 { 2661 struct drm_i915_private *dev_priv = dev->dev_private; 2662 struct intel_engine_cs *engine; 2663 int ret; 2664 2665 /* Carefully retire all requests without writing to the rings */ 2666 for_each_engine(engine, dev_priv) { 2667 ret = intel_engine_idle(engine); 2668 if (ret) 2669 return ret; 2670 } 2671 i915_gem_retire_requests(dev); 2672 2673 /* Finally reset hw state */ 2674 for_each_engine(engine, dev_priv) 2675 intel_ring_init_seqno(engine, seqno); 2676 2677 return 0; 2678 } 2679 2680 int i915_gem_set_seqno(struct drm_device *dev, u32 seqno) 2681 { 2682 struct drm_i915_private *dev_priv = dev->dev_private; 2683 int ret; 2684 2685 if (seqno == 0) 2686 return -EINVAL; 2687 2688 /* HWS page needs to be set less than what we 2689 * will inject to ring 2690 */ 2691 ret = i915_gem_init_seqno(dev, seqno - 1); 2692 if (ret) 2693 return ret; 2694 2695 /* Carefully set the last_seqno value so that wrap 2696 * detection still works 2697 */ 2698 dev_priv->next_seqno = seqno; 2699 dev_priv->last_seqno = seqno - 1; 2700 if (dev_priv->last_seqno == 0) 2701 dev_priv->last_seqno--; 2702 2703 return 0; 2704 } 2705 2706 int 2707 i915_gem_get_seqno(struct drm_device *dev, u32 *seqno) 2708 { 2709 struct drm_i915_private *dev_priv = dev->dev_private; 2710 2711 /* reserve 0 for non-seqno */ 2712 if (dev_priv->next_seqno == 0) { 2713 int ret = i915_gem_init_seqno(dev, 0); 2714 if (ret) 2715 return ret; 2716 2717 
dev_priv->next_seqno = 1; 2718 } 2719 2720 *seqno = dev_priv->last_seqno = dev_priv->next_seqno++; 2721 return 0; 2722 } 2723 2724 /* 2725 * NB: This function is not allowed to fail. Doing so would mean the the 2726 * request is not being tracked for completion but the work itself is 2727 * going to happen on the hardware. This would be a Bad Thing(tm). 2728 */ 2729 void __i915_add_request(struct drm_i915_gem_request *request, 2730 struct drm_i915_gem_object *obj, 2731 bool flush_caches) 2732 { 2733 struct intel_engine_cs *engine; 2734 struct drm_i915_private *dev_priv; 2735 struct intel_ringbuffer *ringbuf; 2736 u32 request_start; 2737 int ret; 2738 2739 if (WARN_ON(request == NULL)) 2740 return; 2741 2742 engine = request->engine; 2743 dev_priv = request->i915; 2744 ringbuf = request->ringbuf; 2745 2746 /* 2747 * To ensure that this call will not fail, space for its emissions 2748 * should already have been reserved in the ring buffer. Let the ring 2749 * know that it is time to use that space up. 2750 */ 2751 intel_ring_reserved_space_use(ringbuf); 2752 2753 request_start = intel_ring_get_tail(ringbuf); 2754 /* 2755 * Emit any outstanding flushes - execbuf can fail to emit the flush 2756 * after having emitted the batchbuffer command. Hence we need to fix 2757 * things up similar to emitting the lazy request. The difference here 2758 * is that the flush _must_ happen before the next request, no matter 2759 * what. 2760 */ 2761 if (flush_caches) { 2762 if (i915.enable_execlists) 2763 ret = logical_ring_flush_all_caches(request); 2764 else 2765 ret = intel_ring_flush_all_caches(request); 2766 /* Not allowed to fail! */ 2767 WARN(ret, "*_ring_flush_all_caches failed: %d!\n", ret); 2768 } 2769 2770 trace_i915_gem_request_add(request); 2771 2772 request->head = request_start; 2773 2774 /* Whilst this request exists, batch_obj will be on the 2775 * active_list, and so will hold the active reference. Only when this 2776 * request is retired will the the batch_obj be moved onto the 2777 * inactive_list and lose its active reference. Hence we do not need 2778 * to explicitly hold another reference here. 2779 */ 2780 request->batch_obj = obj; 2781 2782 /* Seal the request and mark it as pending execution. Note that 2783 * we may inspect this state, without holding any locks, during 2784 * hangcheck. Hence we apply the barrier to ensure that we do not 2785 * see a more recent value in the hws than we are tracking. 2786 */ 2787 request->emitted_jiffies = jiffies; 2788 request->previous_seqno = engine->last_submitted_seqno; 2789 smp_store_mb(engine->last_submitted_seqno, request->seqno); 2790 list_add_tail(&request->list, &engine->request_list); 2791 2792 /* Record the position of the start of the request so that 2793 * should we detect the updated seqno part-way through the 2794 * GPU processing the request, we never over-estimate the 2795 * position of the head. 2796 */ 2797 request->postfix = intel_ring_get_tail(ringbuf); 2798 2799 if (i915.enable_execlists) 2800 ret = engine->emit_request(request); 2801 else { 2802 ret = engine->add_request(request); 2803 2804 request->tail = intel_ring_get_tail(ringbuf); 2805 } 2806 2807 /* Not allowed to fail! */ 2808 WARN(ret, "emit|add_request failed: %d!\n", ret); 2809 2810 i915_queue_hangcheck(engine->dev); 2811 2812 queue_delayed_work(dev_priv->wq, 2813 &dev_priv->mm.retire_work, 2814 round_jiffies_up_relative(HZ)); 2815 intel_mark_busy(dev_priv->dev); 2816 2817 /* Sanity check that the reserved size was large enough. 
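 * As a descriptive aside (entry-point names taken from this file and from
 * intel_ringbuffer.c of the same vintage; treat the exact pairing as an
 * assumption): the reservation lifecycle used here is
 *
 *	intel_ring_reserved_space_reserve()  when the request is allocated,
 *	intel_ring_reserved_space_use()      at the top of this function,
 *	intel_ring_reserved_space_end()      just below, which also verifies
 *	                                     the reservation was big enough,
 *
 * with intel_ring_reserved_space_cancel() taking the place of use()/end()
 * when a request is abandoned before being emitted, as in
 * __i915_gem_request_alloc() further down.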
*/ 2818 intel_ring_reserved_space_end(ringbuf); 2819 } 2820 2821 static bool i915_context_is_banned(struct drm_i915_private *dev_priv, 2822 const struct intel_context *ctx) 2823 { 2824 unsigned long elapsed; 2825 2826 elapsed = get_seconds() - ctx->hang_stats.guilty_ts; 2827 2828 if (ctx->hang_stats.banned) 2829 return true; 2830 2831 if (ctx->hang_stats.ban_period_seconds && 2832 elapsed <= ctx->hang_stats.ban_period_seconds) { 2833 if (!i915_gem_context_is_default(ctx)) { 2834 DRM_DEBUG("context hanging too fast, banning!\n"); 2835 return true; 2836 } else if (i915_stop_ring_allow_ban(dev_priv)) { 2837 if (i915_stop_ring_allow_warn(dev_priv)) 2838 DRM_ERROR("gpu hanging too fast, banning!\n"); 2839 return true; 2840 } 2841 } 2842 2843 return false; 2844 } 2845 2846 static void i915_set_reset_status(struct drm_i915_private *dev_priv, 2847 struct intel_context *ctx, 2848 const bool guilty) 2849 { 2850 struct i915_ctx_hang_stats *hs; 2851 2852 if (WARN_ON(!ctx)) 2853 return; 2854 2855 hs = &ctx->hang_stats; 2856 2857 if (guilty) { 2858 hs->banned = i915_context_is_banned(dev_priv, ctx); 2859 hs->batch_active++; 2860 hs->guilty_ts = get_seconds(); 2861 } else { 2862 hs->batch_pending++; 2863 } 2864 } 2865 2866 void i915_gem_request_free(struct kref *req_ref) 2867 { 2868 struct drm_i915_gem_request *req = container_of(req_ref, 2869 typeof(*req), ref); 2870 struct intel_context *ctx = req->ctx; 2871 2872 if (req->file_priv) 2873 i915_gem_request_remove_from_client(req); 2874 2875 if (ctx) { 2876 if (i915.enable_execlists && ctx != req->i915->kernel_context) 2877 intel_lr_context_unpin(ctx, req->engine); 2878 2879 i915_gem_context_unreference(ctx); 2880 } 2881 2882 kfree(req); 2883 } 2884 2885 static inline int 2886 __i915_gem_request_alloc(struct intel_engine_cs *engine, 2887 struct intel_context *ctx, 2888 struct drm_i915_gem_request **req_out) 2889 { 2890 struct drm_i915_private *dev_priv = to_i915(engine->dev); 2891 unsigned reset_counter = i915_reset_counter(&dev_priv->gpu_error); 2892 struct drm_i915_gem_request *req; 2893 int ret; 2894 2895 if (!req_out) 2896 return -EINVAL; 2897 2898 *req_out = NULL; 2899 2900 /* ABI: Before userspace accesses the GPU (e.g. execbuffer), report 2901 * EIO if the GPU is already wedged, or EAGAIN to drop the struct_mutex 2902 * and restart. 2903 */ 2904 ret = i915_gem_check_wedge(reset_counter, dev_priv->mm.interruptible); 2905 if (ret) 2906 return ret; 2907 2908 req = kzalloc(sizeof(*req), GFP_KERNEL); 2909 if (req == NULL) 2910 return -ENOMEM; 2911 2912 ret = i915_gem_get_seqno(engine->dev, &req->seqno); 2913 if (ret) 2914 goto err; 2915 2916 kref_init(&req->ref); 2917 req->i915 = dev_priv; 2918 req->engine = engine; 2919 req->reset_counter = reset_counter; 2920 req->ctx = ctx; 2921 i915_gem_context_reference(req->ctx); 2922 2923 if (i915.enable_execlists) 2924 ret = intel_logical_ring_alloc_request_extras(req); 2925 else 2926 ret = intel_ring_alloc_request_extras(req); 2927 if (ret) { 2928 i915_gem_context_unreference(req->ctx); 2929 goto err; 2930 } 2931 2932 /* 2933 * Reserve space in the ring buffer for all the commands required to 2934 * eventually emit this request. This is to guarantee that the 2935 * i915_add_request() call can't fail. Note that the reserve may need 2936 * to be redone if the request is not actually submitted straight 2937 * away, e.g. because a GPU scheduler has deferred it. 
2938 */ 2939 if (i915.enable_execlists) 2940 ret = intel_logical_ring_reserve_space(req); 2941 else 2942 ret = intel_ring_reserve_space(req); 2943 if (ret) { 2944 /* 2945 * At this point, the request is fully allocated even if not 2946 * fully prepared. Thus it can be cleaned up using the proper 2947 * free code. 2948 */ 2949 intel_ring_reserved_space_cancel(req->ringbuf); 2950 i915_gem_request_unreference(req); 2951 return ret; 2952 } 2953 2954 *req_out = req; 2955 return 0; 2956 2957 err: 2958 kfree(req); 2959 return ret; 2960 } 2961 2962 /** 2963 * i915_gem_request_alloc - allocate a request structure 2964 * 2965 * @engine: engine that we wish to issue the request on. 2966 * @ctx: context that the request will be associated with. 2967 * This can be NULL if the request is not directly related to 2968 * any specific user context, in which case this function will 2969 * choose an appropriate context to use. 2970 * 2971 * Returns a pointer to the allocated request if successful, 2972 * or an error code if not. 2973 */ 2974 struct drm_i915_gem_request * 2975 i915_gem_request_alloc(struct intel_engine_cs *engine, 2976 struct intel_context *ctx) 2977 { 2978 struct drm_i915_gem_request *req; 2979 int err; 2980 2981 if (ctx == NULL) 2982 ctx = to_i915(engine->dev)->kernel_context; 2983 err = __i915_gem_request_alloc(engine, ctx, &req); 2984 return err ? ERR_PTR(err) : req; 2985 } 2986 2987 struct drm_i915_gem_request * 2988 i915_gem_find_active_request(struct intel_engine_cs *engine) 2989 { 2990 struct drm_i915_gem_request *request; 2991 2992 list_for_each_entry(request, &engine->request_list, list) { 2993 if (i915_gem_request_completed(request, false)) 2994 continue; 2995 2996 return request; 2997 } 2998 2999 return NULL; 3000 } 3001 3002 static void i915_gem_reset_engine_status(struct drm_i915_private *dev_priv, 3003 struct intel_engine_cs *engine) 3004 { 3005 struct drm_i915_gem_request *request; 3006 bool ring_hung; 3007 3008 request = i915_gem_find_active_request(engine); 3009 3010 if (request == NULL) 3011 return; 3012 3013 ring_hung = engine->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG; 3014 3015 i915_set_reset_status(dev_priv, request->ctx, ring_hung); 3016 3017 list_for_each_entry_continue(request, &engine->request_list, list) 3018 i915_set_reset_status(dev_priv, request->ctx, false); 3019 } 3020 3021 static void i915_gem_reset_engine_cleanup(struct drm_i915_private *dev_priv, 3022 struct intel_engine_cs *engine) 3023 { 3024 struct intel_ringbuffer *buffer; 3025 3026 while (!list_empty(&engine->active_list)) { 3027 struct drm_i915_gem_object *obj; 3028 3029 obj = list_first_entry(&engine->active_list, 3030 struct drm_i915_gem_object, 3031 engine_list[engine->id]); 3032 3033 i915_gem_object_retire__read(obj, engine->id); 3034 } 3035 3036 /* 3037 * Clear the execlists queue up before freeing the requests, as those 3038 * are the ones that keep the context and ringbuffer backing objects 3039 * pinned in place. 3040 */ 3041 3042 if (i915.enable_execlists) { 3043 /* Ensure irq handler finishes or is cancelled. */ 3044 tasklet_kill(&engine->irq_tasklet); 3045 3046 spin_lock_bh(&engine->execlist_lock); 3047 /* list_splice_tail_init checks for empty lists */ 3048 list_splice_tail_init(&engine->execlist_queue, 3049 &engine->execlist_retired_req_list); 3050 spin_unlock_bh(&engine->execlist_lock); 3051 3052 intel_execlists_retire_requests(engine); 3053 } 3054 3055 /* 3056 * We must free the requests after all the corresponding objects have 3057 * been moved off active lists. 
Which is the same order as the normal 3058 * retire_requests function does. This is important if object hold 3059 * implicit references on things like e.g. ppgtt address spaces through 3060 * the request. 3061 */ 3062 while (!list_empty(&engine->request_list)) { 3063 struct drm_i915_gem_request *request; 3064 3065 request = list_first_entry(&engine->request_list, 3066 struct drm_i915_gem_request, 3067 list); 3068 3069 i915_gem_request_retire(request); 3070 } 3071 3072 /* Having flushed all requests from all queues, we know that all 3073 * ringbuffers must now be empty. However, since we do not reclaim 3074 * all space when retiring the request (to prevent HEADs colliding 3075 * with rapid ringbuffer wraparound) the amount of available space 3076 * upon reset is less than when we start. Do one more pass over 3077 * all the ringbuffers to reset last_retired_head. 3078 */ 3079 list_for_each_entry(buffer, &engine->buffers, link) { 3080 buffer->last_retired_head = buffer->tail; 3081 intel_ring_update_space(buffer); 3082 } 3083 3084 intel_ring_init_seqno(engine, engine->last_submitted_seqno); 3085 } 3086 3087 void i915_gem_reset(struct drm_device *dev) 3088 { 3089 struct drm_i915_private *dev_priv = dev->dev_private; 3090 struct intel_engine_cs *engine; 3091 3092 /* 3093 * Before we free the objects from the requests, we need to inspect 3094 * them for finding the guilty party. As the requests only borrow 3095 * their reference to the objects, the inspection must be done first. 3096 */ 3097 for_each_engine(engine, dev_priv) 3098 i915_gem_reset_engine_status(dev_priv, engine); 3099 3100 for_each_engine(engine, dev_priv) 3101 i915_gem_reset_engine_cleanup(dev_priv, engine); 3102 3103 i915_gem_context_reset(dev); 3104 3105 i915_gem_restore_fences(dev); 3106 3107 WARN_ON(i915_verify_lists(dev)); 3108 } 3109 3110 /** 3111 * This function clears the request list as sequence numbers are passed. 3112 */ 3113 void 3114 i915_gem_retire_requests_ring(struct intel_engine_cs *engine) 3115 { 3116 WARN_ON(i915_verify_lists(engine->dev)); 3117 3118 /* Retire requests first as we use it above for the early return. 3119 * If we retire requests last, we may use a later seqno and so clear 3120 * the requests lists without clearing the active list, leading to 3121 * confusion. 3122 */ 3123 while (!list_empty(&engine->request_list)) { 3124 struct drm_i915_gem_request *request; 3125 3126 request = list_first_entry(&engine->request_list, 3127 struct drm_i915_gem_request, 3128 list); 3129 3130 if (!i915_gem_request_completed(request, true)) 3131 break; 3132 3133 i915_gem_request_retire(request); 3134 } 3135 3136 /* Move any buffers on the active list that are no longer referenced 3137 * by the ringbuffer to the flushing/inactive lists as appropriate, 3138 * before we free the context associated with the requests. 
3139 */ 3140 while (!list_empty(&engine->active_list)) { 3141 struct drm_i915_gem_object *obj; 3142 3143 obj = list_first_entry(&engine->active_list, 3144 struct drm_i915_gem_object, 3145 engine_list[engine->id]); 3146 3147 if (!list_empty(&obj->last_read_req[engine->id]->list)) 3148 break; 3149 3150 i915_gem_object_retire__read(obj, engine->id); 3151 } 3152 3153 if (unlikely(engine->trace_irq_req && 3154 i915_gem_request_completed(engine->trace_irq_req, true))) { 3155 engine->irq_put(engine); 3156 i915_gem_request_assign(&engine->trace_irq_req, NULL); 3157 } 3158 3159 WARN_ON(i915_verify_lists(engine->dev)); 3160 } 3161 3162 bool 3163 i915_gem_retire_requests(struct drm_device *dev) 3164 { 3165 struct drm_i915_private *dev_priv = dev->dev_private; 3166 struct intel_engine_cs *engine; 3167 bool idle = true; 3168 3169 for_each_engine(engine, dev_priv) { 3170 i915_gem_retire_requests_ring(engine); 3171 idle &= list_empty(&engine->request_list); 3172 if (i915.enable_execlists) { 3173 spin_lock_bh(&engine->execlist_lock); 3174 idle &= list_empty(&engine->execlist_queue); 3175 spin_unlock_bh(&engine->execlist_lock); 3176 3177 intel_execlists_retire_requests(engine); 3178 } 3179 } 3180 3181 if (idle) 3182 mod_delayed_work(dev_priv->wq, 3183 &dev_priv->mm.idle_work, 3184 msecs_to_jiffies(100)); 3185 3186 return idle; 3187 } 3188 3189 static void 3190 i915_gem_retire_work_handler(struct work_struct *work) 3191 { 3192 struct drm_i915_private *dev_priv = 3193 container_of(work, typeof(*dev_priv), mm.retire_work.work); 3194 struct drm_device *dev = dev_priv->dev; 3195 bool idle; 3196 3197 /* Come back later if the device is busy... */ 3198 idle = false; 3199 if (mutex_trylock(&dev->struct_mutex)) { 3200 idle = i915_gem_retire_requests(dev); 3201 mutex_unlock(&dev->struct_mutex); 3202 } 3203 if (!idle) 3204 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 3205 round_jiffies_up_relative(HZ)); 3206 } 3207 3208 static void 3209 i915_gem_idle_work_handler(struct work_struct *work) 3210 { 3211 struct drm_i915_private *dev_priv = 3212 container_of(work, typeof(*dev_priv), mm.idle_work.work); 3213 struct drm_device *dev = dev_priv->dev; 3214 struct intel_engine_cs *engine; 3215 3216 for_each_engine(engine, dev_priv) 3217 if (!list_empty(&engine->request_list)) 3218 return; 3219 3220 /* we probably should sync with hangcheck here, using cancel_work_sync. 3221 * Also locking seems to be fubar here, engine->request_list is protected 3222 * by dev->struct_mutex. */ 3223 3224 intel_mark_idle(dev); 3225 3226 if (mutex_trylock(&dev->struct_mutex)) { 3227 for_each_engine(engine, dev_priv) 3228 i915_gem_batch_pool_fini(&engine->batch_pool); 3229 3230 mutex_unlock(&dev->struct_mutex); 3231 } 3232 } 3233 3234 /** 3235 * Ensures that an object will eventually get non-busy by flushing any required 3236 * write domains, emitting any outstanding lazy request and retiring and 3237 * completed requests. 
 */
static int
i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
{
	int i;

	if (!obj->active)
		return 0;

	for (i = 0; i < I915_NUM_ENGINES; i++) {
		struct drm_i915_gem_request *req;

		req = obj->last_read_req[i];
		if (req == NULL)
			continue;

		if (list_empty(&req->list))
			goto retire;

		if (i915_gem_request_completed(req, true)) {
			__i915_gem_request_retire__upto(req);
retire:
			i915_gem_object_retire__read(obj, i);
		}
	}

	return 0;
}

/**
 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
 * @DRM_IOCTL_ARGS: standard ioctl arguments
 *
 * Returns 0 if successful, else an error is returned with the remaining time in
 * the timeout parameter.
 *  -ETIME: object is still busy after timeout
 *  -ERESTARTSYS: signal interrupted the wait
 *  -ENOENT: object doesn't exist
 * Also possible, but rare:
 *  -EAGAIN: GPU wedged
 *  -ENOMEM: damn
 *  -ENODEV: Internal IRQ fail
 *  -E?: The add request failed
 *
 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
 * non-zero timeout parameter the wait ioctl will wait for the given number of
 * nanoseconds on an object becoming unbusy. Since the wait itself does so
 * without holding struct_mutex the object may become re-busied before this
 * function completes. A similar but shorter race condition exists in the busy
 * ioctl.
 */
int
i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
	struct drm_i915_gem_wait *args = data;
	struct drm_i915_gem_object *obj;
	struct drm_i915_gem_request *req[I915_NUM_ENGINES];
	int i, n = 0;
	int ret;

	if (args->flags != 0)
		return -EINVAL;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(file, args->bo_handle));
	if (&obj->base == NULL) {
		mutex_unlock(&dev->struct_mutex);
		return -ENOENT;
	}

	/* Need to make sure the object gets inactive eventually. */
	ret = i915_gem_object_flush_active(obj);
	if (ret)
		goto out;

	if (!obj->active)
		goto out;

	/* Do this after OLR check to make sure we make forward progress polling
	 * on this IOCTL with a timeout == 0 (like busy ioctl)
	 */
	if (args->timeout_ns == 0) {
		ret = -ETIME;
		goto out;
	}

	drm_gem_object_unreference(&obj->base);

	for (i = 0; i < I915_NUM_ENGINES; i++) {
		if (obj->last_read_req[i] == NULL)
			continue;

		req[n++] = i915_gem_request_reference(obj->last_read_req[i]);
	}

	mutex_unlock(&dev->struct_mutex);

	for (i = 0; i < n; i++) {
		if (ret == 0)
			ret = __i915_wait_request(req[i], true,
						  args->timeout_ns > 0 ?
&args->timeout_ns : NULL, 3342 to_rps_client(file)); 3343 i915_gem_request_unreference__unlocked(req[i]); 3344 } 3345 return ret; 3346 3347 out: 3348 drm_gem_object_unreference(&obj->base); 3349 mutex_unlock(&dev->struct_mutex); 3350 return ret; 3351 } 3352 3353 static int 3354 __i915_gem_object_sync(struct drm_i915_gem_object *obj, 3355 struct intel_engine_cs *to, 3356 struct drm_i915_gem_request *from_req, 3357 struct drm_i915_gem_request **to_req) 3358 { 3359 struct intel_engine_cs *from; 3360 int ret; 3361 3362 from = i915_gem_request_get_engine(from_req); 3363 if (to == from) 3364 return 0; 3365 3366 if (i915_gem_request_completed(from_req, true)) 3367 return 0; 3368 3369 if (!i915_semaphore_is_enabled(obj->base.dev)) { 3370 struct drm_i915_private *i915 = to_i915(obj->base.dev); 3371 ret = __i915_wait_request(from_req, 3372 i915->mm.interruptible, 3373 NULL, 3374 &i915->rps.semaphores); 3375 if (ret) 3376 return ret; 3377 3378 i915_gem_object_retire_request(obj, from_req); 3379 } else { 3380 int idx = intel_ring_sync_index(from, to); 3381 u32 seqno = i915_gem_request_get_seqno(from_req); 3382 3383 WARN_ON(!to_req); 3384 3385 if (seqno <= from->semaphore.sync_seqno[idx]) 3386 return 0; 3387 3388 if (*to_req == NULL) { 3389 struct drm_i915_gem_request *req; 3390 3391 req = i915_gem_request_alloc(to, NULL); 3392 if (IS_ERR(req)) 3393 return PTR_ERR(req); 3394 3395 *to_req = req; 3396 } 3397 3398 trace_i915_gem_ring_sync_to(*to_req, from, from_req); 3399 ret = to->semaphore.sync_to(*to_req, from, seqno); 3400 if (ret) 3401 return ret; 3402 3403 /* We use last_read_req because sync_to() 3404 * might have just caused seqno wrap under 3405 * the radar. 3406 */ 3407 from->semaphore.sync_seqno[idx] = 3408 i915_gem_request_get_seqno(obj->last_read_req[from->id]); 3409 } 3410 3411 return 0; 3412 } 3413 3414 /** 3415 * i915_gem_object_sync - sync an object to a ring. 3416 * 3417 * @obj: object which may be in use on another ring. 3418 * @to: ring we wish to use the object on. May be NULL. 3419 * @to_req: request we wish to use the object for. See below. 3420 * This will be allocated and returned if a request is 3421 * required but not passed in. 3422 * 3423 * This code is meant to abstract object synchronization with the GPU. 3424 * Calling with NULL implies synchronizing the object with the CPU 3425 * rather than a particular GPU ring. Conceptually we serialise writes 3426 * between engines inside the GPU. We only allow one engine to write 3427 * into a buffer at any time, but multiple readers. To ensure each has 3428 * a coherent view of memory, we must: 3429 * 3430 * - If there is an outstanding write request to the object, the new 3431 * request must wait for it to complete (either CPU or in hw, requests 3432 * on the same ring will be naturally ordered). 3433 * 3434 * - If we are a write request (pending_write_domain is set), the new 3435 * request must wait for outstanding read requests to complete. 3436 * 3437 * For CPU synchronisation (NULL to) no request is required. For syncing with 3438 * rings to_req must be non-NULL. However, a request does not have to be 3439 * pre-allocated. If *to_req is NULL and sync commands will be emitted then a 3440 * request will be allocated automatically and returned through *to_req. Note 3441 * that it is not guaranteed that commands will be emitted (because the system 3442 * might already be idle). Hence there is no need to create a request that 3443 * might never have any work submitted. 
Note further that if a request is 3444 * returned in *to_req, it is the responsibility of the caller to submit 3445 * that request (after potentially adding more work to it). 3446 * 3447 * Returns 0 if successful, else propagates up the lower layer error. 3448 */ 3449 int 3450 i915_gem_object_sync(struct drm_i915_gem_object *obj, 3451 struct intel_engine_cs *to, 3452 struct drm_i915_gem_request **to_req) 3453 { 3454 const bool readonly = obj->base.pending_write_domain == 0; 3455 struct drm_i915_gem_request *req[I915_NUM_ENGINES]; 3456 int ret, i, n; 3457 3458 if (!obj->active) 3459 return 0; 3460 3461 if (to == NULL) 3462 return i915_gem_object_wait_rendering(obj, readonly); 3463 3464 n = 0; 3465 if (readonly) { 3466 if (obj->last_write_req) 3467 req[n++] = obj->last_write_req; 3468 } else { 3469 for (i = 0; i < I915_NUM_ENGINES; i++) 3470 if (obj->last_read_req[i]) 3471 req[n++] = obj->last_read_req[i]; 3472 } 3473 for (i = 0; i < n; i++) { 3474 ret = __i915_gem_object_sync(obj, to, req[i], to_req); 3475 if (ret) 3476 return ret; 3477 } 3478 3479 return 0; 3480 } 3481 3482 static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj) 3483 { 3484 u32 old_write_domain, old_read_domains; 3485 3486 /* Force a pagefault for domain tracking on next user access */ 3487 i915_gem_release_mmap(obj); 3488 3489 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) 3490 return; 3491 3492 old_read_domains = obj->base.read_domains; 3493 old_write_domain = obj->base.write_domain; 3494 3495 obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT; 3496 obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT; 3497 3498 trace_i915_gem_object_change_domain(obj, 3499 old_read_domains, 3500 old_write_domain); 3501 } 3502 3503 static int __i915_vma_unbind(struct i915_vma *vma, bool wait) 3504 { 3505 struct drm_i915_gem_object *obj = vma->obj; 3506 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 3507 int ret; 3508 3509 if (list_empty(&vma->obj_link)) 3510 return 0; 3511 3512 if (!drm_mm_node_allocated(&vma->node)) { 3513 i915_gem_vma_destroy(vma); 3514 return 0; 3515 } 3516 3517 if (vma->pin_count) 3518 return -EBUSY; 3519 3520 BUG_ON(obj->pages == NULL); 3521 3522 if (wait) { 3523 ret = i915_gem_object_wait_rendering(obj, false); 3524 if (ret) 3525 return ret; 3526 } 3527 3528 if (vma->is_ggtt && vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) { 3529 i915_gem_object_finish_gtt(obj); 3530 3531 /* release the fence reg _after_ flushing */ 3532 ret = i915_gem_object_put_fence(obj); 3533 if (ret) 3534 return ret; 3535 } 3536 3537 trace_i915_vma_unbind(vma); 3538 3539 vma->vm->unbind_vma(vma); 3540 vma->bound = 0; 3541 3542 list_del_init(&vma->vm_link); 3543 if (vma->is_ggtt) { 3544 if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) { 3545 obj->map_and_fenceable = false; 3546 } else if (vma->ggtt_view.pages) { 3547 sg_free_table(vma->ggtt_view.pages); 3548 kfree(vma->ggtt_view.pages); 3549 } 3550 vma->ggtt_view.pages = NULL; 3551 } 3552 3553 drm_mm_remove_node(&vma->node); 3554 i915_gem_vma_destroy(vma); 3555 3556 /* Since the unbound list is global, only move to that list if 3557 * no more VMAs exist. */ 3558 if (list_empty(&obj->vma_list)) 3559 list_move_tail(&obj->global_list, &dev_priv->mm.unbound_list); 3560 3561 /* And finally now the object is completely decoupled from this vma, 3562 * we can drop its hold on the backing storage and allow it to be 3563 * reaped by the shrinker. 
	 */
	i915_gem_object_unpin_pages(obj);

	return 0;
}

int i915_vma_unbind(struct i915_vma *vma)
{
	return __i915_vma_unbind(vma, true);
}

int __i915_vma_unbind_no_wait(struct i915_vma *vma)
{
	return __i915_vma_unbind(vma, false);
}

int i915_gpu_idle(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *engine;
	int ret;

	/* Flush everything onto the inactive list. */
	for_each_engine(engine, dev_priv) {
		if (!i915.enable_execlists) {
			struct drm_i915_gem_request *req;

			req = i915_gem_request_alloc(engine, NULL);
			if (IS_ERR(req))
				return PTR_ERR(req);

			ret = i915_switch_context(req);
			i915_add_request_no_flush(req);
			if (ret)
				return ret;
		}

		ret = intel_engine_idle(engine);
		if (ret)
			return ret;
	}

	WARN_ON(i915_verify_lists(dev));
	return 0;
}

static bool i915_gem_valid_gtt_space(struct i915_vma *vma,
				     unsigned long cache_level)
{
	struct drm_mm_node *gtt_space = &vma->node;
	struct drm_mm_node *other;

	/*
	 * On some machines we have to be careful when putting differing types
	 * of snoopable memory together to avoid the prefetcher crossing memory
	 * domains and dying. During vm initialisation, we decide whether or not
	 * these constraints apply and set the drm_mm.color_adjust
	 * appropriately.
	 */
	if (vma->vm->mm.color_adjust == NULL)
		return true;

	if (!drm_mm_node_allocated(gtt_space))
		return true;

	if (list_empty(&gtt_space->node_list))
		return true;

	other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list);
	if (other->allocated && !other->hole_follows && other->color != cache_level)
		return false;

	other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list);
	if (other->allocated && !gtt_space->hole_follows && other->color != cache_level)
		return false;

	return true;
}

/**
 * Finds free space in the GTT aperture and binds the object or a view of it
 * there.
 */
static struct i915_vma *
i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
			   struct i915_address_space *vm,
			   const struct i915_ggtt_view *ggtt_view,
			   unsigned alignment,
			   uint64_t flags)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct i915_ggtt *ggtt = &dev_priv->ggtt;
	u32 fence_alignment, unfenced_alignment;
	u32 search_flag, alloc_flag;
	u64 start, end;
	u64 size, fence_size;
	struct i915_vma *vma;
	int ret;

	if (i915_is_ggtt(vm)) {
		u32 view_size;

		if (WARN_ON(!ggtt_view))
			return ERR_PTR(-EINVAL);

		view_size = i915_ggtt_view_size(obj, ggtt_view);

		fence_size = i915_gem_get_gtt_size(dev,
						   view_size,
						   obj->tiling_mode);
		fence_alignment = i915_gem_get_gtt_alignment(dev,
							     view_size,
							     obj->tiling_mode,
							     true);
		unfenced_alignment = i915_gem_get_gtt_alignment(dev,
								view_size,
								obj->tiling_mode,
								false);
		size = flags & PIN_MAPPABLE ?
fence_size : view_size; 3684 } else { 3685 fence_size = i915_gem_get_gtt_size(dev, 3686 obj->base.size, 3687 obj->tiling_mode); 3688 fence_alignment = i915_gem_get_gtt_alignment(dev, 3689 obj->base.size, 3690 obj->tiling_mode, 3691 true); 3692 unfenced_alignment = 3693 i915_gem_get_gtt_alignment(dev, 3694 obj->base.size, 3695 obj->tiling_mode, 3696 false); 3697 size = flags & PIN_MAPPABLE ? fence_size : obj->base.size; 3698 } 3699 3700 start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0; 3701 end = vm->total; 3702 if (flags & PIN_MAPPABLE) 3703 end = min_t(u64, end, ggtt->mappable_end); 3704 if (flags & PIN_ZONE_4G) 3705 end = min_t(u64, end, (1ULL << 32) - PAGE_SIZE); 3706 3707 if (alignment == 0) 3708 alignment = flags & PIN_MAPPABLE ? fence_alignment : 3709 unfenced_alignment; 3710 if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) { 3711 DRM_DEBUG("Invalid object (view type=%u) alignment requested %u\n", 3712 ggtt_view ? ggtt_view->type : 0, 3713 alignment); 3714 return ERR_PTR(-EINVAL); 3715 } 3716 3717 /* If binding the object/GGTT view requires more space than the entire 3718 * aperture has, reject it early before evicting everything in a vain 3719 * attempt to find space. 3720 */ 3721 if (size > end) { 3722 DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: size=%llu > %s aperture=%llu\n", 3723 ggtt_view ? ggtt_view->type : 0, 3724 size, 3725 flags & PIN_MAPPABLE ? "mappable" : "total", 3726 end); 3727 return ERR_PTR(-E2BIG); 3728 } 3729 3730 ret = i915_gem_object_get_pages(obj); 3731 if (ret) 3732 return ERR_PTR(ret); 3733 3734 i915_gem_object_pin_pages(obj); 3735 3736 vma = ggtt_view ? i915_gem_obj_lookup_or_create_ggtt_vma(obj, ggtt_view) : 3737 i915_gem_obj_lookup_or_create_vma(obj, vm); 3738 3739 if (IS_ERR(vma)) 3740 goto err_unpin; 3741 3742 if (flags & PIN_OFFSET_FIXED) { 3743 uint64_t offset = flags & PIN_OFFSET_MASK; 3744 3745 if (offset & (alignment - 1) || offset + size > end) { 3746 ret = -EINVAL; 3747 goto err_free_vma; 3748 } 3749 vma->node.start = offset; 3750 vma->node.size = size; 3751 vma->node.color = obj->cache_level; 3752 ret = drm_mm_reserve_node(&vm->mm, &vma->node); 3753 if (ret) { 3754 ret = i915_gem_evict_for_vma(vma); 3755 if (ret == 0) 3756 ret = drm_mm_reserve_node(&vm->mm, &vma->node); 3757 } 3758 if (ret) 3759 goto err_free_vma; 3760 } else { 3761 if (flags & PIN_HIGH) { 3762 search_flag = DRM_MM_SEARCH_BELOW; 3763 alloc_flag = DRM_MM_CREATE_TOP; 3764 } else { 3765 search_flag = DRM_MM_SEARCH_DEFAULT; 3766 alloc_flag = DRM_MM_CREATE_DEFAULT; 3767 } 3768 3769 search_free: 3770 ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node, 3771 size, alignment, 3772 obj->cache_level, 3773 start, end, 3774 search_flag, 3775 alloc_flag); 3776 if (ret) { 3777 ret = i915_gem_evict_something(dev, vm, size, alignment, 3778 obj->cache_level, 3779 start, end, 3780 flags); 3781 if (ret == 0) 3782 goto search_free; 3783 3784 goto err_free_vma; 3785 } 3786 } 3787 if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) { 3788 ret = -EINVAL; 3789 goto err_remove_node; 3790 } 3791 3792 trace_i915_vma_bind(vma, flags); 3793 ret = i915_vma_bind(vma, obj->cache_level, flags); 3794 if (ret) 3795 goto err_remove_node; 3796 3797 list_move_tail(&obj->global_list, &dev_priv->mm.bound_list); 3798 list_add_tail(&vma->vm_link, &vm->inactive_list); 3799 3800 return vma; 3801 3802 err_remove_node: 3803 drm_mm_remove_node(&vma->node); 3804 err_free_vma: 3805 i915_gem_vma_destroy(vma); 3806 vma = ERR_PTR(ret); 3807 
err_unpin: 3808 i915_gem_object_unpin_pages(obj); 3809 return vma; 3810 } 3811 3812 bool 3813 i915_gem_clflush_object(struct drm_i915_gem_object *obj, 3814 bool force) 3815 { 3816 /* If we don't have a page list set up, then we're not pinned 3817 * to GPU, and we can ignore the cache flush because it'll happen 3818 * again at bind time. 3819 */ 3820 if (obj->pages == NULL) 3821 return false; 3822 3823 /* 3824 * Stolen memory is always coherent with the GPU as it is explicitly 3825 * marked as wc by the system, or the system is cache-coherent. 3826 */ 3827 if (obj->stolen || obj->phys_handle) 3828 return false; 3829 3830 /* If the GPU is snooping the contents of the CPU cache, 3831 * we do not need to manually clear the CPU cache lines. However, 3832 * the caches are only snooped when the render cache is 3833 * flushed/invalidated. As we always have to emit invalidations 3834 * and flushes when moving into and out of the RENDER domain, correct 3835 * snooping behaviour occurs naturally as the result of our domain 3836 * tracking. 3837 */ 3838 if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) { 3839 obj->cache_dirty = true; 3840 return false; 3841 } 3842 3843 trace_i915_gem_object_clflush(obj); 3844 drm_clflush_sg(obj->pages); 3845 obj->cache_dirty = false; 3846 3847 return true; 3848 } 3849 3850 /** Flushes the GTT write domain for the object if it's dirty. */ 3851 static void 3852 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) 3853 { 3854 uint32_t old_write_domain; 3855 3856 if (obj->base.write_domain != I915_GEM_DOMAIN_GTT) 3857 return; 3858 3859 /* No actual flushing is required for the GTT write domain. Writes 3860 * to it immediately go to main memory as far as we know, so there's 3861 * no chipset flush. It also doesn't land in render cache. 3862 * 3863 * However, we do have to enforce the order so that all writes through 3864 * the GTT land before any writes to the device, such as updates to 3865 * the GATT itself. 3866 */ 3867 wmb(); 3868 3869 old_write_domain = obj->base.write_domain; 3870 obj->base.write_domain = 0; 3871 3872 intel_fb_obj_flush(obj, false, ORIGIN_GTT); 3873 3874 trace_i915_gem_object_change_domain(obj, 3875 obj->base.read_domains, 3876 old_write_domain); 3877 } 3878 3879 /** Flushes the CPU write domain for the object if it's dirty. */ 3880 static void 3881 i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) 3882 { 3883 uint32_t old_write_domain; 3884 3885 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) 3886 return; 3887 3888 if (i915_gem_clflush_object(obj, obj->pin_display)) 3889 i915_gem_chipset_flush(obj->base.dev); 3890 3891 old_write_domain = obj->base.write_domain; 3892 obj->base.write_domain = 0; 3893 3894 intel_fb_obj_flush(obj, false, ORIGIN_CPU); 3895 3896 trace_i915_gem_object_change_domain(obj, 3897 obj->base.read_domains, 3898 old_write_domain); 3899 } 3900 3901 /** 3902 * Moves a single object to the GTT read, and possibly write domain. 3903 * 3904 * This function returns when the move is complete, including waiting on 3905 * flushes to occur. 
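 *
 * As an illustrative sketch only (mirroring the calls already made by
 * i915_gem_fault() earlier in this file, not a new API), a typical
 * GTT-write sequence under struct_mutex looks like:
 *
 *	ret = i915_gem_object_ggtt_pin(obj, &view, 0, PIN_MAPPABLE);
 *	if (ret == 0)
 *		ret = i915_gem_object_set_to_gtt_domain(obj, true);
 *	if (ret == 0)
 *		ret = i915_gem_object_get_fence(obj);
 *	... CPU writes through the mappable aperture ...
 *	i915_gem_object_ggtt_unpin_view(obj, &view);
 *
 * i.e. the domain switch happens only once the object is resident in the
 * GGTT, and the caller is responsible for unpinning afterwards.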
3906 */ 3907 int 3908 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) 3909 { 3910 struct drm_device *dev = obj->base.dev; 3911 struct drm_i915_private *dev_priv = to_i915(dev); 3912 struct i915_ggtt *ggtt = &dev_priv->ggtt; 3913 uint32_t old_write_domain, old_read_domains; 3914 struct i915_vma *vma; 3915 int ret; 3916 3917 if (obj->base.write_domain == I915_GEM_DOMAIN_GTT) 3918 return 0; 3919 3920 ret = i915_gem_object_wait_rendering(obj, !write); 3921 if (ret) 3922 return ret; 3923 3924 /* Flush and acquire obj->pages so that we are coherent through 3925 * direct access in memory with previous cached writes through 3926 * shmemfs and that our cache domain tracking remains valid. 3927 * For example, if the obj->filp was moved to swap without us 3928 * being notified and releasing the pages, we would mistakenly 3929 * continue to assume that the obj remained out of the CPU cached 3930 * domain. 3931 */ 3932 ret = i915_gem_object_get_pages(obj); 3933 if (ret) 3934 return ret; 3935 3936 i915_gem_object_flush_cpu_write_domain(obj); 3937 3938 /* Serialise direct access to this object with the barriers for 3939 * coherent writes from the GPU, by effectively invalidating the 3940 * GTT domain upon first access. 3941 */ 3942 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) 3943 mb(); 3944 3945 old_write_domain = obj->base.write_domain; 3946 old_read_domains = obj->base.read_domains; 3947 3948 /* It should now be out of any other write domains, and we can update 3949 * the domain values for our changes. 3950 */ 3951 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0); 3952 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 3953 if (write) { 3954 obj->base.read_domains = I915_GEM_DOMAIN_GTT; 3955 obj->base.write_domain = I915_GEM_DOMAIN_GTT; 3956 obj->dirty = 1; 3957 } 3958 3959 trace_i915_gem_object_change_domain(obj, 3960 old_read_domains, 3961 old_write_domain); 3962 3963 /* And bump the LRU for this access */ 3964 vma = i915_gem_obj_to_ggtt(obj); 3965 if (vma && drm_mm_node_allocated(&vma->node) && !obj->active) 3966 list_move_tail(&vma->vm_link, 3967 &ggtt->base.inactive_list); 3968 3969 return 0; 3970 } 3971 3972 /** 3973 * Changes the cache-level of an object across all VMA. 3974 * 3975 * After this function returns, the object will be in the new cache-level 3976 * across all GTT and the contents of the backing storage will be coherent, 3977 * with respect to the new cache-level. In order to keep the backing storage 3978 * coherent for all users, we only allow a single cache level to be set 3979 * globally on the object and prevent it from being changed whilst the 3980 * hardware is reading from the object. That is if the object is currently 3981 * on the scanout it will be set to uncached (or equivalent display 3982 * cache coherency) and all non-MOCS GPU access will also be uncached so 3983 * that all direct access to the scanout remains coherent. 3984 */ 3985 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, 3986 enum i915_cache_level cache_level) 3987 { 3988 struct drm_device *dev = obj->base.dev; 3989 struct i915_vma *vma, *next; 3990 bool bound = false; 3991 int ret = 0; 3992 3993 if (obj->cache_level == cache_level) 3994 goto out; 3995 3996 /* Inspect the list of currently bound VMA and unbind any that would 3997 * be invalid given the new cache-level. This is principally to 3998 * catch the issue of the CS prefetch crossing page boundaries and 3999 * reading an invalid PTE on older architectures. 
4000 */ 4001 list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) { 4002 if (!drm_mm_node_allocated(&vma->node)) 4003 continue; 4004 4005 if (vma->pin_count) { 4006 DRM_DEBUG("can not change the cache level of pinned objects\n"); 4007 return -EBUSY; 4008 } 4009 4010 if (!i915_gem_valid_gtt_space(vma, cache_level)) { 4011 ret = i915_vma_unbind(vma); 4012 if (ret) 4013 return ret; 4014 } else 4015 bound = true; 4016 } 4017 4018 /* We can reuse the existing drm_mm nodes but need to change the 4019 * cache-level on the PTE. We could simply unbind them all and 4020 * rebind with the correct cache-level on next use. However since 4021 * we already have a valid slot, dma mapping, pages etc, we may as 4022 * rewrite the PTE in the belief that doing so tramples upon less 4023 * state and so involves less work. 4024 */ 4025 if (bound) { 4026 /* Before we change the PTE, the GPU must not be accessing it. 4027 * If we wait upon the object, we know that all the bound 4028 * VMA are no longer active. 4029 */ 4030 ret = i915_gem_object_wait_rendering(obj, false); 4031 if (ret) 4032 return ret; 4033 4034 if (!HAS_LLC(dev) && cache_level != I915_CACHE_NONE) { 4035 /* Access to snoopable pages through the GTT is 4036 * incoherent and on some machines causes a hard 4037 * lockup. Relinquish the CPU mmaping to force 4038 * userspace to refault in the pages and we can 4039 * then double check if the GTT mapping is still 4040 * valid for that pointer access. 4041 */ 4042 i915_gem_release_mmap(obj); 4043 4044 /* As we no longer need a fence for GTT access, 4045 * we can relinquish it now (and so prevent having 4046 * to steal a fence from someone else on the next 4047 * fence request). Note GPU activity would have 4048 * dropped the fence as all snoopable access is 4049 * supposed to be linear. 4050 */ 4051 ret = i915_gem_object_put_fence(obj); 4052 if (ret) 4053 return ret; 4054 } else { 4055 /* We either have incoherent backing store and 4056 * so no GTT access or the architecture is fully 4057 * coherent. In such cases, existing GTT mmaps 4058 * ignore the cache bit in the PTE and we can 4059 * rewrite it without confusing the GPU or having 4060 * to force userspace to fault back in its mmaps. 4061 */ 4062 } 4063 4064 list_for_each_entry(vma, &obj->vma_list, obj_link) { 4065 if (!drm_mm_node_allocated(&vma->node)) 4066 continue; 4067 4068 ret = i915_vma_bind(vma, cache_level, PIN_UPDATE); 4069 if (ret) 4070 return ret; 4071 } 4072 } 4073 4074 list_for_each_entry(vma, &obj->vma_list, obj_link) 4075 vma->node.color = cache_level; 4076 obj->cache_level = cache_level; 4077 4078 out: 4079 /* Flush the dirty CPU caches to the backing storage so that the 4080 * object is now coherent at its new cache level (with respect 4081 * to the access domain). 
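 * We only need the clflush when the object actually has dirty cachelines,
 * is not already being written through the CPU domain, and the new cache
 * level still requires manual flushing (see cpu_write_needs_clflush()).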
4082 */ 4083 if (obj->cache_dirty && 4084 obj->base.write_domain != I915_GEM_DOMAIN_CPU && 4085 cpu_write_needs_clflush(obj)) { 4086 if (i915_gem_clflush_object(obj, true)) 4087 i915_gem_chipset_flush(obj->base.dev); 4088 } 4089 4090 return 0; 4091 } 4092 4093 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, 4094 struct drm_file *file) 4095 { 4096 struct drm_i915_gem_caching *args = data; 4097 struct drm_i915_gem_object *obj; 4098 4099 obj = to_intel_bo(drm_gem_object_lookup(file, args->handle)); 4100 if (&obj->base == NULL) 4101 return -ENOENT; 4102 4103 switch (obj->cache_level) { 4104 case I915_CACHE_LLC: 4105 case I915_CACHE_L3_LLC: 4106 args->caching = I915_CACHING_CACHED; 4107 break; 4108 4109 case I915_CACHE_WT: 4110 args->caching = I915_CACHING_DISPLAY; 4111 break; 4112 4113 default: 4114 args->caching = I915_CACHING_NONE; 4115 break; 4116 } 4117 4118 drm_gem_object_unreference_unlocked(&obj->base); 4119 return 0; 4120 } 4121 4122 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, 4123 struct drm_file *file) 4124 { 4125 struct drm_i915_private *dev_priv = dev->dev_private; 4126 struct drm_i915_gem_caching *args = data; 4127 struct drm_i915_gem_object *obj; 4128 enum i915_cache_level level; 4129 int ret; 4130 4131 switch (args->caching) { 4132 case I915_CACHING_NONE: 4133 level = I915_CACHE_NONE; 4134 break; 4135 case I915_CACHING_CACHED: 4136 /* 4137 * Due to a HW issue on BXT A stepping, GPU stores via a 4138 * snooped mapping may leave stale data in a corresponding CPU 4139 * cacheline, whereas normally such cachelines would get 4140 * invalidated. 4141 */ 4142 if (!HAS_LLC(dev) && !HAS_SNOOP(dev)) 4143 return -ENODEV; 4144 4145 level = I915_CACHE_LLC; 4146 break; 4147 case I915_CACHING_DISPLAY: 4148 level = HAS_WT(dev) ? I915_CACHE_WT : I915_CACHE_NONE; 4149 break; 4150 default: 4151 return -EINVAL; 4152 } 4153 4154 intel_runtime_pm_get(dev_priv); 4155 4156 ret = i915_mutex_lock_interruptible(dev); 4157 if (ret) 4158 goto rpm_put; 4159 4160 obj = to_intel_bo(drm_gem_object_lookup(file, args->handle)); 4161 if (&obj->base == NULL) { 4162 ret = -ENOENT; 4163 goto unlock; 4164 } 4165 4166 ret = i915_gem_object_set_cache_level(obj, level); 4167 4168 drm_gem_object_unreference(&obj->base); 4169 unlock: 4170 mutex_unlock(&dev->struct_mutex); 4171 rpm_put: 4172 intel_runtime_pm_put(dev_priv); 4173 4174 return ret; 4175 } 4176 4177 /* 4178 * Prepare buffer for display plane (scanout, cursors, etc). 4179 * Can be called from an uninterruptible phase (modesetting) and allows 4180 * any flushes to be pipelined (for pageflips). 4181 */ 4182 int 4183 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, 4184 u32 alignment, 4185 const struct i915_ggtt_view *view) 4186 { 4187 u32 old_read_domains, old_write_domain; 4188 int ret; 4189 4190 /* Mark the pin_display early so that we account for the 4191 * display coherency whilst setting up the cache domains. 4192 */ 4193 obj->pin_display++; 4194 4195 /* The display engine is not coherent with the LLC cache on gen6. As 4196 * a result, we make sure that the pinning that is about to occur is 4197 * done with uncached PTEs. This is lowest common denominator for all 4198 * chipsets. 4199 * 4200 * However for gen6+, we could do better by using the GFDT bit instead 4201 * of uncaching, which would allow us to flush all the LLC-cached data 4202 * with that bit in the PTE to main memory with just one PIPE_CONTROL. 4203 */ 4204 ret = i915_gem_object_set_cache_level(obj, 4205 HAS_WT(obj->base.dev) ? 
I915_CACHE_WT : I915_CACHE_NONE); 4206 if (ret) 4207 goto err_unpin_display; 4208 4209 /* As the user may map the buffer once pinned in the display plane 4210 * (e.g. libkms for the bootup splash), we have to ensure that we 4211 * always use map_and_fenceable for all scanout buffers. 4212 */ 4213 ret = i915_gem_object_ggtt_pin(obj, view, alignment, 4214 view->type == I915_GGTT_VIEW_NORMAL ? 4215 PIN_MAPPABLE : 0); 4216 if (ret) 4217 goto err_unpin_display; 4218 4219 i915_gem_object_flush_cpu_write_domain(obj); 4220 4221 old_write_domain = obj->base.write_domain; 4222 old_read_domains = obj->base.read_domains; 4223 4224 /* It should now be out of any other write domains, and we can update 4225 * the domain values for our changes. 4226 */ 4227 obj->base.write_domain = 0; 4228 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 4229 4230 trace_i915_gem_object_change_domain(obj, 4231 old_read_domains, 4232 old_write_domain); 4233 4234 return 0; 4235 4236 err_unpin_display: 4237 obj->pin_display--; 4238 return ret; 4239 } 4240 4241 void 4242 i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj, 4243 const struct i915_ggtt_view *view) 4244 { 4245 if (WARN_ON(obj->pin_display == 0)) 4246 return; 4247 4248 i915_gem_object_ggtt_unpin_view(obj, view); 4249 4250 obj->pin_display--; 4251 } 4252 4253 /** 4254 * Moves a single object to the CPU read, and possibly write domain. 4255 * 4256 * This function returns when the move is complete, including waiting on 4257 * flushes to occur. 4258 */ 4259 int 4260 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) 4261 { 4262 uint32_t old_write_domain, old_read_domains; 4263 int ret; 4264 4265 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) 4266 return 0; 4267 4268 ret = i915_gem_object_wait_rendering(obj, !write); 4269 if (ret) 4270 return ret; 4271 4272 i915_gem_object_flush_gtt_write_domain(obj); 4273 4274 old_write_domain = obj->base.write_domain; 4275 old_read_domains = obj->base.read_domains; 4276 4277 /* Flush the CPU cache if it's still invalid. */ 4278 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) { 4279 i915_gem_clflush_object(obj, false); 4280 4281 obj->base.read_domains |= I915_GEM_DOMAIN_CPU; 4282 } 4283 4284 /* It should now be out of any other write domains, and we can update 4285 * the domain values for our changes. 4286 */ 4287 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0); 4288 4289 /* If we're writing through the CPU, then the GPU read domains will 4290 * need to be invalidated at next use. 4291 */ 4292 if (write) { 4293 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 4294 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 4295 } 4296 4297 trace_i915_gem_object_change_domain(obj, 4298 old_read_domains, 4299 old_write_domain); 4300 4301 return 0; 4302 } 4303 4304 /* Throttle our rendering by waiting until the ring has completed our requests 4305 * emitted over 20 msec ago. 4306 * 4307 * Note that if we were to use the current jiffies each time around the loop, 4308 * we wouldn't escape the function with any frames outstanding if the time to 4309 * render a frame was over 20ms. 4310 * 4311 * This should get us reasonable parallelism between CPU and GPU but also 4312 * relatively low latency when blocking on a particular request to finish. 
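 *
 * Concretely, the function below samples jiffies once, walks this file's
 * request list for the most recent request emitted more than
 * DRM_I915_THROTTLE_JIFFIES (20ms) ago, and then waits on that request
 * outside of struct_mutex before kicking the retire worker.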
4313 */ 4314 static int 4315 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) 4316 { 4317 struct drm_i915_private *dev_priv = dev->dev_private; 4318 struct drm_i915_file_private *file_priv = file->driver_priv; 4319 unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES; 4320 struct drm_i915_gem_request *request, *target = NULL; 4321 int ret; 4322 4323 ret = i915_gem_wait_for_error(&dev_priv->gpu_error); 4324 if (ret) 4325 return ret; 4326 4327 /* ABI: return -EIO if already wedged */ 4328 if (i915_terminally_wedged(&dev_priv->gpu_error)) 4329 return -EIO; 4330 4331 spin_lock(&file_priv->mm.lock); 4332 list_for_each_entry(request, &file_priv->mm.request_list, client_list) { 4333 if (time_after_eq(request->emitted_jiffies, recent_enough)) 4334 break; 4335 4336 /* 4337 * Note that the request might not have been submitted yet. 4338 * In which case emitted_jiffies will be zero. 4339 */ 4340 if (!request->emitted_jiffies) 4341 continue; 4342 4343 target = request; 4344 } 4345 if (target) 4346 i915_gem_request_reference(target); 4347 spin_unlock(&file_priv->mm.lock); 4348 4349 if (target == NULL) 4350 return 0; 4351 4352 ret = __i915_wait_request(target, true, NULL, NULL); 4353 if (ret == 0) 4354 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0); 4355 4356 i915_gem_request_unreference__unlocked(target); 4357 4358 return ret; 4359 } 4360 4361 static bool 4362 i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags) 4363 { 4364 struct drm_i915_gem_object *obj = vma->obj; 4365 4366 if (alignment && 4367 vma->node.start & (alignment - 1)) 4368 return true; 4369 4370 if (flags & PIN_MAPPABLE && !obj->map_and_fenceable) 4371 return true; 4372 4373 if (flags & PIN_OFFSET_BIAS && 4374 vma->node.start < (flags & PIN_OFFSET_MASK)) 4375 return true; 4376 4377 if (flags & PIN_OFFSET_FIXED && 4378 vma->node.start != (flags & PIN_OFFSET_MASK)) 4379 return true; 4380 4381 return false; 4382 } 4383 4384 void __i915_vma_set_map_and_fenceable(struct i915_vma *vma) 4385 { 4386 struct drm_i915_gem_object *obj = vma->obj; 4387 bool mappable, fenceable; 4388 u32 fence_size, fence_alignment; 4389 4390 fence_size = i915_gem_get_gtt_size(obj->base.dev, 4391 obj->base.size, 4392 obj->tiling_mode); 4393 fence_alignment = i915_gem_get_gtt_alignment(obj->base.dev, 4394 obj->base.size, 4395 obj->tiling_mode, 4396 true); 4397 4398 fenceable = (vma->node.size == fence_size && 4399 (vma->node.start & (fence_alignment - 1)) == 0); 4400 4401 mappable = (vma->node.start + fence_size <= 4402 to_i915(obj->base.dev)->ggtt.mappable_end); 4403 4404 obj->map_and_fenceable = mappable && fenceable; 4405 } 4406 4407 static int 4408 i915_gem_object_do_pin(struct drm_i915_gem_object *obj, 4409 struct i915_address_space *vm, 4410 const struct i915_ggtt_view *ggtt_view, 4411 uint32_t alignment, 4412 uint64_t flags) 4413 { 4414 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 4415 struct i915_vma *vma; 4416 unsigned bound; 4417 int ret; 4418 4419 if (WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base)) 4420 return -ENODEV; 4421 4422 if (WARN_ON(flags & (PIN_GLOBAL | PIN_MAPPABLE) && !i915_is_ggtt(vm))) 4423 return -EINVAL; 4424 4425 if (WARN_ON((flags & (PIN_MAPPABLE | PIN_GLOBAL)) == PIN_MAPPABLE)) 4426 return -EINVAL; 4427 4428 if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view)) 4429 return -EINVAL; 4430 4431 vma = ggtt_view ? 
i915_gem_obj_to_ggtt_view(obj, ggtt_view) : 4432 i915_gem_obj_to_vma(obj, vm); 4433 4434 if (vma) { 4435 if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT)) 4436 return -EBUSY; 4437 4438 if (i915_vma_misplaced(vma, alignment, flags)) { 4439 WARN(vma->pin_count, 4440 "bo is already pinned in %s with incorrect alignment:" 4441 " offset=%08x %08x, req.alignment=%x, req.map_and_fenceable=%d," 4442 " obj->map_and_fenceable=%d\n", 4443 ggtt_view ? "ggtt" : "ppgtt", 4444 upper_32_bits(vma->node.start), 4445 lower_32_bits(vma->node.start), 4446 alignment, 4447 !!(flags & PIN_MAPPABLE), 4448 obj->map_and_fenceable); 4449 ret = i915_vma_unbind(vma); 4450 if (ret) 4451 return ret; 4452 4453 vma = NULL; 4454 } 4455 } 4456 4457 bound = vma ? vma->bound : 0; 4458 if (vma == NULL || !drm_mm_node_allocated(&vma->node)) { 4459 vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view, alignment, 4460 flags); 4461 if (IS_ERR(vma)) 4462 return PTR_ERR(vma); 4463 } else { 4464 ret = i915_vma_bind(vma, obj->cache_level, flags); 4465 if (ret) 4466 return ret; 4467 } 4468 4469 if (ggtt_view && ggtt_view->type == I915_GGTT_VIEW_NORMAL && 4470 (bound ^ vma->bound) & GLOBAL_BIND) { 4471 __i915_vma_set_map_and_fenceable(vma); 4472 WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable); 4473 } 4474 4475 vma->pin_count++; 4476 return 0; 4477 } 4478 4479 int 4480 i915_gem_object_pin(struct drm_i915_gem_object *obj, 4481 struct i915_address_space *vm, 4482 uint32_t alignment, 4483 uint64_t flags) 4484 { 4485 return i915_gem_object_do_pin(obj, vm, 4486 i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL, 4487 alignment, flags); 4488 } 4489 4490 int 4491 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, 4492 const struct i915_ggtt_view *view, 4493 uint32_t alignment, 4494 uint64_t flags) 4495 { 4496 struct drm_device *dev = obj->base.dev; 4497 struct drm_i915_private *dev_priv = to_i915(dev); 4498 struct i915_ggtt *ggtt = &dev_priv->ggtt; 4499 4500 BUG_ON(!view); 4501 4502 return i915_gem_object_do_pin(obj, &ggtt->base, view, 4503 alignment, flags | PIN_GLOBAL); 4504 } 4505 4506 void 4507 i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj, 4508 const struct i915_ggtt_view *view) 4509 { 4510 struct i915_vma *vma = i915_gem_obj_to_ggtt_view(obj, view); 4511 4512 WARN_ON(vma->pin_count == 0); 4513 WARN_ON(!i915_gem_obj_ggtt_bound_view(obj, view)); 4514 4515 --vma->pin_count; 4516 } 4517 4518 int 4519 i915_gem_busy_ioctl(struct drm_device *dev, void *data, 4520 struct drm_file *file) 4521 { 4522 struct drm_i915_gem_busy *args = data; 4523 struct drm_i915_gem_object *obj; 4524 int ret; 4525 4526 ret = i915_mutex_lock_interruptible(dev); 4527 if (ret) 4528 return ret; 4529 4530 obj = to_intel_bo(drm_gem_object_lookup(file, args->handle)); 4531 if (&obj->base == NULL) { 4532 ret = -ENOENT; 4533 goto unlock; 4534 } 4535 4536 /* Count all active objects as busy, even if they are currently not used 4537 * by the gpu. Users of this interface expect objects to eventually 4538 * become non-busy without any further actions, therefore emit any 4539 * necessary flushes here. 
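 *
 * The result in args->busy encodes the engine that performed the most
 * recent write in the low 16 bits (its exec_id) and a bitmask of the
 * engines still reading from the object in the high 16 bits, as
 * constructed below.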
4540 */ 4541 ret = i915_gem_object_flush_active(obj); 4542 if (ret) 4543 goto unref; 4544 4545 args->busy = 0; 4546 if (obj->active) { 4547 int i; 4548 4549 for (i = 0; i < I915_NUM_ENGINES; i++) { 4550 struct drm_i915_gem_request *req; 4551 4552 req = obj->last_read_req[i]; 4553 if (req) 4554 args->busy |= 1 << (16 + req->engine->exec_id); 4555 } 4556 if (obj->last_write_req) 4557 args->busy |= obj->last_write_req->engine->exec_id; 4558 } 4559 4560 unref: 4561 drm_gem_object_unreference(&obj->base); 4562 unlock: 4563 mutex_unlock(&dev->struct_mutex); 4564 return ret; 4565 } 4566 4567 int 4568 i915_gem_throttle_ioctl(struct drm_device *dev, void *data, 4569 struct drm_file *file_priv) 4570 { 4571 return i915_gem_ring_throttle(dev, file_priv); 4572 } 4573 4574 int 4575 i915_gem_madvise_ioctl(struct drm_device *dev, void *data, 4576 struct drm_file *file_priv) 4577 { 4578 struct drm_i915_private *dev_priv = dev->dev_private; 4579 struct drm_i915_gem_madvise *args = data; 4580 struct drm_i915_gem_object *obj; 4581 int ret; 4582 4583 switch (args->madv) { 4584 case I915_MADV_DONTNEED: 4585 case I915_MADV_WILLNEED: 4586 break; 4587 default: 4588 return -EINVAL; 4589 } 4590 4591 ret = i915_mutex_lock_interruptible(dev); 4592 if (ret) 4593 return ret; 4594 4595 obj = to_intel_bo(drm_gem_object_lookup(file_priv, args->handle)); 4596 if (&obj->base == NULL) { 4597 ret = -ENOENT; 4598 goto unlock; 4599 } 4600 4601 if (i915_gem_obj_is_pinned(obj)) { 4602 ret = -EINVAL; 4603 goto out; 4604 } 4605 4606 if (obj->pages && 4607 obj->tiling_mode != I915_TILING_NONE && 4608 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) { 4609 if (obj->madv == I915_MADV_WILLNEED) 4610 i915_gem_object_unpin_pages(obj); 4611 if (args->madv == I915_MADV_WILLNEED) 4612 i915_gem_object_pin_pages(obj); 4613 } 4614 4615 if (obj->madv != __I915_MADV_PURGED) 4616 obj->madv = args->madv; 4617 4618 /* if the object is no longer attached, discard its backing storage */ 4619 if (obj->madv == I915_MADV_DONTNEED && obj->pages == NULL) 4620 i915_gem_object_truncate(obj); 4621 4622 args->retained = obj->madv != __I915_MADV_PURGED; 4623 4624 out: 4625 drm_gem_object_unreference(&obj->base); 4626 unlock: 4627 mutex_unlock(&dev->struct_mutex); 4628 return ret; 4629 } 4630 4631 void i915_gem_object_init(struct drm_i915_gem_object *obj, 4632 const struct drm_i915_gem_object_ops *ops) 4633 { 4634 int i; 4635 4636 INIT_LIST_HEAD(&obj->global_list); 4637 for (i = 0; i < I915_NUM_ENGINES; i++) 4638 INIT_LIST_HEAD(&obj->engine_list[i]); 4639 INIT_LIST_HEAD(&obj->obj_exec_link); 4640 INIT_LIST_HEAD(&obj->vma_list); 4641 INIT_LIST_HEAD(&obj->batch_pool_link); 4642 4643 obj->ops = ops; 4644 4645 obj->fence_reg = I915_FENCE_REG_NONE; 4646 obj->madv = I915_MADV_WILLNEED; 4647 4648 i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size); 4649 } 4650 4651 static const struct drm_i915_gem_object_ops i915_gem_object_ops = { 4652 .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE, 4653 .get_pages = i915_gem_object_get_pages_gtt, 4654 .put_pages = i915_gem_object_put_pages_gtt, 4655 }; 4656 4657 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev, 4658 size_t size) 4659 { 4660 struct drm_i915_gem_object *obj; 4661 #if 0 4662 struct address_space *mapping; 4663 gfp_t mask; 4664 #endif 4665 4666 obj = i915_gem_object_alloc(dev); 4667 if (obj == NULL) 4668 return NULL; 4669 4670 if (drm_gem_object_init(dev, &obj->base, size) != 0) { 4671 i915_gem_object_free(obj); 4672 return NULL; 4673 } 4674 4675 #if 0 4676 mask = GFP_HIGHUSER | 
__GFP_RECLAIMABLE; 4677 if (IS_CRESTLINE(dev) || IS_BROADWATER(dev)) { 4678 /* 965gm cannot relocate objects above 4GiB. */ 4679 mask &= ~__GFP_HIGHMEM; 4680 mask |= __GFP_DMA32; 4681 } 4682 4683 mapping = file_inode(obj->base.filp)->i_mapping; 4684 mapping_set_gfp_mask(mapping, mask); 4685 #endif 4686 4687 i915_gem_object_init(obj, &i915_gem_object_ops); 4688 4689 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 4690 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 4691 4692 if (HAS_LLC(dev)) { 4693 /* On some devices, we can have the GPU use the LLC (the CPU 4694 * cache) for about a 10% performance improvement 4695 * compared to uncached. Graphics requests other than 4696 * display scanout are coherent with the CPU in 4697 * accessing this cache. This means in this mode we 4698 * don't need to clflush on the CPU side, and on the 4699 * GPU side we only need to flush internal caches to 4700 * get data visible to the CPU. 4701 * 4702 * However, we maintain the display planes as UC, and so 4703 * need to rebind when first used as such. 4704 */ 4705 obj->cache_level = I915_CACHE_LLC; 4706 } else 4707 obj->cache_level = I915_CACHE_NONE; 4708 4709 trace_i915_gem_object_create(obj); 4710 4711 return obj; 4712 } 4713 4714 static bool discard_backing_storage(struct drm_i915_gem_object *obj) 4715 { 4716 /* If we are the last user of the backing storage (be it shmemfs 4717 * pages or stolen etc), we know that the pages are going to be 4718 * immediately released. In this case, we can then skip copying 4719 * back the contents from the GPU. 4720 */ 4721 4722 if (obj->madv != I915_MADV_WILLNEED) 4723 return false; 4724 4725 if (obj->base.filp == NULL) 4726 return true; 4727 4728 /* At first glance, this looks racy, but then again so would be 4729 * userspace racing mmap against close. However, the first external 4730 * reference to the filp can only be obtained through the 4731 * i915_gem_mmap_ioctl() which safeguards us against the user 4732 * acquiring such a reference whilst we are in the middle of 4733 * freeing the object. 4734 */ 4735 #if 0 4736 return atomic_long_read(&obj->base.filp->f_count) == 1; 4737 #else 4738 return false; 4739 #endif 4740 } 4741 4742 void i915_gem_free_object(struct drm_gem_object *gem_obj) 4743 { 4744 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); 4745 struct drm_device *dev = obj->base.dev; 4746 struct drm_i915_private *dev_priv = dev->dev_private; 4747 struct i915_vma *vma, *next; 4748 4749 intel_runtime_pm_get(dev_priv); 4750 4751 trace_i915_gem_object_destroy(obj); 4752 4753 list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) { 4754 int ret; 4755 4756 vma->pin_count = 0; 4757 ret = i915_vma_unbind(vma); 4758 if (WARN_ON(ret == -ERESTARTSYS)) { 4759 bool was_interruptible; 4760 4761 was_interruptible = dev_priv->mm.interruptible; 4762 dev_priv->mm.interruptible = false; 4763 4764 WARN_ON(i915_vma_unbind(vma)); 4765 4766 dev_priv->mm.interruptible = was_interruptible; 4767 } 4768 } 4769 4770 /* Stolen objects don't hold a ref, but do hold pin count. Fix that up 4771 * before progressing. 
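 * (i.e. drop that pin now so that the WARN_ON(obj->pages_pin_count)
 * further down does not fire for stolen objects.)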
*/ 4772 if (obj->stolen) 4773 i915_gem_object_unpin_pages(obj); 4774 4775 WARN_ON(obj->frontbuffer_bits); 4776 4777 if (obj->pages && obj->madv == I915_MADV_WILLNEED && 4778 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES && 4779 obj->tiling_mode != I915_TILING_NONE) 4780 i915_gem_object_unpin_pages(obj); 4781 4782 if (WARN_ON(obj->pages_pin_count)) 4783 obj->pages_pin_count = 0; 4784 if (discard_backing_storage(obj)) 4785 obj->madv = I915_MADV_DONTNEED; 4786 i915_gem_object_put_pages(obj); 4787 i915_gem_object_free_mmap_offset(obj); 4788 4789 BUG_ON(obj->pages); 4790 4791 #if 0 4792 if (obj->base.import_attach) 4793 drm_prime_gem_destroy(&obj->base, NULL); 4794 #endif 4795 4796 if (obj->ops->release) 4797 obj->ops->release(obj); 4798 4799 drm_gem_object_release(&obj->base); 4800 i915_gem_info_remove_obj(dev_priv, obj->base.size); 4801 4802 kfree(obj->bit_17); 4803 i915_gem_object_free(obj); 4804 4805 intel_runtime_pm_put(dev_priv); 4806 } 4807 4808 struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj, 4809 struct i915_address_space *vm) 4810 { 4811 struct i915_vma *vma; 4812 list_for_each_entry(vma, &obj->vma_list, obj_link) { 4813 if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL && 4814 vma->vm == vm) 4815 return vma; 4816 } 4817 return NULL; 4818 } 4819 4820 struct i915_vma *i915_gem_obj_to_ggtt_view(struct drm_i915_gem_object *obj, 4821 const struct i915_ggtt_view *view) 4822 { 4823 struct drm_device *dev = obj->base.dev; 4824 struct drm_i915_private *dev_priv = to_i915(dev); 4825 struct i915_ggtt *ggtt = &dev_priv->ggtt; 4826 struct i915_vma *vma; 4827 4828 BUG_ON(!view); 4829 4830 list_for_each_entry(vma, &obj->vma_list, obj_link) 4831 if (vma->vm == &ggtt->base && 4832 i915_ggtt_view_equal(&vma->ggtt_view, view)) 4833 return vma; 4834 return NULL; 4835 } 4836 4837 void i915_gem_vma_destroy(struct i915_vma *vma) 4838 { 4839 WARN_ON(vma->node.allocated); 4840 4841 /* Keep the vma as a placeholder in the execbuffer reservation lists */ 4842 if (!list_empty(&vma->exec_list)) 4843 return; 4844 4845 if (!vma->is_ggtt) 4846 i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm)); 4847 4848 list_del(&vma->obj_link); 4849 4850 kfree(vma); 4851 } 4852 4853 static void 4854 i915_gem_stop_engines(struct drm_device *dev) 4855 { 4856 struct drm_i915_private *dev_priv = dev->dev_private; 4857 struct intel_engine_cs *engine; 4858 4859 for_each_engine(engine, dev_priv) 4860 dev_priv->gt.stop_engine(engine); 4861 } 4862 4863 int 4864 i915_gem_suspend(struct drm_device *dev) 4865 { 4866 struct drm_i915_private *dev_priv = dev->dev_private; 4867 int ret = 0; 4868 4869 mutex_lock(&dev->struct_mutex); 4870 ret = i915_gpu_idle(dev); 4871 if (ret) 4872 goto err; 4873 4874 i915_gem_retire_requests(dev); 4875 4876 i915_gem_stop_engines(dev); 4877 mutex_unlock(&dev->struct_mutex); 4878 4879 cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work); 4880 cancel_delayed_work_sync(&dev_priv->mm.retire_work); 4881 flush_delayed_work(&dev_priv->mm.idle_work); 4882 4883 /* Assert that we sucessfully flushed all the work and 4884 * reset the GPU back to its idle, low power state. 
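 * (By this point the engines have been stopped, struct_mutex dropped and
 * the hangcheck, retire and idle workers flushed, so mm.busy should be
 * clear; the WARN_ON below is that assertion.)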
4885 */ 4886 WARN_ON(dev_priv->mm.busy); 4887 4888 return 0; 4889 4890 err: 4891 mutex_unlock(&dev->struct_mutex); 4892 return ret; 4893 } 4894 4895 int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice) 4896 { 4897 struct intel_engine_cs *engine = req->engine; 4898 struct drm_device *dev = engine->dev; 4899 struct drm_i915_private *dev_priv = dev->dev_private; 4900 u32 *remap_info = dev_priv->l3_parity.remap_info[slice]; 4901 int i, ret; 4902 4903 if (!HAS_L3_DPF(dev) || !remap_info) 4904 return 0; 4905 4906 ret = intel_ring_begin(req, GEN7_L3LOG_SIZE / 4 * 3); 4907 if (ret) 4908 return ret; 4909 4910 /* 4911 * Note: We do not worry about the concurrent register cacheline hang 4912 * here because no other code should access these registers other than 4913 * at initialization time. 4914 */ 4915 for (i = 0; i < GEN7_L3LOG_SIZE / 4; i++) { 4916 intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(1)); 4917 intel_ring_emit_reg(engine, GEN7_L3LOG(slice, i)); 4918 intel_ring_emit(engine, remap_info[i]); 4919 } 4920 4921 intel_ring_advance(engine); 4922 4923 return ret; 4924 } 4925 4926 void i915_gem_init_swizzling(struct drm_device *dev) 4927 { 4928 struct drm_i915_private *dev_priv = dev->dev_private; 4929 4930 if (INTEL_INFO(dev)->gen < 5 || 4931 dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE) 4932 return; 4933 4934 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) | 4935 DISP_TILE_SURFACE_SWIZZLING); 4936 4937 if (IS_GEN5(dev)) 4938 return; 4939 4940 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL); 4941 if (IS_GEN6(dev)) 4942 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB)); 4943 else if (IS_GEN7(dev)) 4944 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB)); 4945 else if (IS_GEN8(dev)) 4946 I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW)); 4947 else 4948 BUG(); 4949 } 4950 4951 static void init_unused_ring(struct drm_device *dev, u32 base) 4952 { 4953 struct drm_i915_private *dev_priv = dev->dev_private; 4954 4955 I915_WRITE(RING_CTL(base), 0); 4956 I915_WRITE(RING_HEAD(base), 0); 4957 I915_WRITE(RING_TAIL(base), 0); 4958 I915_WRITE(RING_START(base), 0); 4959 } 4960 4961 static void init_unused_rings(struct drm_device *dev) 4962 { 4963 if (IS_I830(dev)) { 4964 init_unused_ring(dev, PRB1_BASE); 4965 init_unused_ring(dev, SRB0_BASE); 4966 init_unused_ring(dev, SRB1_BASE); 4967 init_unused_ring(dev, SRB2_BASE); 4968 init_unused_ring(dev, SRB3_BASE); 4969 } else if (IS_GEN2(dev)) { 4970 init_unused_ring(dev, SRB0_BASE); 4971 init_unused_ring(dev, SRB1_BASE); 4972 } else if (IS_GEN3(dev)) { 4973 init_unused_ring(dev, PRB1_BASE); 4974 init_unused_ring(dev, PRB2_BASE); 4975 } 4976 } 4977 4978 int i915_gem_init_engines(struct drm_device *dev) 4979 { 4980 struct drm_i915_private *dev_priv = dev->dev_private; 4981 int ret; 4982 4983 ret = intel_init_render_ring_buffer(dev); 4984 if (ret) 4985 return ret; 4986 4987 if (HAS_BSD(dev)) { 4988 ret = intel_init_bsd_ring_buffer(dev); 4989 if (ret) 4990 goto cleanup_render_ring; 4991 } 4992 4993 if (HAS_BLT(dev)) { 4994 ret = intel_init_blt_ring_buffer(dev); 4995 if (ret) 4996 goto cleanup_bsd_ring; 4997 } 4998 4999 if (HAS_VEBOX(dev)) { 5000 ret = intel_init_vebox_ring_buffer(dev); 5001 if (ret) 5002 goto cleanup_blt_ring; 5003 } 5004 5005 if (HAS_BSD2(dev)) { 5006 ret = intel_init_bsd2_ring_buffer(dev); 5007 if (ret) 5008 goto cleanup_vebox_ring; 5009 } 5010 5011 return 0; 5012 5013 cleanup_vebox_ring: 5014 intel_cleanup_engine(&dev_priv->engine[VECS]); 5015 cleanup_blt_ring: 5016 
intel_cleanup_engine(&dev_priv->engine[BCS]); 5017 cleanup_bsd_ring: 5018 intel_cleanup_engine(&dev_priv->engine[VCS]); 5019 cleanup_render_ring: 5020 intel_cleanup_engine(&dev_priv->engine[RCS]); 5021 5022 return ret; 5023 } 5024 5025 int 5026 i915_gem_init_hw(struct drm_device *dev) 5027 { 5028 struct drm_i915_private *dev_priv = dev->dev_private; 5029 struct intel_engine_cs *engine; 5030 int ret, j; 5031 5032 /* Double layer security blanket, see i915_gem_init() */ 5033 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 5034 5035 if (HAS_EDRAM(dev) && INTEL_GEN(dev_priv) < 9) 5036 I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf)); 5037 5038 if (IS_HASWELL(dev)) 5039 I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev) ? 5040 LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED); 5041 5042 if (HAS_PCH_NOP(dev)) { 5043 if (IS_IVYBRIDGE(dev)) { 5044 u32 temp = I915_READ(GEN7_MSG_CTL); 5045 temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK); 5046 I915_WRITE(GEN7_MSG_CTL, temp); 5047 } else if (INTEL_INFO(dev)->gen >= 7) { 5048 u32 temp = I915_READ(HSW_NDE_RSTWRN_OPT); 5049 temp &= ~RESET_PCH_HANDSHAKE_ENABLE; 5050 I915_WRITE(HSW_NDE_RSTWRN_OPT, temp); 5051 } 5052 } 5053 5054 i915_gem_init_swizzling(dev); 5055 5056 /* 5057 * At least 830 can leave some of the unused rings 5058 * "active" (ie. head != tail) after resume which 5059 * will prevent c3 entry. Makes sure all unused rings 5060 * are totally idle. 5061 */ 5062 init_unused_rings(dev); 5063 5064 BUG_ON(!dev_priv->kernel_context); 5065 5066 ret = i915_ppgtt_init_hw(dev); 5067 if (ret) { 5068 DRM_ERROR("PPGTT enable HW failed %d\n", ret); 5069 goto out; 5070 } 5071 5072 /* Need to do basic initialisation of all rings first: */ 5073 for_each_engine(engine, dev_priv) { 5074 ret = engine->init_hw(engine); 5075 if (ret) 5076 goto out; 5077 } 5078 5079 intel_mocs_init_l3cc_table(dev); 5080 5081 /* We can't enable contexts until all firmware is loaded */ 5082 if (HAS_GUC_UCODE(dev)) { 5083 ret = intel_guc_ucode_load(dev); 5084 if (ret) { 5085 DRM_ERROR("Failed to initialize GuC, error %d\n", ret); 5086 ret = -EIO; 5087 goto out; 5088 } 5089 } 5090 5091 /* 5092 * Increment the next seqno by 0x100 so we have a visible break 5093 * on re-initialisation 5094 */ 5095 ret = i915_gem_set_seqno(dev, dev_priv->next_seqno+0x100); 5096 if (ret) 5097 goto out; 5098 5099 /* Now it is safe to go back round and do everything else: */ 5100 for_each_engine(engine, dev_priv) { 5101 struct drm_i915_gem_request *req; 5102 5103 req = i915_gem_request_alloc(engine, NULL); 5104 if (IS_ERR(req)) { 5105 ret = PTR_ERR(req); 5106 break; 5107 } 5108 5109 if (engine->id == RCS) { 5110 for (j = 0; j < NUM_L3_SLICES(dev); j++) { 5111 ret = i915_gem_l3_remap(req, j); 5112 if (ret) 5113 goto err_request; 5114 } 5115 } 5116 5117 ret = i915_ppgtt_init_ring(req); 5118 if (ret) 5119 goto err_request; 5120 5121 ret = i915_gem_context_enable(req); 5122 if (ret) 5123 goto err_request; 5124 5125 err_request: 5126 i915_add_request_no_flush(req); 5127 if (ret) { 5128 DRM_ERROR("Failed to enable %s, error=%d\n", 5129 engine->name, ret); 5130 i915_gem_cleanup_engines(dev); 5131 break; 5132 } 5133 } 5134 5135 out: 5136 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5137 return ret; 5138 } 5139 5140 int i915_gem_init(struct drm_device *dev) 5141 { 5142 struct drm_i915_private *dev_priv = dev->dev_private; 5143 int ret; 5144 5145 i915.enable_execlists = intel_sanitize_enable_execlists(dev, 5146 i915.enable_execlists); 5147 5148 mutex_lock(&dev->struct_mutex); 5149 5150 if 
(!i915.enable_execlists) { 5151 dev_priv->gt.execbuf_submit = i915_gem_ringbuffer_submission; 5152 dev_priv->gt.init_engines = i915_gem_init_engines; 5153 dev_priv->gt.cleanup_engine = intel_cleanup_engine; 5154 dev_priv->gt.stop_engine = intel_stop_engine; 5155 } else { 5156 dev_priv->gt.execbuf_submit = intel_execlists_submission; 5157 dev_priv->gt.init_engines = intel_logical_rings_init; 5158 dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup; 5159 dev_priv->gt.stop_engine = intel_logical_ring_stop; 5160 } 5161 5162 /* This is just a security blanket to placate dragons. 5163 * On some systems, we very sporadically observe that the first TLBs 5164 * used by the CS may be stale, despite us poking the TLB reset. If 5165 * we hold the forcewake during initialisation these problems 5166 * just magically go away. 5167 */ 5168 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 5169 5170 ret = i915_gem_init_userptr(dev); 5171 if (ret) 5172 goto out_unlock; 5173 5174 i915_gem_init_ggtt(dev); 5175 5176 ret = i915_gem_context_init(dev); 5177 if (ret) 5178 goto out_unlock; 5179 5180 ret = dev_priv->gt.init_engines(dev); 5181 if (ret) 5182 goto out_unlock; 5183 5184 ret = i915_gem_init_hw(dev); 5185 if (ret == -EIO) { 5186 /* Allow ring initialisation to fail by marking the GPU as 5187 * wedged. But we only want to do this where the GPU is angry, 5188 * for all other failure, such as an allocation failure, bail. 5189 */ 5190 DRM_ERROR("Failed to initialize GPU, declaring it wedged\n"); 5191 atomic_or(I915_WEDGED, &dev_priv->gpu_error.reset_counter); 5192 ret = 0; 5193 } 5194 5195 out_unlock: 5196 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5197 mutex_unlock(&dev->struct_mutex); 5198 5199 return ret; 5200 } 5201 5202 void 5203 i915_gem_cleanup_engines(struct drm_device *dev) 5204 { 5205 struct drm_i915_private *dev_priv = dev->dev_private; 5206 struct intel_engine_cs *engine; 5207 5208 for_each_engine(engine, dev_priv) 5209 dev_priv->gt.cleanup_engine(engine); 5210 5211 if (i915.enable_execlists) 5212 /* 5213 * Neither the BIOS, ourselves or any other kernel 5214 * expects the system to be in execlists mode on startup, 5215 * so we need to reset the GPU back to legacy mode. 
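 * The full-GPU reset of all engines below takes care of that.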
5216 */ 5217 intel_gpu_reset(dev, ALL_ENGINES); 5218 } 5219 5220 static void 5221 init_engine_lists(struct intel_engine_cs *engine) 5222 { 5223 INIT_LIST_HEAD(&engine->active_list); 5224 INIT_LIST_HEAD(&engine->request_list); 5225 } 5226 5227 void 5228 i915_gem_load_init_fences(struct drm_i915_private *dev_priv) 5229 { 5230 struct drm_device *dev = dev_priv->dev; 5231 5232 if (INTEL_INFO(dev_priv)->gen >= 7 && !IS_VALLEYVIEW(dev_priv) && 5233 !IS_CHERRYVIEW(dev_priv)) 5234 dev_priv->num_fence_regs = 32; 5235 else if (INTEL_INFO(dev_priv)->gen >= 4 || IS_I945G(dev_priv) || 5236 IS_I945GM(dev_priv) || IS_G33(dev_priv)) 5237 dev_priv->num_fence_regs = 16; 5238 else 5239 dev_priv->num_fence_regs = 8; 5240 5241 if (intel_vgpu_active(dev)) 5242 dev_priv->num_fence_regs = 5243 I915_READ(vgtif_reg(avail_rs.fence_num)); 5244 5245 /* Initialize fence registers to zero */ 5246 i915_gem_restore_fences(dev); 5247 5248 i915_gem_detect_bit_6_swizzle(dev); 5249 } 5250 5251 void 5252 i915_gem_load_init(struct drm_device *dev) 5253 { 5254 struct drm_i915_private *dev_priv = dev->dev_private; 5255 int i; 5256 5257 INIT_LIST_HEAD(&dev_priv->vm_list); 5258 INIT_LIST_HEAD(&dev_priv->context_list); 5259 INIT_LIST_HEAD(&dev_priv->mm.unbound_list); 5260 INIT_LIST_HEAD(&dev_priv->mm.bound_list); 5261 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 5262 for (i = 0; i < I915_NUM_ENGINES; i++) 5263 init_engine_lists(&dev_priv->engine[i]); 5264 for (i = 0; i < I915_MAX_NUM_FENCES; i++) 5265 INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list); 5266 INIT_DELAYED_WORK(&dev_priv->mm.retire_work, 5267 i915_gem_retire_work_handler); 5268 INIT_DELAYED_WORK(&dev_priv->mm.idle_work, 5269 i915_gem_idle_work_handler); 5270 init_waitqueue_head(&dev_priv->gpu_error.reset_queue); 5271 5272 dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL; 5273 5274 /* 5275 * Set initial sequence number for requests. 5276 * Using this number allows the wraparound to happen early, 5277 * catching any obvious problems. 5278 */ 5279 dev_priv->next_seqno = ((u32)~0 - 0x1100); 5280 dev_priv->last_seqno = ((u32)~0 - 0x1101); 5281 5282 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 5283 5284 init_waitqueue_head(&dev_priv->pending_flip_queue); 5285 5286 dev_priv->mm.interruptible = true; 5287 5288 lockinit(&dev_priv->fb_tracking.lock, "drmftl", 0, LK_CANRECURSE); 5289 } 5290 5291 void i915_gem_load_cleanup(struct drm_device *dev) 5292 { 5293 #if 0 5294 struct drm_i915_private *dev_priv = to_i915(dev); 5295 5296 kmem_cache_destroy(dev_priv->requests); 5297 kmem_cache_destroy(dev_priv->vmas); 5298 kmem_cache_destroy(dev_priv->objects); 5299 #endif 5300 } 5301 5302 void i915_gem_release(struct drm_device *dev, struct drm_file *file) 5303 { 5304 struct drm_i915_file_private *file_priv = file->driver_priv; 5305 5306 /* Clean up our request list when the client is going away, so that 5307 * later retire_requests won't dereference our soon-to-be-gone 5308 * file_priv. 
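 * The requests themselves are not cancelled; we only unlink them from the
 * per-file list and clear request->file_priv under the file's mm.lock,
 * and then drop any RPS boost client link the file still holds.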
5309 */ 5310 spin_lock(&file_priv->mm.lock); 5311 while (!list_empty(&file_priv->mm.request_list)) { 5312 struct drm_i915_gem_request *request; 5313 5314 request = list_first_entry(&file_priv->mm.request_list, 5315 struct drm_i915_gem_request, 5316 client_list); 5317 list_del(&request->client_list); 5318 request->file_priv = NULL; 5319 } 5320 spin_unlock(&file_priv->mm.lock); 5321 5322 if (!list_empty(&file_priv->rps.link)) { 5323 lockmgr(&to_i915(dev)->rps.client_lock, LK_EXCLUSIVE); 5324 list_del(&file_priv->rps.link); 5325 lockmgr(&to_i915(dev)->rps.client_lock, LK_RELEASE); 5326 } 5327 } 5328 5329 int 5330 i915_gem_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot, 5331 vm_ooffset_t foff, struct ucred *cred, u_short *color) 5332 { 5333 *color = 0; /* XXXKIB */ 5334 return (0); 5335 } 5336 5337 void 5338 i915_gem_pager_dtor(void *handle) 5339 { 5340 struct drm_gem_object *obj; 5341 struct drm_device *dev; 5342 5343 obj = handle; 5344 dev = obj->dev; 5345 5346 mutex_lock(&dev->struct_mutex); 5347 drm_gem_free_mmap_offset(obj); 5348 i915_gem_release_mmap(to_intel_bo(obj)); 5349 drm_gem_object_unreference(obj); 5350 mutex_unlock(&dev->struct_mutex); 5351 } 5352 5353 int i915_gem_open(struct drm_device *dev, struct drm_file *file) 5354 { 5355 struct drm_i915_file_private *file_priv; 5356 int ret; 5357 5358 DRM_DEBUG_DRIVER("\n"); 5359 5360 file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL); 5361 if (!file_priv) 5362 return -ENOMEM; 5363 5364 file->driver_priv = file_priv; 5365 file_priv->dev_priv = dev->dev_private; 5366 file_priv->file = file; 5367 INIT_LIST_HEAD(&file_priv->rps.link); 5368 5369 spin_init(&file_priv->mm.lock, "i915_priv"); 5370 INIT_LIST_HEAD(&file_priv->mm.request_list); 5371 5372 file_priv->bsd_ring = -1; 5373 5374 ret = i915_gem_context_open(dev, file); 5375 if (ret) 5376 kfree(file_priv); 5377 5378 return ret; 5379 } 5380 5381 /** 5382 * i915_gem_track_fb - update frontbuffer tracking 5383 * @old: current GEM buffer for the frontbuffer slots 5384 * @new: new GEM buffer for the frontbuffer slots 5385 * @frontbuffer_bits: bitmask of frontbuffer slots 5386 * 5387 * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them 5388 * from @old and setting them in @new. Both @old and @new can be NULL. 5389 */ 5390 void i915_gem_track_fb(struct drm_i915_gem_object *old, 5391 struct drm_i915_gem_object *new, 5392 unsigned frontbuffer_bits) 5393 { 5394 if (old) { 5395 WARN_ON(!mutex_is_locked(&old->base.dev->struct_mutex)); 5396 WARN_ON(!(old->frontbuffer_bits & frontbuffer_bits)); 5397 old->frontbuffer_bits &= ~frontbuffer_bits; 5398 } 5399 5400 if (new) { 5401 WARN_ON(!mutex_is_locked(&new->base.dev->struct_mutex)); 5402 WARN_ON(new->frontbuffer_bits & frontbuffer_bits); 5403 new->frontbuffer_bits |= frontbuffer_bits; 5404 } 5405 } 5406 5407 /* All the new VM stuff */ 5408 u64 i915_gem_obj_offset(struct drm_i915_gem_object *o, 5409 struct i915_address_space *vm) 5410 { 5411 struct drm_i915_private *dev_priv = o->base.dev->dev_private; 5412 struct i915_vma *vma; 5413 5414 WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base); 5415 5416 list_for_each_entry(vma, &o->vma_list, obj_link) { 5417 if (vma->is_ggtt && 5418 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) 5419 continue; 5420 if (vma->vm == vm) 5421 return vma->node.start; 5422 } 5423 5424 WARN(1, "%s vma for this object not found.\n", 5425 i915_is_ggtt(vm) ? 
"global" : "ppgtt"); 5426 return -1; 5427 } 5428 5429 u64 i915_gem_obj_ggtt_offset_view(struct drm_i915_gem_object *o, 5430 const struct i915_ggtt_view *view) 5431 { 5432 struct drm_i915_private *dev_priv = to_i915(o->base.dev); 5433 struct i915_ggtt *ggtt = &dev_priv->ggtt; 5434 struct i915_vma *vma; 5435 5436 list_for_each_entry(vma, &o->vma_list, obj_link) 5437 if (vma->vm == &ggtt->base && 5438 i915_ggtt_view_equal(&vma->ggtt_view, view)) 5439 return vma->node.start; 5440 5441 WARN(1, "global vma for this object not found. (view=%u)\n", view->type); 5442 return -1; 5443 } 5444 5445 bool i915_gem_obj_bound(struct drm_i915_gem_object *o, 5446 struct i915_address_space *vm) 5447 { 5448 struct i915_vma *vma; 5449 5450 list_for_each_entry(vma, &o->vma_list, obj_link) { 5451 if (vma->is_ggtt && 5452 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) 5453 continue; 5454 if (vma->vm == vm && drm_mm_node_allocated(&vma->node)) 5455 return true; 5456 } 5457 5458 return false; 5459 } 5460 5461 bool i915_gem_obj_ggtt_bound_view(struct drm_i915_gem_object *o, 5462 const struct i915_ggtt_view *view) 5463 { 5464 struct drm_i915_private *dev_priv = to_i915(o->base.dev); 5465 struct i915_ggtt *ggtt = &dev_priv->ggtt; 5466 struct i915_vma *vma; 5467 5468 list_for_each_entry(vma, &o->vma_list, obj_link) 5469 if (vma->vm == &ggtt->base && 5470 i915_ggtt_view_equal(&vma->ggtt_view, view) && 5471 drm_mm_node_allocated(&vma->node)) 5472 return true; 5473 5474 return false; 5475 } 5476 5477 bool i915_gem_obj_bound_any(struct drm_i915_gem_object *o) 5478 { 5479 struct i915_vma *vma; 5480 5481 list_for_each_entry(vma, &o->vma_list, obj_link) 5482 if (drm_mm_node_allocated(&vma->node)) 5483 return true; 5484 5485 return false; 5486 } 5487 5488 unsigned long i915_gem_obj_size(struct drm_i915_gem_object *o, 5489 struct i915_address_space *vm) 5490 { 5491 struct drm_i915_private *dev_priv = o->base.dev->dev_private; 5492 struct i915_vma *vma; 5493 5494 WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base); 5495 5496 BUG_ON(list_empty(&o->vma_list)); 5497 5498 list_for_each_entry(vma, &o->vma_list, obj_link) { 5499 if (vma->is_ggtt && 5500 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) 5501 continue; 5502 if (vma->vm == vm) 5503 return vma->node.size; 5504 } 5505 return 0; 5506 } 5507 5508 bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj) 5509 { 5510 struct i915_vma *vma; 5511 list_for_each_entry(vma, &obj->vma_list, obj_link) 5512 if (vma->pin_count > 0) 5513 return true; 5514 5515 return false; 5516 } 5517 5518 /* Like i915_gem_object_get_page(), but mark the returned page dirty */ 5519 struct page * 5520 i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, int n) 5521 { 5522 struct page *page; 5523 5524 /* Only default objects have per-page dirty tracking */ 5525 if (WARN_ON((obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE) == 0)) 5526 return NULL; 5527 5528 page = i915_gem_object_get_page(obj, n); 5529 set_page_dirty(page); 5530 return page; 5531 } 5532 5533 /* Allocate a new GEM object and fill it with the supplied data */ 5534 struct drm_i915_gem_object * 5535 i915_gem_object_create_from_data(struct drm_device *dev, 5536 const void *data, size_t size) 5537 { 5538 struct drm_i915_gem_object *obj; 5539 struct sg_table *sg; 5540 size_t bytes; 5541 int ret; 5542 5543 obj = i915_gem_alloc_object(dev, round_up(size, PAGE_SIZE)); 5544 if (IS_ERR_OR_NULL(obj)) 5545 return obj; 5546 5547 ret = i915_gem_object_set_to_cpu_domain(obj, true); 5548 if (ret) 5549 goto fail; 5550 5551 ret = 
i915_gem_object_get_pages(obj); 5552 if (ret) 5553 goto fail; 5554 5555 i915_gem_object_pin_pages(obj); 5556 sg = obj->pages; 5557 bytes = sg_copy_from_buffer(sg->sgl, sg->nents, data, size); 5558 obj->dirty = 1; /* Backing store is now out of date */ 5559 i915_gem_object_unpin_pages(obj); 5560 5561 if (WARN_ON(bytes != size)) { 5562 DRM_ERROR("Incomplete copy, wrote %zu of %zu\n", bytes, size); 5563 ret = -EFAULT; 5564 goto fail; 5565 } 5566 5567 return obj; 5568 5569 fail: 5570 drm_gem_object_unreference(&obj->base); 5571 return ERR_PTR(ret); 5572 } 5573
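
/*
 * Usage sketch for i915_gem_object_create_from_data() (illustrative only,
 * error handling abbreviated): a caller that wants a GEM object
 * pre-populated with CPU data, for example a firmware blob, would do
 * roughly the following, typically under struct_mutex like the other
 * object APIs in this file:
 *
 *	struct drm_i915_gem_object *obj;
 *
 *	obj = i915_gem_object_create_from_data(dev, blob, blob_size);
 *	if (IS_ERR_OR_NULL(obj))
 *		return obj ? PTR_ERR(obj) : -ENOMEM;
 *
 * On return the object's backing pages hold a copy of the data and the
 * object is in the CPU write domain with obj->dirty set.
 */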