/*
 * Copyright © 2008-2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */

#include <drm/drmP.h>
#include <drm/drm_vma_manager.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "i915_vgpu.h"
#include "i915_trace.h"
#include "intel_drv.h"
#include "intel_mocs.h"
#include <linux/shmem_fs.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/pci.h>
#include <linux/dma-buf.h>

#include <sys/mman.h>
#include <vm/vm_map.h>
#include <vm/vm_param.h>

static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
static void
i915_gem_object_retire__write(struct drm_i915_gem_object *obj);
static void
i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring);

static bool cpu_cache_is_coherent(struct drm_device *dev,
				  enum i915_cache_level level)
{
	return HAS_LLC(dev) || level != I915_CACHE_NONE;
}

static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
	if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
		return true;

	return obj->pin_display;
}

/* some bookkeeping */
static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
				  size_t size)
{
	spin_lock(&dev_priv->mm.object_stat_lock);
	dev_priv->mm.object_count++;
	dev_priv->mm.object_memory += size;
	spin_unlock(&dev_priv->mm.object_stat_lock);
}

static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
				     size_t size)
{
	spin_lock(&dev_priv->mm.object_stat_lock);
	dev_priv->mm.object_count--;
	dev_priv->mm.object_memory -= size;
	spin_unlock(&dev_priv->mm.object_stat_lock);
}

static int
i915_gem_wait_for_error(struct i915_gpu_error *error)
{
	int ret;

	if (!i915_reset_in_progress(error))
		return 0;

	/*
	 * Only wait 10 seconds for the gpu reset to complete to avoid hanging
	 * userspace. If it takes that long something really bad is going on and
	 * we should simply try to bail out and fail as gracefully as possible.
	 */
	ret = wait_event_interruptible_timeout(error->reset_queue,
					       !i915_reset_in_progress(error),
					       10*HZ);
	if (ret == 0) {
		DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
		return -EIO;
	} else if (ret < 0) {
		return ret;
	} else {
		return 0;
	}
}

int i915_mutex_lock_interruptible(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret;

	ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
	if (ret)
		return ret;

	ret = mutex_lock_interruptible(&dev->struct_mutex);
	if (ret)
		return ret;

	WARN_ON(i915_verify_lists(dev));
	return 0;
}

int
i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
			    struct drm_file *file)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct i915_ggtt *ggtt = &dev_priv->ggtt;
	struct drm_i915_gem_get_aperture *args = data;
	struct i915_vma *vma;
	size_t pinned;

	pinned = 0;
	mutex_lock(&dev->struct_mutex);
	list_for_each_entry(vma, &ggtt->base.active_list, vm_link)
		if (vma->pin_count)
			pinned += vma->node.size;
	list_for_each_entry(vma, &ggtt->base.inactive_list, vm_link)
		if (vma->pin_count)
			pinned += vma->node.size;
	mutex_unlock(&dev->struct_mutex);

	args->aper_size = ggtt->base.total;
	args->aper_available_size = args->aper_size - pinned;

	return 0;
}
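
/*
 * Illustrative sketch (hypothetical, not built): how userspace typically
 * consumes the aperture ioctl above. The drm fd and error handling are
 * assumed.
 */
#if 0
#include <stdio.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static int print_aperture(int drm_fd)
{
	struct drm_i915_gem_get_aperture aper = { 0 };

	if (ioctl(drm_fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aper) != 0)
		return -1;

	/* aper_size is the total GGTT size; aper_available_size is what
	 * remains once pinned objects are subtracted, mirroring the loop
	 * over the active and inactive lists above. */
	printf("aperture: %llu total, %llu available\n",
	       (unsigned long long)aper.aper_size,
	       (unsigned long long)aper.aper_available_size);
	return 0;
}
#endif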
#if 0
static int
i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
{
	struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
	char *vaddr = obj->phys_handle->vaddr;
	struct sg_table *st;
	struct scatterlist *sg;
	int i;

	if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
		return -EINVAL;

	for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
		struct page *page;
		char *src;

		page = shmem_read_mapping_page(mapping, i);
		if (IS_ERR(page))
			return PTR_ERR(page);

		src = kmap_atomic(page);
		memcpy(vaddr, src, PAGE_SIZE);
		drm_clflush_virt_range(vaddr, PAGE_SIZE);
		kunmap_atomic(src);

		put_page(page);
		vaddr += PAGE_SIZE;
	}

	i915_gem_chipset_flush(obj->base.dev);

	st = kmalloc(sizeof(*st), GFP_KERNEL);
	if (st == NULL)
		return -ENOMEM;

	if (sg_alloc_table(st, 1, GFP_KERNEL)) {
		kfree(st);
		return -ENOMEM;
	}

	sg = st->sgl;
	sg->offset = 0;
	sg->length = obj->base.size;

	sg_dma_address(sg) = obj->phys_handle->busaddr;
	sg_dma_len(sg) = obj->base.size;

	obj->pages = st;
	return 0;
}

static void
i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj)
{
	int ret;

	BUG_ON(obj->madv == __I915_MADV_PURGED);

	ret = i915_gem_object_set_to_cpu_domain(obj, true);
	if (WARN_ON(ret)) {
		/* In the event of a disaster, abandon all caches and
		 * hope for the best.
		 */
		obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
	}

	if (obj->madv == I915_MADV_DONTNEED)
		obj->dirty = 0;

	if (obj->dirty) {
		struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
		char *vaddr = obj->phys_handle->vaddr;
		int i;

		for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
			struct page *page;
			char *dst;

			page = shmem_read_mapping_page(mapping, i);
			if (IS_ERR(page))
				continue;

			dst = kmap_atomic(page);
			drm_clflush_virt_range(vaddr, PAGE_SIZE);
			memcpy(dst, vaddr, PAGE_SIZE);
			kunmap_atomic(dst);

			set_page_dirty(page);
			if (obj->madv == I915_MADV_WILLNEED)
				mark_page_accessed(page);
			put_page(page);
			vaddr += PAGE_SIZE;
		}
		obj->dirty = 0;
	}

	sg_free_table(obj->pages);
	kfree(obj->pages);
}

static void
i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
{
	drm_pci_free(obj->base.dev, obj->phys_handle);
}

static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
	.get_pages = i915_gem_object_get_pages_phys,
	.put_pages = i915_gem_object_put_pages_phys,
	.release = i915_gem_object_release_phys,
};
#endif

static int
drop_pages(struct drm_i915_gem_object *obj)
{
	struct i915_vma *vma, *next;
	int ret;

	drm_gem_object_reference(&obj->base);
	list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link)
		if (i915_vma_unbind(vma))
			break;

	ret = i915_gem_object_put_pages(obj);
	drm_gem_object_unreference(&obj->base);

	return ret;
}

int
i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
			    int align)
{
	drm_dma_handle_t *phys;
	int ret;

	if (obj->phys_handle) {
		if ((unsigned long)obj->phys_handle->vaddr & (align - 1))
			return -EBUSY;

		return 0;
	}

	if (obj->madv != I915_MADV_WILLNEED)
		return -EFAULT;

	if (obj->base.filp == NULL)
		return -EINVAL;

	ret = drop_pages(obj);
	if (ret)
		return ret;

	/* create a new object */
	phys = drm_pci_alloc(obj->base.dev, obj->base.size, align);
	if (!phys)
		return -ENOMEM;

	obj->phys_handle = phys;
#if 0
	obj->ops = &i915_gem_phys_ops;
#endif

	return i915_gem_object_get_pages(obj);
}

static int
i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pwrite *args,
		     struct drm_file *file_priv)
{
	struct drm_device *dev = obj->base.dev;
	void *vaddr = (char *)obj->phys_handle->vaddr + args->offset;
	char __user *user_data = u64_to_user_ptr(args->data_ptr);
	int ret = 0;

	/* We manually control the domain here and pretend that it
	 * remains coherent i.e. in the GTT domain, like shmem_pwrite.
	 */
	ret = i915_gem_object_wait_rendering(obj, false);
	if (ret)
		return ret;

	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
	if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) {
		unsigned long unwritten;

		/* The physical object once assigned is fixed for the lifetime
		 * of the obj, so we can safely drop the lock and continue
		 * to access vaddr.
		 */
		mutex_unlock(&dev->struct_mutex);
		unwritten = copy_from_user(vaddr, user_data, args->size);
		mutex_lock(&dev->struct_mutex);
		if (unwritten) {
			ret = -EFAULT;
			goto out;
		}
	}

	drm_clflush_virt_range(vaddr, args->size);
	i915_gem_chipset_flush(dev);

out:
	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
	return ret;
}

void *i915_gem_object_alloc(struct drm_device *dev)
{
	return kmalloc(sizeof(struct drm_i915_gem_object),
	    M_DRM, M_WAITOK | M_ZERO);
}

void i915_gem_object_free(struct drm_i915_gem_object *obj)
{
	kfree(obj);
}

static int
i915_gem_create(struct drm_file *file,
		struct drm_device *dev,
		uint64_t size,
		uint32_t *handle_p)
{
	struct drm_i915_gem_object *obj;
	int ret;
	u32 handle;

	size = roundup(size, PAGE_SIZE);
	if (size == 0)
		return -EINVAL;

	/* Allocate the new object */
	obj = i915_gem_alloc_object(dev, size);
	if (obj == NULL)
		return -ENOMEM;

	ret = drm_gem_handle_create(file, &obj->base, &handle);
	/* drop reference from allocate - handle holds it now */
	drm_gem_object_unreference_unlocked(&obj->base);
	if (ret)
		return ret;

	*handle_p = handle;
	return 0;
}

int
i915_gem_dumb_create(struct drm_file *file,
		     struct drm_device *dev,
		     struct drm_mode_create_dumb *args)
{
	/* have to work out size/pitch and return them */
	args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64);
	args->size = args->pitch * args->height;
	return i915_gem_create(file, dev,
			       args->size, &args->handle);
}

/**
 * Creates a new mm object and returns a handle to it.
 */
int
i915_gem_create_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_gem_create *args = data;

	return i915_gem_create(file, dev,
			       args->size, &args->handle);
}

static inline int
__copy_to_user_swizzled(char __user *cpu_vaddr,
			const char *gpu_vaddr, int gpu_offset,
			int length)
{
	int ret, cpu_offset = 0;

	while (length > 0) {
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
		int this_length = min(cacheline_end - gpu_offset, length);
		int swizzled_gpu_offset = gpu_offset ^ 64;

		ret = __copy_to_user(cpu_vaddr + cpu_offset,
				     gpu_vaddr + swizzled_gpu_offset,
				     this_length);
		if (ret)
			return ret + length;

		cpu_offset += this_length;
		gpu_offset += this_length;
		length -= this_length;
	}

	return 0;
}

static inline int
__copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
			  const char __user *cpu_vaddr,
			  int length)
{
	int ret, cpu_offset = 0;

	while (length > 0) {
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
		int this_length = min(cacheline_end - gpu_offset, length);
		int swizzled_gpu_offset = gpu_offset ^ 64;

		ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
				       cpu_vaddr + cpu_offset,
				       this_length);
		if (ret)
			return ret + length;

		cpu_offset += this_length;
		gpu_offset += this_length;
		length -= this_length;
	}

	return 0;
}
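
/*
 * Illustrative sketch (hypothetical, not built): the two helpers above walk
 * the buffer one 64-byte cacheline at a time and XOR the GPU offset with 64,
 * which swaps the two cachelines inside each 128-byte pair when bit 17
 * swizzling is in effect. A standalone rendering of that address arithmetic:
 */
#if 0
#include <stdio.h>

int main(void)
{
	int gpu_offset = 0, length = 256;

	while (length > 0) {
		/* same as ALIGN(gpu_offset + 1, 64) */
		int cacheline_end = (gpu_offset + 64) & ~63;
		int this_length = cacheline_end - gpu_offset;

		if (this_length > length)
			this_length = length;

		/* bit 6 flips: 0 <-> 64, 128 <-> 192, ... */
		printf("copy %3d bytes for offset %3d from swizzled offset %3d\n",
		       this_length, gpu_offset, gpu_offset ^ 64);

		gpu_offset += this_length;
		length -= this_length;
	}
	return 0;
}
#endif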
/*
 * Pins the specified object's pages and synchronizes the object with
 * GPU accesses. Sets needs_clflush to non-zero if the caller should
 * flush the object from the CPU cache.
 */
int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
				    int *needs_clflush)
{
	int ret;

	*needs_clflush = 0;

#if 0
	if (WARN_ON((obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE) == 0))
		return -EINVAL;
#endif

	if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) {
		/* If we're not in the cpu read domain, set ourself into the gtt
		 * read domain and manually flush cachelines (if required). This
		 * optimizes for the case when the gpu will dirty the data
		 * anyway again before the next pread happens. */
		*needs_clflush = !cpu_cache_is_coherent(obj->base.dev,
							obj->cache_level);
		ret = i915_gem_object_wait_rendering(obj, true);
		if (ret)
			return ret;
	}

	ret = i915_gem_object_get_pages(obj);
	if (ret)
		return ret;

	i915_gem_object_pin_pages(obj);

	return ret;
}

/* Per-page copy function for the shmem pread fastpath.
 * Flushes invalid cachelines before reading the target if
 * needs_clflush is set. */
static int
shmem_pread_fast(struct page *page, int shmem_page_offset, int page_length,
		 char __user *user_data,
		 bool page_do_bit17_swizzling, bool needs_clflush)
{
	char *vaddr;
	int ret;

	if (unlikely(page_do_bit17_swizzling))
		return -EINVAL;

	vaddr = kmap_atomic(page);
	if (needs_clflush)
		drm_clflush_virt_range(vaddr + shmem_page_offset,
				       page_length);
	ret = __copy_to_user_inatomic(user_data,
				      vaddr + shmem_page_offset,
				      page_length);
	kunmap_atomic(vaddr);

	return ret ? -EFAULT : 0;
}

static void
shmem_clflush_swizzled_range(char *addr, unsigned long length,
			     bool swizzled)
{
	if (unlikely(swizzled)) {
		unsigned long start = (unsigned long) addr;
		unsigned long end = (unsigned long) addr + length;

		/* For swizzling simply ensure that we always flush both
		 * channels. Lame, but simple and it works. Swizzled
		 * pwrite/pread is far from a hotpath - current userspace
		 * doesn't use it at all. */
		start = round_down(start, 128);
		end = round_up(end, 128);

		drm_clflush_virt_range((void *)start, end - start);
	} else {
		drm_clflush_virt_range(addr, length);
	}
}

/* Only difference to the fast-path function is that this can handle bit17
 * and uses non-atomic copy and kmap functions. */
static int
shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length,
		 char __user *user_data,
		 bool page_do_bit17_swizzling, bool needs_clflush)
{
	char *vaddr;
	int ret;

	vaddr = kmap(page);
	if (needs_clflush)
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
					     page_length,
					     page_do_bit17_swizzling);

	if (page_do_bit17_swizzling)
		ret = __copy_to_user_swizzled(user_data,
					      vaddr, shmem_page_offset,
					      page_length);
	else
		ret = __copy_to_user(user_data,
				     vaddr + shmem_page_offset,
				     page_length);
	kunmap(page);

	return ret ? -EFAULT : 0;
}
static int
i915_gem_shmem_pread(struct drm_device *dev,
		     struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pread *args,
		     struct drm_file *file)
{
	char __user *user_data;
	ssize_t remain;
	loff_t offset;
	int shmem_page_offset, page_length, ret = 0;
	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
	int prefaulted = 0;
	int needs_clflush = 0;
	struct sg_page_iter sg_iter;

	user_data = u64_to_user_ptr(args->data_ptr);
	remain = args->size;

	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);

	ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
	if (ret)
		return ret;

	offset = args->offset;

	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
			 offset >> PAGE_SHIFT) {
		struct page *page = sg_page_iter_page(&sg_iter);

		if (remain <= 0)
			break;

		/* Operation in this page
		 *
		 * shmem_page_offset = offset within page in shmem file
		 * page_length = bytes to copy for this page
		 */
		shmem_page_offset = offset_in_page(offset);
		page_length = remain;
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - shmem_page_offset;

		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
			(page_to_phys(page) & (1 << 17)) != 0;

		ret = shmem_pread_fast(page, shmem_page_offset, page_length,
				       user_data, page_do_bit17_swizzling,
				       needs_clflush);
		if (ret == 0)
			goto next_page;

		mutex_unlock(&dev->struct_mutex);

		if (likely(!i915.prefault_disable) && !prefaulted) {
			ret = fault_in_multipages_writeable(user_data, remain);
			/* Userspace is tricking us, but we've already clobbered
			 * its pages with the prefault and promised to write the
			 * data up to the first fault. Hence ignore any errors
			 * and just continue. */
			(void)ret;
			prefaulted = 1;
		}

		ret = shmem_pread_slow(page, shmem_page_offset, page_length,
				       user_data, page_do_bit17_swizzling,
				       needs_clflush);

		mutex_lock(&dev->struct_mutex);

		if (ret)
			goto out;

next_page:
		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

out:
	i915_gem_object_unpin_pages(obj);

	return ret;
}

/**
 * Reads data from the object referenced by handle.
 *
 * On error, the contents of *data are undefined.
 */
int
i915_gem_pread_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file)
{
	struct drm_i915_gem_pread *args = data;
	struct drm_i915_gem_object *obj;
	int ret = 0;

	if (args->size == 0)
		return 0;

#if 0
	if (!access_ok(VERIFY_WRITE,
		       u64_to_user_ptr(args->data_ptr),
		       args->size))
		return -EFAULT;
#endif

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Bounds check source. */
	if (args->offset > obj->base.size ||
	    args->size > obj->base.size - args->offset) {
		ret = -EINVAL;
		goto out;
	}

	/* prime objects have no backing filp to GEM pread/pwrite
	 * pages from.
	 */

	trace_i915_gem_object_pread(obj, args->offset, args->size);

	ret = i915_gem_shmem_pread(dev, obj, args, file);

out:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}
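
/*
 * Illustrative sketch (hypothetical, not built): a minimal userspace
 * invocation of the pread ioctl implemented above; the drm fd, handle and
 * destination buffer are assumed to exist.
 */
#if 0
#include <stdint.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static int read_bo(int drm_fd, uint32_t handle, void *dst,
		   uint64_t offset, uint64_t size)
{
	struct drm_i915_gem_pread pread = {
		.handle = handle,
		.offset = offset,
		.size = size,
		.data_ptr = (uint64_t)(uintptr_t)dst,
	};

	/* Fails with EINVAL on an out-of-bounds (offset, size) pair,
	 * matching the bounds check in the ioctl above. */
	return ioctl(drm_fd, DRM_IOCTL_I915_GEM_PREAD, &pread);
}
#endif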
/* This is the fast write path which cannot handle
 * page faults in the source data
 */

static inline int
fast_user_write(struct io_mapping *mapping,
		loff_t page_base, int page_offset,
		char __user *user_data,
		int length)
{
	void __iomem *vaddr_atomic;
	void *vaddr;
	unsigned long unwritten;

	vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
	/* We can use the cpu mem copy function because this is X86. */
	vaddr = (char __force*)vaddr_atomic + page_offset;
	unwritten = __copy_from_user_inatomic_nocache(vaddr,
						      user_data, length);
	io_mapping_unmap_atomic(vaddr_atomic);
	return unwritten;
}

/**
 * This is the fast pwrite path, where we copy the data directly from the
 * user into the GTT, uncached.
 */
static int
i915_gem_gtt_pwrite_fast(struct drm_device *dev,
			 struct drm_i915_gem_object *obj,
			 struct drm_i915_gem_pwrite *args,
			 struct drm_file *file)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct i915_ggtt *ggtt = &dev_priv->ggtt;
	ssize_t remain;
	loff_t offset, page_base;
	char __user *user_data;
	int page_offset, page_length, ret;

	ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK);
	if (ret)
		goto out;

	ret = i915_gem_object_set_to_gtt_domain(obj, true);
	if (ret)
		goto out_unpin;

	ret = i915_gem_object_put_fence(obj);
	if (ret)
		goto out_unpin;

	user_data = u64_to_user_ptr(args->data_ptr);
	remain = args->size;

	offset = i915_gem_obj_ggtt_offset(obj) + args->offset;

	intel_fb_obj_invalidate(obj, ORIGIN_GTT);

	while (remain > 0) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		page_base = offset & LINUX_PAGE_MASK;
		page_offset = offset_in_page(offset);
		page_length = remain;
		if ((page_offset + remain) > PAGE_SIZE)
			page_length = PAGE_SIZE - page_offset;

		/* If we get a fault while copying data, then (presumably) our
		 * source page isn't available. Return the error and we'll
		 * retry in the slow path.
		 */
		if (fast_user_write(ggtt->mappable, page_base,
				    page_offset, user_data, page_length)) {
			ret = -EFAULT;
			goto out_flush;
		}

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

out_flush:
	intel_fb_obj_flush(obj, false, ORIGIN_GTT);
out_unpin:
	i915_gem_object_ggtt_unpin(obj);
out:
	return ret;
}
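
/*
 * Worked example for the copy loop above (illustrative): a linear GTT offset
 * is split into (page_base, page_offset, page_length) triples. With 4 KiB
 * pages, offset 0x12345 and 0x2000 bytes remaining:
 *
 *	page_base   = 0x12345 & ~0xfff            = 0x12000
 *	page_offset = 0x12345 &  0xfff            = 0x345
 *	page_length = min(0x2000, 0x1000 - 0x345) = 0xcbb
 *
 * so the first iteration finishes the partially covered page and the next
 * one continues page-aligned at offset 0x13000.
 */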
/* Per-page copy function for the shmem pwrite fastpath.
 * Flushes invalid cachelines before writing to the target if
 * needs_clflush_before is set and flushes out any written cachelines after
 * writing if needs_clflush is set.
 */
static int
shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length,
		  char __user *user_data,
		  bool page_do_bit17_swizzling,
		  bool needs_clflush_before,
		  bool needs_clflush_after)
{
	char *vaddr;
	int ret;

	if (unlikely(page_do_bit17_swizzling))
		return -EINVAL;

	vaddr = kmap_atomic(page);
	if (needs_clflush_before)
		drm_clflush_virt_range(vaddr + shmem_page_offset,
				       page_length);
	ret = __copy_from_user_inatomic(vaddr + shmem_page_offset,
					user_data, page_length);
	if (needs_clflush_after)
		drm_clflush_virt_range(vaddr + shmem_page_offset,
				       page_length);
	kunmap_atomic(vaddr);

	return ret ? -EFAULT : 0;
}

/* Only difference to the fast-path function is that this can handle bit17
 * and uses non-atomic copy and kmap functions. */
static int
shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
		  char __user *user_data,
		  bool page_do_bit17_swizzling,
		  bool needs_clflush_before,
		  bool needs_clflush_after)
{
	char *vaddr;
	int ret;

	vaddr = kmap(page);
	if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
					     page_length,
					     page_do_bit17_swizzling);
	if (page_do_bit17_swizzling)
		ret = __copy_from_user_swizzled(vaddr, shmem_page_offset,
						user_data,
						page_length);
	else
		ret = __copy_from_user(vaddr + shmem_page_offset,
				       user_data,
				       page_length);
	if (needs_clflush_after)
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
					     page_length,
					     page_do_bit17_swizzling);
	kunmap(page);

	return ret ? -EFAULT : 0;
}

static int
i915_gem_shmem_pwrite(struct drm_device *dev,
		      struct drm_i915_gem_object *obj,
		      struct drm_i915_gem_pwrite *args,
		      struct drm_file *file)
{
	ssize_t remain;
	loff_t offset;
	char __user *user_data;
	int shmem_page_offset, page_length, ret = 0;
	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
	int hit_slowpath = 0;
	int needs_clflush_after = 0;
	int needs_clflush_before = 0;
	struct sg_page_iter sg_iter;

	user_data = u64_to_user_ptr(args->data_ptr);
	remain = args->size;

	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);

	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
		/* If we're not in the cpu write domain, set ourself into the gtt
		 * write domain and manually flush cachelines (if required). This
		 * optimizes for the case when the gpu will use the data
		 * right away and we therefore have to clflush anyway. */
		needs_clflush_after = cpu_write_needs_clflush(obj);
		ret = i915_gem_object_wait_rendering(obj, false);
		if (ret)
			return ret;
	}
	/* Same trick applies to invalidate partially written cachelines read
	 * before writing.
	 */
	if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0)
		needs_clflush_before =
			!cpu_cache_is_coherent(dev, obj->cache_level);

	ret = i915_gem_object_get_pages(obj);
	if (ret)
		return ret;

	intel_fb_obj_invalidate(obj, ORIGIN_CPU);

	i915_gem_object_pin_pages(obj);

	offset = args->offset;
	obj->dirty = 1;

	VM_OBJECT_LOCK(obj->base.filp);
	vm_object_pip_add(obj->base.filp, 1);

	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
			 offset >> PAGE_SHIFT) {
		struct page *page = sg_page_iter_page(&sg_iter);
		int partial_cacheline_write;

		if (remain <= 0)
			break;

		/* Operation in this page
		 *
		 * shmem_page_offset = offset within page in shmem file
		 * page_length = bytes to copy for this page
		 */
		shmem_page_offset = offset_in_page(offset);

		page_length = remain;
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - shmem_page_offset;

		/* If we don't overwrite a cacheline completely we need to be
		 * careful to have up-to-date data by first clflushing. Don't
		 * overcomplicate things and flush the entire page. */
		partial_cacheline_write = needs_clflush_before &&
			((shmem_page_offset | page_length)
				& (cpu_clflush_line_size - 1));

		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
			(page_to_phys(page) & (1 << 17)) != 0;

		ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
					user_data, page_do_bit17_swizzling,
					partial_cacheline_write,
					needs_clflush_after);
		if (ret == 0)
			goto next_page;

		hit_slowpath = 1;
		mutex_unlock(&dev->struct_mutex);
		ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
					user_data, page_do_bit17_swizzling,
					partial_cacheline_write,
					needs_clflush_after);

		mutex_lock(&dev->struct_mutex);

		if (ret)
			goto out;

next_page:
		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}
	vm_object_pip_wakeup(obj->base.filp);
	VM_OBJECT_UNLOCK(obj->base.filp);

out:
	i915_gem_object_unpin_pages(obj);

	if (hit_slowpath) {
		/*
		 * Fixup: Flush cpu caches in case we didn't flush the dirty
		 * cachelines in-line while writing and the object moved
		 * out of the cpu write domain while we've dropped the lock.
		 */
		if (!needs_clflush_after &&
		    obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
			if (i915_gem_clflush_object(obj, obj->pin_display))
				needs_clflush_after = true;
		}
	}

	if (needs_clflush_after)
		i915_gem_chipset_flush(dev);
	else
		obj->cache_dirty = true;

	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
	return ret;
}
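
/*
 * Worked example for partial_cacheline_write above (illustrative): OR-ing
 * the start offset with the length catches a bit set below the cacheline
 * size at either end of the write. With 64-byte cachelines:
 *
 *	(0x40 | 0x80) & 63 == 0   aligned start and length, no flush needed
 *	(0x44 | 0x80) & 63 != 0   unaligned start, flush before writing
 *	(0x40 | 0x7f) & 63 != 0   unaligned length, flush before writing
 */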
/**
 * Writes data to the object referenced by handle.
 *
 * On error, the contents of the buffer that were to be modified are undefined.
 */
int
i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_pwrite *args = data;
	struct drm_i915_gem_object *obj;
	int ret;

	if (args->size == 0)
		return 0;

#if 0
	if (!access_ok(VERIFY_READ,
		       u64_to_user_ptr(args->data_ptr),
		       args->size))
		return -EFAULT;
#endif

	if (likely(!i915.prefault_disable)) {
		ret = fault_in_multipages_readable(u64_to_user_ptr(args->data_ptr),
						   args->size);
		if (ret)
			return -EFAULT;
	}

	intel_runtime_pm_get(dev_priv);

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		goto put_rpm;

	obj = to_intel_bo(drm_gem_object_lookup(file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Bounds check destination. */
	if (args->offset > obj->base.size ||
	    args->size > obj->base.size - args->offset) {
		ret = -EINVAL;
		goto out;
	}

	/* prime objects have no backing filp to GEM pread/pwrite
	 * pages from.
	 */

	trace_i915_gem_object_pwrite(obj, args->offset, args->size);

	ret = -EFAULT;
	/* We can only do the GTT pwrite on untiled buffers, as otherwise
	 * it would end up going through the fenced access, and we'll get
	 * different detiling behavior between reading and writing.
	 * pread/pwrite currently are reading and writing from the CPU
	 * perspective, requiring manual detiling by the client.
	 */
	if (obj->tiling_mode == I915_TILING_NONE &&
	    obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
	    cpu_write_needs_clflush(obj)) {
		ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
		/* Note that the gtt paths might fail with non-page-backed user
		 * pointers (e.g. gtt mappings when moving data between
		 * textures). Fallback to the shmem path in that case. */
	}

	if (ret == -EFAULT || ret == -ENOSPC) {
		if (obj->phys_handle)
			ret = i915_gem_phys_pwrite(obj, args, file);
		else
			ret = i915_gem_shmem_pwrite(dev, obj, args, file);
	}

out:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
put_rpm:
	intel_runtime_pm_put(dev_priv);

	return ret;
}

static int
i915_gem_check_wedge(unsigned reset_counter, bool interruptible)
{
	if (__i915_terminally_wedged(reset_counter))
		return -EIO;

	if (__i915_reset_in_progress(reset_counter)) {
		/* Non-interruptible callers can't handle -EAGAIN, hence return
		 * -EIO unconditionally for these. */
		if (!interruptible)
			return -EIO;

		return -EAGAIN;
	}

	return 0;
}

static void fake_irq(unsigned long data)
{
	wakeup_one((void *)data);
}

static bool missed_irq(struct drm_i915_private *dev_priv,
		       struct intel_engine_cs *engine)
{
	return test_bit(engine->id, &dev_priv->gpu_error.missed_irq_rings);
}

#if 0
static int __i915_spin_request(struct drm_i915_gem_request *req, int state)
{
	unsigned long timeout;
	unsigned cpu;

	/* When waiting for high frequency requests, e.g. during synchronous
	 * rendering split between the CPU and GPU, the finite amount of time
	 * required to set up the irq and wait upon it limits the response
	 * rate. By busywaiting on the request completion for a short while we
	 * can service the high frequency waits as quickly as possible. However,
	 * if it is a slow request, we want to sleep as quickly as possible.
	 * The tradeoff between waiting and sleeping is roughly the time it
	 * takes to sleep on a request, on the order of a microsecond.
	 */

	if (req->engine->irq_refcount)
		return -EBUSY;

	/* Only spin if we know the GPU is processing this request */
	if (!i915_gem_request_started(req, true))
		return -EAGAIN;

	timeout = local_clock_us(&cpu) + 5;
	while (!need_resched()) {
		if (i915_gem_request_completed(req, true))
			return 0;

		if (signal_pending_state(state, current))
			break;

		if (busywait_stop(timeout, cpu))
			break;

		cpu_relax_lowlatency();
	}

	if (i915_gem_request_completed(req, false))
		return 0;

	return -EAGAIN;
}
#endif

/**
 * __i915_wait_request - wait until execution of request has finished
 * @req: the request to wait upon
 * @interruptible: do an interruptible wait (normally yes)
 * @timeout: in - how long to wait (NULL forever); out - how much time remaining
 *
 * Note: It is of utmost importance that the passed in seqno and reset_counter
 * values have been read by the caller in an smp safe manner. Where read-side
 * locks are involved, it is sufficient to read the reset_counter before
 * unlocking the lock that protects the seqno. For lockless tricks, the
 * reset_counter _must_ be read before, and an appropriate smp_rmb must be
 * inserted.
 *
 * Returns 0 if the request was found within the allotted time. Else returns the
 * errno with remaining time filled in timeout argument.
 */
int __i915_wait_request(struct drm_i915_gem_request *req,
			bool interruptible,
			s64 *timeout,
			struct intel_rps_client *rps)
{
	struct intel_engine_cs *engine = i915_gem_request_get_engine(req);
	struct drm_device *dev = engine->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	const bool irq_test_in_progress =
		ACCESS_ONCE(dev_priv->gpu_error.test_irq_rings) & intel_engine_flag(engine);
	unsigned long timeout_expire;
	s64 before = 0; /* Only to silence a compiler warning. */
	int ret, sl_timeout = 1;

	WARN(!intel_irqs_enabled(dev_priv), "IRQs disabled");

	if (list_empty(&req->list))
		return 0;

	if (i915_gem_request_completed(req, true))
		return 0;

	timeout_expire = 0;
	if (timeout) {
		if (WARN_ON(*timeout < 0))
			return -EINVAL;

		if (*timeout == 0)
			return -ETIME;

		timeout_expire = jiffies + nsecs_to_jiffies_timeout(*timeout);

		/*
		 * Record current time in case interrupted by signal, or wedged.
		 */
		before = ktime_get_raw_ns();
	}

	if (INTEL_INFO(dev_priv)->gen >= 6)
		gen6_rps_boost(dev_priv, rps, req->emitted_jiffies);

	trace_i915_gem_request_wait_begin(req);

	/* Optimistic spin for the next jiffie before touching IRQs */
#if 0
	ret = __i915_spin_request(req);
	if (ret == 0)
		goto out;
#endif

	if (!irq_test_in_progress && WARN_ON(!engine->irq_get(engine))) {
		ret = -ENODEV;
		goto out;
	}

	lockmgr(&engine->irq_queue.lock, LK_EXCLUSIVE);
	for (;;) {
		struct timer_list timer;

		/* We need to check whether any gpu reset happened in between
		 * the request being submitted and now. If a reset has occurred,
		 * the request is effectively complete (we either are in the
		 * process of or have discarded the rendering and completely
		 * reset the GPU. The results of the request are lost and we
		 * are free to continue on with the original operation.
		 */
		if (req->reset_counter != i915_reset_counter(&dev_priv->gpu_error)) {
			ret = 0;
			break;
		}

		if (i915_gem_request_completed(req, false)) {
			ret = 0;
			break;
		}

		if (interruptible && signal_pending(curthread->td_lwp)) {
			ret = -ERESTARTSYS;
			break;
		}

		if (timeout && time_after_eq(jiffies, timeout_expire)) {
			ret = -ETIME;
			break;
		}

		timer.function = NULL;
		if (timeout || missed_irq(dev_priv, engine)) {
			unsigned long expire;

			setup_timer_on_stack(&timer, fake_irq, (unsigned long)&engine->irq_queue);
			expire = missed_irq(dev_priv, engine) ? jiffies + 1 : timeout_expire;
			sl_timeout = expire - jiffies;
			if (sl_timeout < 1)
				sl_timeout = 1;
			mod_timer(&timer, expire);
		}

#if 0
		io_schedule();
#endif

		if (timer.function) {
			del_singleshot_timer_sync(&timer);
			destroy_timer_on_stack(&timer);
		}

		lksleep(&engine->irq_queue, &engine->irq_queue.lock,
			interruptible ? PCATCH : 0, "lwe", sl_timeout);
	}
	lockmgr(&engine->irq_queue.lock, LK_RELEASE);
	if (!irq_test_in_progress)
		engine->irq_put(engine);

out:
	trace_i915_gem_request_wait_end(req);

	if (timeout) {
		s64 tres = *timeout - (ktime_get_raw_ns() - before);

		*timeout = tres < 0 ? 0 : tres;

		/*
		 * Apparently ktime isn't accurate enough and occasionally has a
		 * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
		 * things up to make the test happy. We allow up to 1 jiffy.
		 *
		 * This is a regression from the timespec->ktime conversion.
		 */
		if (ret == -ETIME && *timeout < jiffies_to_usecs(1)*1000)
			*timeout = 0;
	}

	return ret;
}

int i915_gem_request_add_to_client(struct drm_i915_gem_request *req,
				   struct drm_file *file)
{
	struct drm_i915_file_private *file_priv;

	WARN_ON(!req || !file || req->file_priv);

	if (!req || !file)
		return -EINVAL;

	if (req->file_priv)
		return -EINVAL;

	file_priv = file->driver_priv;

	spin_lock(&file_priv->mm.lock);
	req->file_priv = file_priv;
	list_add_tail(&req->client_list, &file_priv->mm.request_list);
	spin_unlock(&file_priv->mm.lock);

	req->pid = curproc->p_pid;

	return 0;
}

static inline void
i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
{
	struct drm_i915_file_private *file_priv = request->file_priv;

	if (!file_priv)
		return;

	spin_lock(&file_priv->mm.lock);
	list_del(&request->client_list);
	request->file_priv = NULL;
	spin_unlock(&file_priv->mm.lock);

#if 0
	put_pid(request->pid);
	request->pid = NULL;
#endif
}

static void i915_gem_request_retire(struct drm_i915_gem_request *request)
{
	trace_i915_gem_request_retire(request);

	/* We know the GPU must have read the request to have
	 * sent us the seqno + interrupt, so use the position
	 * of tail of the request to update the last known position
	 * of the GPU head.
	 *
	 * Note this requires that we are always called in request
	 * completion order.
	 */
	request->ringbuf->last_retired_head = request->postfix;

	list_del_init(&request->list);
	i915_gem_request_remove_from_client(request);

	i915_gem_request_unreference(request);
}

static void
__i915_gem_request_retire__upto(struct drm_i915_gem_request *req)
{
	struct intel_engine_cs *engine = req->engine;
	struct drm_i915_gem_request *tmp;

	lockdep_assert_held(&engine->dev->struct_mutex);

	if (list_empty(&req->list))
		return;

	do {
		tmp = list_first_entry(&engine->request_list,
				       typeof(*tmp), list);

		i915_gem_request_retire(tmp);
	} while (tmp != req);

	WARN_ON(i915_verify_lists(engine->dev));
}

/**
 * Waits for a request to be signaled, and cleans up the
 * request and object lists appropriately for that event.
 */
int
i915_wait_request(struct drm_i915_gem_request *req)
{
	struct drm_i915_private *dev_priv = req->i915;
	bool interruptible;
	int ret;

	interruptible = dev_priv->mm.interruptible;

	BUG_ON(!mutex_is_locked(&dev_priv->dev->struct_mutex));

	ret = __i915_wait_request(req, interruptible, NULL, NULL);
	if (ret)
		return ret;

	/* If the GPU hung, we want to keep the requests to find the guilty. */
	if (req->reset_counter == i915_reset_counter(&dev_priv->gpu_error))
		__i915_gem_request_retire__upto(req);

	return 0;
}

/**
 * Ensures that all rendering to the object has completed and the object is
 * safe to unbind from the GTT or access from the CPU.
 */
int
i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
			       bool readonly)
{
	int ret, i;

	if (!obj->active)
		return 0;

	if (readonly) {
		if (obj->last_write_req != NULL) {
			ret = i915_wait_request(obj->last_write_req);
			if (ret)
				return ret;

			i = obj->last_write_req->engine->id;
			if (obj->last_read_req[i] == obj->last_write_req)
				i915_gem_object_retire__read(obj, i);
			else
				i915_gem_object_retire__write(obj);
		}
	} else {
		for (i = 0; i < I915_NUM_ENGINES; i++) {
			if (obj->last_read_req[i] == NULL)
				continue;

			ret = i915_wait_request(obj->last_read_req[i]);
			if (ret)
				return ret;

			i915_gem_object_retire__read(obj, i);
		}
		GEM_BUG_ON(obj->active);
	}

	return 0;
}

static void
i915_gem_object_retire_request(struct drm_i915_gem_object *obj,
			       struct drm_i915_gem_request *req)
{
	int ring = req->engine->id;

	if (obj->last_read_req[ring] == req)
		i915_gem_object_retire__read(obj, ring);
	else if (obj->last_write_req == req)
		i915_gem_object_retire__write(obj);

	if (req->reset_counter == i915_reset_counter(&req->i915->gpu_error))
		__i915_gem_request_retire__upto(req);
}

/* A nonblocking variant of the above wait. This is a highly dangerous routine
 * as the object state may change during this call.
 */
static __must_check int
i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
					    struct intel_rps_client *rps,
					    bool readonly)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_request *requests[I915_NUM_ENGINES];
	int ret, i, n = 0;

	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
	BUG_ON(!dev_priv->mm.interruptible);

	if (!obj->active)
		return 0;

	if (readonly) {
		struct drm_i915_gem_request *req;

		req = obj->last_write_req;
		if (req == NULL)
			return 0;

		requests[n++] = i915_gem_request_reference(req);
	} else {
		for (i = 0; i < I915_NUM_ENGINES; i++) {
			struct drm_i915_gem_request *req;

			req = obj->last_read_req[i];
			if (req == NULL)
				continue;

			requests[n++] = i915_gem_request_reference(req);
		}
	}

	mutex_unlock(&dev->struct_mutex);
	ret = 0;
	for (i = 0; ret == 0 && i < n; i++)
		ret = __i915_wait_request(requests[i], true, NULL, rps);
	mutex_lock(&dev->struct_mutex);

	for (i = 0; i < n; i++) {
		if (ret == 0)
			i915_gem_object_retire_request(obj, requests[i]);
		i915_gem_request_unreference(requests[i]);
	}

	return ret;
}

static struct intel_rps_client *to_rps_client(struct drm_file *file)
{
	struct drm_i915_file_private *fpriv = file->driver_priv;
	return &fpriv->rps;
}

/**
 * Called when user space prepares to use an object with the CPU, either
 * through the mmap ioctl's mapping or a GTT mapping.
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file)
{
	struct drm_i915_gem_set_domain *args = data;
	struct drm_i915_gem_object *obj;
	uint32_t read_domains = args->read_domains;
	uint32_t write_domain = args->write_domain;
	int ret;

	/* Only handle setting domains to types used by the CPU.
	 */
	if (write_domain & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	if (read_domains & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	/* Having something in the write domain implies it's in the read
	 * domain, and only that read domain. Enforce that in the request.
	 */
	if (write_domain != 0 && read_domains != write_domain)
		return -EINVAL;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Try to flush the object off the GPU without holding the lock.
	 * We will repeat the flush holding the lock in the normal manner
	 * to catch cases where we are gazumped.
	 */
	ret = i915_gem_object_wait_rendering__nonblocking(obj,
							  to_rps_client(file),
							  !write_domain);
	if (ret)
		goto unref;

	if (read_domains & I915_GEM_DOMAIN_GTT)
		ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
	else
		ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);

	if (write_domain != 0)
		intel_fb_obj_invalidate(obj,
					write_domain == I915_GEM_DOMAIN_GTT ?
					ORIGIN_GTT : ORIGIN_CPU);

unref:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/**
 * Called when user space has done writes to this buffer
 */
int
i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file)
{
	struct drm_i915_gem_sw_finish *args = data;
	struct drm_i915_gem_object *obj;
	int ret = 0;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Pinned buffers may be scanout, so flush the cache */
	if (obj->pin_display)
		i915_gem_object_flush_cpu_write_domain(obj);

	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}
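
/*
 * Illustrative sketch (hypothetical, not built): the set_domain / sw_finish
 * pair above bracketing a CPU write through an existing mapping; the drm fd,
 * handle and mapping are assumed.
 */
#if 0
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static int cpu_write_bo(int drm_fd, uint32_t handle, void *map,
			const void *src, size_t len)
{
	struct drm_i915_gem_set_domain sd = {
		.handle = handle,
		.read_domains = I915_GEM_DOMAIN_CPU,
		.write_domain = I915_GEM_DOMAIN_CPU,
	};
	struct drm_i915_gem_sw_finish fin = { .handle = handle };

	/* Move the object into the CPU domain before touching it... */
	if (ioctl(drm_fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &sd) != 0)
		return -1;

	memcpy(map, src, len);

	/* ...and tell the kernel the writes are done so a scanout
	 * buffer can be flushed. */
	return ioctl(drm_fd, DRM_IOCTL_I915_GEM_SW_FINISH, &fin);
}
#endif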
/**
 * Maps the contents of an object, returning the address it is mapped
 * into.
 *
 * While the mapping holds a reference on the contents of the object, it doesn't
 * imply a ref on the object itself.
 *
 * IMPORTANT:
 *
 * DRM driver writers who look at this function as an example for how to do GEM
 * mmap support, please don't implement mmap support like here. The modern way
 * to implement DRM mmap support is with an mmap offset ioctl (like
 * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly.
 * That way debug tooling like valgrind will understand what's going on, hiding
 * the mmap call in a driver private ioctl will break that. The i915 driver only
 * does cpu mmaps this way because we didn't know better.
 */
int
i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_gem_mmap *args = data;
	struct drm_gem_object *obj;
	unsigned long addr;

	struct proc *p = curproc;
	vm_map_t map = &p->p_vmspace->vm_map;
	vm_size_t size;
	int error = 0, rv;

	if (args->flags & ~(I915_MMAP_WC))
		return -EINVAL;

	obj = drm_gem_object_lookup(file, args->handle);
	if (obj == NULL)
		return -ENOENT;

	if (args->size == 0)
		goto out;

	size = round_page(args->size);
	if (map->size + size > p->p_rlimit[RLIMIT_VMEM].rlim_cur) {
		error = -ENOMEM;
		goto out;
	}

	/* prime objects have no backing filp to GEM mmap
	 * pages from.
	 */
	if (!obj->filp) {
		drm_gem_object_unreference_unlocked(obj);
		return -EINVAL;
	}

	/*
	 * Call hint to ensure that NULL is not returned as a valid address
	 * and to reduce vm_map traversals. XXX causes instability, use a
	 * fixed low address as the start point instead to avoid the NULL
	 * return issue.
	 */
	addr = PAGE_SIZE;

	/*
	 * Use 256KB alignment. It is unclear why this matters for a
	 * virtual address but it appears to fix a number of application/X
	 * crashes and kms console switching is much faster.
	 */
	vm_object_hold(obj->filp);
	vm_object_reference_locked(obj->filp);
	vm_object_drop(obj->filp);

	/* Something goes wrong here: fails to mmap 4096 */
	rv = vm_map_find(map, obj->filp, NULL,
			 args->offset, &addr, args->size,
			 256 * 1024, /* align */
			 TRUE, /* fitit */
			 VM_MAPTYPE_NORMAL, VM_SUBSYS_DRM_GEM,
			 VM_PROT_READ | VM_PROT_WRITE, /* prot */
			 VM_PROT_READ | VM_PROT_WRITE, /* max */
			 MAP_SHARED /* cow */);
	if (rv != KERN_SUCCESS) {
		vm_object_deallocate(obj->filp);
		error = -vm_mmap_to_errno(rv);
	} else {
		args->addr_ptr = (uint64_t)addr;
	}
out:
	drm_gem_object_unreference(obj);
	return (error);
}
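
/*
 * Illustrative sketch (hypothetical, not built): minimal userspace use of
 * the legacy mmap ioctl above. As the comment before i915_gem_mmap_ioctl()
 * says, new code should prefer the mmap-offset flow instead.
 */
#if 0
#include <stdint.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static void *map_bo(int drm_fd, uint32_t handle, uint64_t size)
{
	struct drm_i915_gem_mmap mmap_arg = {
		.handle = handle,
		.offset = 0,
		.size = size,
	};

	/* On success the kernel hands back the new CPU address in
	 * addr_ptr, as filled in by the ioctl above. */
	if (ioctl(drm_fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg) != 0)
		return NULL;

	return (void *)(uintptr_t)mmap_arg.addr_ptr;
}
#endif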
/**
 * i915_gem_fault - fault a page into the GTT
 *
 * vm_obj is locked on entry and expected to be locked on return.
 *
 * The vm_pager has placemarked the object with an anonymous memory page
 * which we must replace atomically to avoid races against concurrent faults
 * on the same page. XXX we currently are unable to do this atomically.
 *
 * If we are to return an error we should not touch the anonymous page,
 * the caller will deallocate it.
 *
 * XXX Most GEM calls appear to be interruptible, but we can't hard loop
 * in that case. Release all resources and wait 1 tick before retrying.
 * This is a huge problem which needs to be fixed by getting rid of most
 * of the interruptibility. The linux code does not retry but does appear
 * to have some sort of mechanism (VM_FAULT_NOPAGE ?) for the higher level
 * to be able to retry.
 *
 * --
 * @vma: VMA in question
 * @vmf: fault info
 *
 * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped
 * from userspace. The fault handler takes care of binding the object to
 * the GTT (if needed), allocating and programming a fence register (again,
 * only if needed based on whether the old reg is still valid or the object
 * is tiled) and inserting a new PTE into the faulting process.
 *
 * Note that the faulting process may involve evicting existing objects
 * from the GTT and/or fence registers to make room. So performance may
 * suffer if the GTT working set is large or there are few fence registers
 * left.
 *
 * vm_obj is locked on entry and expected to be locked on return. The VM
 * pager has placed an anonymous memory page at (obj,offset) which we have
 * to replace.
 */
int i915_gem_fault(vm_object_t vm_obj, vm_ooffset_t offset, int prot, vm_page_t *mres)
{
	struct drm_i915_gem_object *obj = to_intel_bo(vm_obj->handle);
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct i915_ggtt *ggtt = &dev_priv->ggtt;
	struct i915_ggtt_view view = i915_ggtt_view_normal;
	unsigned long page_offset;
	vm_page_t m;
	int ret = 0;
	bool write = !!(prot & VM_PROT_WRITE);

	intel_runtime_pm_get(dev_priv);

	/* We don't use vmf->pgoff since that has the fake offset */
	page_offset = (unsigned long)offset;

	/*
	 * vm_fault() has supplied us with a busied page placeholding
	 * the operation. This presents a lock order reversal issue
	 * against i915_gem_release_mmap() for our device mutex.
	 *
	 * Deal with the problem by getting rid of the placeholder now,
	 * and then dealing with the potential for a new placeholder when
	 * we try to insert later.
	 */
	if (*mres != NULL) {
		m = *mres;
		*mres = NULL;
		if ((m->busy_count & PBUSY_LOCKED) == 0)
			kprintf("i915_gem_fault: Page was not busy\n");
		else
			vm_page_remove(m);
		vm_page_free(m);
	}

	m = NULL;

retry:
	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		goto out;

	trace_i915_gem_object_fault(obj, page_offset, true, write);

	/* Try to flush the object off the GPU first without holding the lock.
	 * Upon reacquiring the lock, we will perform our sanity checks and then
	 * repeat the flush holding the lock in the normal manner to catch cases
	 * where we are gazumped.
	 */
	ret = i915_gem_object_wait_rendering__nonblocking(obj, NULL, !write);
	if (ret)
		goto unlock;

	/* Access to snoopable pages through the GTT is incoherent. */
	if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) {
		ret = -EFAULT;
		goto unlock;
	}

	/* Use a partial view if the object is bigger than the aperture. */
	if (obj->base.size >= ggtt->mappable_end &&
	    obj->tiling_mode == I915_TILING_NONE) {
#if 0
		static const unsigned int chunk_size = 256; // 1 MiB

		memset(&view, 0, sizeof(view));
		view.type = I915_GGTT_VIEW_PARTIAL;
		view.params.partial.offset = rounddown(page_offset, chunk_size);
		view.params.partial.size =
			min_t(unsigned int,
			      chunk_size,
			      (vma->vm_end - vma->vm_start)/PAGE_SIZE -
			      view.params.partial.offset);
#endif
	}

	/* Now pin it into the GTT if needed */
	ret = i915_gem_object_ggtt_pin(obj, &view, 0, PIN_MAPPABLE);
	if (ret)
		goto unlock;

	ret = i915_gem_object_set_to_gtt_domain(obj, write);
	if (ret)
		goto unpin;

	ret = i915_gem_object_get_fence(obj);
	if (ret)
		goto unpin;

	/*
	 * START FREEBSD MAGIC
	 *
	 * Add a pip count to avoid destruction and certain other
	 * complex operations (such as collapses?) while unlocked.
	 */
	vm_object_pip_add(vm_obj, 1);

	ret = 0;
	m = NULL;

	/*
	 * Since the object lock was dropped, another thread might have
	 * faulted on the same GTT address and instantiated the mapping.
	 * Recheck.
	 */
	m = vm_page_lookup(vm_obj, OFF_TO_IDX(offset));
	if (m != NULL) {
		/*
		 * Try to busy the page, retry on failure (non-zero ret).
		 */
		if (vm_page_busy_try(m, false)) {
			kprintf("i915_gem_fault: BUSY\n");
			ret = -EINTR;
			goto unlock;
		}
		goto have_page;
	}
	/*
	 * END FREEBSD MAGIC
	 */

	obj->fault_mappable = true;

	/* Finally, remap it using the new GTT offset */
	m = vm_phys_fictitious_to_vm_page(ggtt->mappable_base +
					  i915_gem_obj_ggtt_offset_view(obj, &view) + offset);
	if (m == NULL) {
		ret = -EFAULT;
		goto unpin;
	}
	KASSERT((m->flags & PG_FICTITIOUS) != 0, ("not fictitious %p", m));
	KASSERT(m->wire_count == 1, ("wire_count not 1 %p", m));

	/*
	 * Try to busy the page. Fails on non-zero return.
	 */
	if (vm_page_busy_try(m, false)) {
		kprintf("i915_gem_fault: BUSY(2)\n");
		ret = -EINTR;
		goto unpin;
	}
	m->valid = VM_PAGE_BITS_ALL;

#if 1
	/*
	 * This should always work since we already checked via a lookup
	 * above.
	 */
	if (vm_page_insert(m, vm_obj, OFF_TO_IDX(offset)) == FALSE) {
		kprintf("i915:gem_fault: page %p,%jd already in object\n",
			vm_obj,
			OFF_TO_IDX(offset));
		vm_page_wakeup(m);
		ret = -EINTR;
		goto unpin;
	}
#else
	/* NOT COMPILED ATM */
	if (unlikely(view.type == I915_GGTT_VIEW_PARTIAL)) {
		/* Overriding existing pages in partial view does not cause
		 * us any trouble as TLBs are still valid because the fault
		 * is due to userspace losing part of the mapping or never
		 * having accessed it before (at this partials' range).
		 */
		unsigned long base = vma->vm_start +
				     (view.params.partial.offset << PAGE_SHIFT);
		unsigned int i;

		for (i = 0; i < view.params.partial.size; i++) {
			ret = vm_insert_pfn(vma, base + i * PAGE_SIZE, pfn + i);
			if (ret)
				break;
		}

		obj->fault_mappable = true;
	} else {
		if (!obj->fault_mappable) {
			unsigned long size = min_t(unsigned long,
						   vma->vm_end - vma->vm_start,
						   obj->base.size);
			int i;

			for (i = 0; i < size >> PAGE_SHIFT; i++) {
				ret = vm_insert_pfn(vma,
						    (unsigned long)vma->vm_start + i * PAGE_SIZE,
						    pfn + i);
				if (ret)
					break;
			}

			obj->fault_mappable = true;
		} else
			ret = vm_insert_pfn(vma,
					    (unsigned long)vmf->virtual_address,
					    pfn + page_offset);
	}
#endif

have_page:
	*mres = m;

	i915_gem_object_ggtt_unpin_view(obj, &view);
	mutex_unlock(&dev->struct_mutex);
	ret = VM_PAGER_OK;
	goto done;

	/*
	 * ALTERNATIVE ERROR RETURN.
	 *
	 * OBJECT EXPECTED TO BE LOCKED.
	 */
unpin:
	i915_gem_object_ggtt_unpin_view(obj, &view);
unlock:
	mutex_unlock(&dev->struct_mutex);
out:
	switch (ret) {
	case -EIO:
		/*
		 * We eat errors when the gpu is terminally wedged to avoid
		 * userspace unduly crashing (gl has no provisions for mmaps to
		 * fail). But any other -EIO isn't ours (e.g. swap in failure)
		 * and so needs to be reported.
2020 */ 2021 if (!i915_terminally_wedged(&dev_priv->gpu_error)) { 2022 // ret = VM_FAULT_SIGBUS; 2023 break; 2024 } /* fall through - terminally wedged */ 2025 case -EAGAIN: 2026 /* 2027 * EAGAIN means the gpu is hung and we'll wait for the error 2028 * handler to reset everything when re-faulting in 2029 * i915_mutex_lock_interruptible. 2030 */ 2031 case -ERESTARTSYS: 2032 case -EINTR: 2033 VM_OBJECT_UNLOCK(vm_obj); 2034 int dummy; 2035 tsleep(&dummy, 0, "delay", 1); /* XXX */ 2036 VM_OBJECT_LOCK(vm_obj); 2037 goto retry; 2038 default: 2039 WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret); 2040 ret = VM_PAGER_ERROR; 2041 break; 2042 } 2043 2044 done: 2045 vm_object_pip_wakeup(vm_obj); 2046 2047 intel_runtime_pm_put(dev_priv); 2048 return ret; 2049 } 2050 2051 /** 2052 * i915_gem_release_mmap - remove physical page mappings 2053 * @obj: obj in question 2054 * 2055 * Preserve the reservation of the mmapping with the DRM core code, but 2056 * relinquish ownership of the pages back to the system. 2057 * 2058 * It is vital that we remove the page mapping if we have mapped a tiled 2059 * object through the GTT and then lose the fence register due to 2060 * resource pressure. Similarly, if the object has been moved out of the 2061 * aperture, then pages mapped into userspace must be revoked. Removing the 2062 * mapping will then trigger a page fault on the next user access, allowing 2063 * fixup by i915_gem_fault(). 2064 */ 2065 void 2066 i915_gem_release_mmap(struct drm_i915_gem_object *obj) 2067 { 2068 vm_object_t devobj; 2069 vm_page_t m; 2070 int i, page_count; 2071 2072 /* Serialisation between user GTT access and our code depends upon 2073 * revoking the CPU's PTE whilst the mutex is held. The next user 2074 * pagefault then has to wait until we release the mutex. 2075 */ 2076 lockdep_assert_held(&obj->base.dev->struct_mutex); 2077 2078 if (!obj->fault_mappable) 2079 return; 2080 2081 devobj = cdev_pager_lookup(obj); 2082 if (devobj != NULL) { 2083 page_count = OFF_TO_IDX(obj->base.size); 2084 2085 VM_OBJECT_LOCK(devobj); 2086 for (i = 0; i < page_count; i++) { 2087 m = vm_page_lookup_busy_wait(devobj, i, TRUE, "915unm"); 2088 if (m == NULL) 2089 continue; 2090 cdev_pager_free_page(devobj, m); 2091 } 2092 VM_OBJECT_UNLOCK(devobj); 2093 vm_object_deallocate(devobj); 2094 } 2095 2096 /* Ensure that the CPU's PTEs are revoked and there are no outstanding 2097 * memory transactions from userspace before we return. The TLB 2098 * flushing implied by changing the PTEs above *should* be 2099 * sufficient, an extra barrier here just provides us with a bit 2100 * of paranoid documentation about our requirement to serialise 2101 * memory writes before touching registers / GSM.
2102 */ 2103 wmb(); 2104 2105 obj->fault_mappable = false; 2106 } 2107 2108 void 2109 i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv) 2110 { 2111 struct drm_i915_gem_object *obj; 2112 2113 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) 2114 i915_gem_release_mmap(obj); 2115 } 2116 2117 uint32_t 2118 i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode) 2119 { 2120 uint32_t gtt_size; 2121 2122 if (INTEL_INFO(dev)->gen >= 4 || 2123 tiling_mode == I915_TILING_NONE) 2124 return size; 2125 2126 /* Previous chips need a power-of-two fence region when tiling. * For example, a 600 KiB tiled object on gen2 starts from the * 512 KiB minimum and is doubled once to 1 MiB by the loop below. */ 2127 if (INTEL_INFO(dev)->gen == 3) 2128 gtt_size = 1024*1024; 2129 else 2130 gtt_size = 512*1024; 2131 2132 while (gtt_size < size) 2133 gtt_size <<= 1; 2134 2135 return gtt_size; 2136 } 2137 2138 /** 2139 * i915_gem_get_gtt_alignment - return required GTT alignment for an object 2140 * @dev: drm device * @size: size of the object * @tiling_mode: tiling mode of the object * @fenced: whether the object will be accessed through a fence register 2141 * 2142 * Return the required GTT alignment for an object, taking into account 2143 * potential fence register mapping. 2144 */ 2145 uint32_t 2146 i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size, 2147 int tiling_mode, bool fenced) 2148 { 2149 /* 2150 * Minimum alignment is 4k (GTT page size), but might be greater 2151 * if a fence register is needed for the object. 2152 */ 2153 if (INTEL_INFO(dev)->gen >= 4 || (!fenced && IS_G33(dev)) || 2154 tiling_mode == I915_TILING_NONE) 2155 return 4096; 2156 2157 /* 2158 * Previous chips need to be aligned to the size of the smallest 2159 * fence register that can contain the object. 2160 */ 2161 return i915_gem_get_gtt_size(dev, size, tiling_mode); 2162 } 2163 2164 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj) 2165 { 2166 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2167 int ret; 2168 2169 #if 0 2170 if (drm_vma_node_has_offset(&obj->base.vma_node)) 2171 return 0; 2172 #endif 2173 2174 dev_priv->mm.shrinker_no_lock_stealing = true; 2175 2176 ret = drm_gem_create_mmap_offset(&obj->base); 2177 if (ret != -ENOSPC) 2178 goto out; 2179 2180 /* Badly fragmented mmap space? The only way we can recover 2181 * space is by destroying unwanted objects. We can't randomly release 2182 * mmap_offsets as userspace expects them to be persistent for the 2183 * lifetime of the objects. The closest we can do is to release the 2184 * offsets on purgeable objects by truncating the object and marking it purged, 2185 * which prevents userspace from ever using that object again.
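* Below we therefore escalate in two steps: first shrink just the purgeable objects (bound and unbound), then fall back to i915_gem_shrink_all().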
2186 */ 2187 i915_gem_shrink(dev_priv, 2188 obj->base.size >> PAGE_SHIFT, 2189 I915_SHRINK_BOUND | 2190 I915_SHRINK_UNBOUND | 2191 I915_SHRINK_PURGEABLE); 2192 ret = drm_gem_create_mmap_offset(&obj->base); 2193 if (ret != -ENOSPC) 2194 goto out; 2195 2196 i915_gem_shrink_all(dev_priv); 2197 ret = drm_gem_create_mmap_offset(&obj->base); 2198 out: 2199 dev_priv->mm.shrinker_no_lock_stealing = false; 2200 2201 return ret; 2202 } 2203 2204 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj) 2205 { 2206 drm_gem_free_mmap_offset(&obj->base); 2207 } 2208 2209 int 2210 i915_gem_mmap_gtt(struct drm_file *file, 2211 struct drm_device *dev, 2212 uint32_t handle, 2213 uint64_t *offset) 2214 { 2215 struct drm_i915_gem_object *obj; 2216 int ret; 2217 2218 ret = i915_mutex_lock_interruptible(dev); 2219 if (ret) 2220 return ret; 2221 2222 obj = to_intel_bo(drm_gem_object_lookup(file, handle)); 2223 if (&obj->base == NULL) { 2224 ret = -ENOENT; 2225 goto unlock; 2226 } 2227 2228 if (obj->madv != I915_MADV_WILLNEED) { 2229 DRM_DEBUG("Attempting to mmap a purgeable buffer\n"); 2230 ret = -EFAULT; 2231 goto out; 2232 } 2233 2234 ret = i915_gem_object_create_mmap_offset(obj); 2235 if (ret) 2236 goto out; 2237 2238 *offset = DRM_GEM_MAPPING_OFF(obj->base.map_list.key) | 2239 DRM_GEM_MAPPING_KEY; 2240 2241 out: 2242 drm_gem_object_unreference(&obj->base); 2243 unlock: 2244 mutex_unlock(&dev->struct_mutex); 2245 return ret; 2246 } 2247 2248 /** 2249 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing 2250 * @dev: DRM device 2251 * @data: GTT mapping ioctl data 2252 * @file: GEM object info 2253 * 2254 * Simply returns the fake offset to userspace so it can mmap it. 2255 * The mmap call will end up in drm_gem_mmap(), which will set things 2256 * up so we can get faults in the handler above. 2257 * 2258 * The fault handler will take care of binding the object into the GTT 2259 * (since it may have been evicted to make room for something), allocating 2260 * a fence register, and mapping the appropriate aperture address into 2261 * userspace. 
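* (In this port the offset handed back by i915_gem_mmap_gtt() above is additionally tagged with DRM_GEM_MAPPING_KEY.)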
2262 */ 2263 int 2264 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, 2265 struct drm_file *file) 2266 { 2267 struct drm_i915_gem_mmap_gtt *args = data; 2268 2269 return i915_gem_mmap_gtt(file, dev, args->handle, (uint64_t *)&args->offset); 2270 } 2271 2272 /* Immediately discard the backing storage */ 2273 static void 2274 i915_gem_object_truncate(struct drm_i915_gem_object *obj) 2275 { 2276 vm_object_t vm_obj; 2277 2278 vm_obj = obj->base.filp; 2279 VM_OBJECT_LOCK(vm_obj); 2280 vm_object_page_remove(vm_obj, 0, 0, false); 2281 VM_OBJECT_UNLOCK(vm_obj); 2282 2283 obj->madv = __I915_MADV_PURGED; 2284 } 2285 2286 /* Try to discard unwanted pages */ 2287 static void 2288 i915_gem_object_invalidate(struct drm_i915_gem_object *obj) 2289 { 2290 #if 0 2291 struct address_space *mapping; 2292 #endif 2293 2294 switch (obj->madv) { 2295 case I915_MADV_DONTNEED: 2296 i915_gem_object_truncate(obj); 2297 /* fall through */ case __I915_MADV_PURGED: 2298 return; 2299 } 2300 2301 if (obj->base.filp == NULL) 2302 return; 2303 2304 #if 0 2305 mapping = file_inode(obj->base.filp)->i_mapping; 2306 #endif 2307 invalidate_mapping_pages(obj->base.filp, 0, (loff_t)-1); 2308 } 2309 2310 static void 2311 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj) 2312 { 2313 struct sg_page_iter sg_iter; 2314 int ret; 2315 2316 BUG_ON(obj->madv == __I915_MADV_PURGED); 2317 2318 ret = i915_gem_object_set_to_cpu_domain(obj, true); 2319 if (WARN_ON(ret)) { 2320 /* In the event of a disaster, abandon all caches and 2321 * hope for the best. 2322 */ 2323 i915_gem_clflush_object(obj, true); 2324 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU; 2325 } 2326 2327 i915_gem_gtt_finish_object(obj); 2328 2329 if (i915_gem_object_needs_bit17_swizzle(obj)) 2330 i915_gem_object_save_bit_17_swizzle(obj); 2331 2332 if (obj->madv == I915_MADV_DONTNEED) 2333 obj->dirty = 0; 2334 2335 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) { 2336 struct page *page = sg_page_iter_page(&sg_iter); 2337 2338 if (obj->dirty) 2339 set_page_dirty(page); 2340 2341 if (obj->madv == I915_MADV_WILLNEED) 2342 mark_page_accessed(page); 2343 2344 vm_page_busy_wait((struct vm_page *)page, FALSE, "i915gem"); 2345 vm_page_unwire((struct vm_page *)page, 1); 2346 vm_page_wakeup((struct vm_page *)page); 2347 } 2348 obj->dirty = 0; 2349 2350 sg_free_table(obj->pages); 2351 kfree(obj->pages); 2352 } 2353 2354 int 2355 i915_gem_object_put_pages(struct drm_i915_gem_object *obj) 2356 { 2357 const struct drm_i915_gem_object_ops *ops = obj->ops; 2358 2359 if (obj->pages == NULL) 2360 return 0; 2361 2362 if (obj->pages_pin_count) 2363 return -EBUSY; 2364 2365 BUG_ON(i915_gem_obj_bound_any(obj)); 2366 2367 /* ->put_pages might need to allocate memory for the bit17 swizzle 2368 * array, hence protect them from being reaped by removing them from gtt 2369 * lists early.
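* The list_del() below is balanced by the list_add_tail() back onto the unbound list in i915_gem_object_get_pages().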
*/ 2370 list_del(&obj->global_list); 2371 2372 if (obj->mapping) { 2373 if (is_vmalloc_addr(obj->mapping)) 2374 vunmap(obj->mapping); 2375 else 2376 kunmap(kmap_to_page(obj->mapping)); 2377 obj->mapping = NULL; 2378 } 2379 2380 ops->put_pages(obj); 2381 obj->pages = NULL; 2382 2383 i915_gem_object_invalidate(obj); 2384 2385 return 0; 2386 } 2387 2388 static int 2389 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) 2390 { 2391 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2392 int page_count, i; 2393 vm_object_t vm_obj; 2394 struct sg_table *st; 2395 struct scatterlist *sg; 2396 struct sg_page_iter sg_iter; 2397 struct page *page; 2398 unsigned long last_pfn = 0; /* suppress gcc warning */ 2399 int ret; 2400 2401 /* Assert that the object is not currently in any GPU domain. As it 2402 * wasn't in the GTT, there shouldn't be any way it could have been in 2403 * a GPU cache 2404 */ 2405 BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS); 2406 BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS); 2407 2408 st = kmalloc(sizeof(*st), M_DRM, M_WAITOK); 2409 if (st == NULL) 2410 return -ENOMEM; 2411 2412 page_count = obj->base.size / PAGE_SIZE; 2413 if (sg_alloc_table(st, page_count, GFP_KERNEL)) { 2414 kfree(st); 2415 return -ENOMEM; 2416 } 2417 2418 /* Get the list of pages out of our struct file. They'll be pinned 2419 * at this point until we release them. 2420 * 2421 * Fail silently without starting the shrinker 2422 */ 2423 vm_obj = obj->base.filp; 2424 VM_OBJECT_LOCK(vm_obj); 2425 sg = st->sgl; 2426 st->nents = 0; 2427 for (i = 0; i < page_count; i++) { 2428 page = shmem_read_mapping_page(vm_obj, i); 2429 if (IS_ERR(page)) { 2430 i915_gem_shrink(dev_priv, 2431 page_count, 2432 I915_SHRINK_BOUND | 2433 I915_SHRINK_UNBOUND | 2434 I915_SHRINK_PURGEABLE); 2435 page = shmem_read_mapping_page(vm_obj, i); 2436 } 2437 if (IS_ERR(page)) { 2438 /* We've tried hard to allocate the memory by reaping 2439 * our own buffer, now let the real VM do its job and 2440 * go down in flames if truly OOM. 2441 */ 2442 i915_gem_shrink_all(dev_priv); 2443 page = shmem_read_mapping_page(vm_obj, i); 2444 if (IS_ERR(page)) { 2445 ret = PTR_ERR(page); 2446 goto err_pages; 2447 } 2448 } 2449 #ifdef CONFIG_SWIOTLB 2450 if (swiotlb_nr_tbl()) { 2451 st->nents++; 2452 sg_set_page(sg, page, PAGE_SIZE, 0); 2453 sg = sg_next(sg); 2454 continue; 2455 } 2456 #endif 2457 if (!i || page_to_pfn(page) != last_pfn + 1) { 2458 if (i) 2459 sg = sg_next(sg); 2460 st->nents++; 2461 sg_set_page(sg, page, PAGE_SIZE, 0); 2462 } else { 2463 sg->length += PAGE_SIZE; 2464 } 2465 last_pfn = page_to_pfn(page); 2466 2467 /* Check that the i965g/gm workaround works. 
*/ 2468 } 2469 #ifdef CONFIG_SWIOTLB 2470 if (!swiotlb_nr_tbl()) 2471 #endif 2472 sg_mark_end(sg); 2473 obj->pages = st; 2474 VM_OBJECT_UNLOCK(vm_obj); 2475 2476 ret = i915_gem_gtt_prepare_object(obj); 2477 if (ret) 2478 goto err_pages; 2479 2480 if (i915_gem_object_needs_bit17_swizzle(obj)) 2481 i915_gem_object_do_bit_17_swizzle(obj); 2482 2483 if (obj->tiling_mode != I915_TILING_NONE && 2484 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) 2485 i915_gem_object_pin_pages(obj); 2486 2487 return 0; 2488 2489 err_pages: 2490 sg_mark_end(sg); 2491 for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) { 2492 struct vm_page *vmp = (struct vm_page *)sg_page_iter_page(&sg_iter); 2493 vm_page_busy_wait(vmp, FALSE, "i915gem"); 2494 vm_page_unwire(vmp, 0); 2495 vm_page_wakeup(vmp); 2496 } 2497 VM_OBJECT_UNLOCK(vm_obj); 2498 sg_free_table(st); 2499 kfree(st); 2500 2501 /* shmemfs first checks if there is enough memory to allocate the page 2502 * and reports ENOSPC should there be insufficient, along with the usual 2503 * ENOMEM for a genuine allocation failure. 2504 * 2505 * We use ENOSPC in our driver to mean that we have run out of aperture 2506 * space and so want to translate the error from shmemfs back to our 2507 * usual understanding of ENOMEM. 2508 */ 2509 if (ret == -ENOSPC) 2510 ret = -ENOMEM; 2511 2512 return ret; 2513 } 2514 2515 /* Ensure that the associated pages are gathered from the backing storage 2516 * and pinned into our object. i915_gem_object_get_pages() may be called 2517 * multiple times before they are released by a single call to 2518 * i915_gem_object_put_pages() - once the pages are no longer referenced 2519 * either as a result of memory pressure (reaping pages under the shrinker) 2520 * or as the object is itself released. 2521 */ 2522 int 2523 i915_gem_object_get_pages(struct drm_i915_gem_object *obj) 2524 { 2525 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2526 const struct drm_i915_gem_object_ops *ops = obj->ops; 2527 int ret; 2528 2529 if (obj->pages) 2530 return 0; 2531 2532 if (obj->madv != I915_MADV_WILLNEED) { 2533 DRM_DEBUG("Attempting to obtain a purgeable object\n"); 2534 return -EFAULT; 2535 } 2536 2537 BUG_ON(obj->pages_pin_count); 2538 2539 ret = ops->get_pages(obj); 2540 if (ret) 2541 return ret; 2542 2543 list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list); 2544 2545 obj->get_page.sg = obj->pages->sgl; 2546 obj->get_page.last = 0; 2547 2548 return 0; 2549 } 2550 2551 void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj) 2552 { 2553 int ret; 2554 2555 lockdep_assert_held(&obj->base.dev->struct_mutex); 2556 2557 ret = i915_gem_object_get_pages(obj); 2558 if (ret) 2559 return ERR_PTR(ret); 2560 2561 i915_gem_object_pin_pages(obj); 2562 2563 if (obj->mapping == NULL) { 2564 struct page **pages; 2565 2566 pages = NULL; 2567 if (obj->base.size == PAGE_SIZE) 2568 obj->mapping = kmap(sg_page(obj->pages->sgl)); 2569 else 2570 pages = drm_malloc_gfp(obj->base.size >> PAGE_SHIFT, 2571 sizeof(*pages), 2572 GFP_TEMPORARY); 2573 if (pages != NULL) { 2574 struct sg_page_iter sg_iter; 2575 int n; 2576 2577 n = 0; 2578 for_each_sg_page(obj->pages->sgl, &sg_iter, 2579 obj->pages->nents, 0) 2580 pages[n++] = sg_page_iter_page(&sg_iter); 2581 2582 obj->mapping = vmap(pages, n, 0, PAGE_KERNEL); 2583 drm_free_large(pages); 2584 } 2585 if (obj->mapping == NULL) { 2586 i915_gem_object_unpin_pages(obj); 2587 return ERR_PTR(-ENOMEM); 2588 } 2589 } 2590 2591 return obj->mapping; 2592 } 2593 2594 void i915_vma_move_to_active(struct i915_vma *vma, 2595 
struct drm_i915_gem_request *req) 2596 { 2597 struct drm_i915_gem_object *obj = vma->obj; 2598 struct intel_engine_cs *engine; 2599 2600 engine = i915_gem_request_get_engine(req); 2601 2602 /* Add a reference if we're newly entering the active list. */ 2603 if (obj->active == 0) 2604 drm_gem_object_reference(&obj->base); 2605 obj->active |= intel_engine_flag(engine); 2606 2607 list_move_tail(&obj->engine_list[engine->id], &engine->active_list); 2608 i915_gem_request_assign(&obj->last_read_req[engine->id], req); 2609 2610 list_move_tail(&vma->vm_link, &vma->vm->active_list); 2611 } 2612 2613 static void 2614 i915_gem_object_retire__write(struct drm_i915_gem_object *obj) 2615 { 2616 GEM_BUG_ON(obj->last_write_req == NULL); 2617 GEM_BUG_ON(!(obj->active & intel_engine_flag(obj->last_write_req->engine))); 2618 2619 i915_gem_request_assign(&obj->last_write_req, NULL); 2620 intel_fb_obj_flush(obj, true, ORIGIN_CS); 2621 } 2622 2623 static void 2624 i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring) 2625 { 2626 struct i915_vma *vma; 2627 2628 GEM_BUG_ON(obj->last_read_req[ring] == NULL); 2629 GEM_BUG_ON(!(obj->active & (1 << ring))); 2630 2631 list_del_init(&obj->engine_list[ring]); 2632 i915_gem_request_assign(&obj->last_read_req[ring], NULL); 2633 2634 if (obj->last_write_req && obj->last_write_req->engine->id == ring) 2635 i915_gem_object_retire__write(obj); 2636 2637 obj->active &= ~(1 << ring); 2638 if (obj->active) 2639 return; 2640 2641 /* Bump our place on the bound list to keep it roughly in LRU order 2642 * so that we don't steal from recently used but inactive objects 2643 * (unless we are forced to, of course!) 2644 */ 2645 list_move_tail(&obj->global_list, 2646 &to_i915(obj->base.dev)->mm.bound_list); 2647 2648 list_for_each_entry(vma, &obj->vma_list, obj_link) { 2649 if (!list_empty(&vma->vm_link)) 2650 list_move_tail(&vma->vm_link, &vma->vm->inactive_list); 2651 } 2652 2653 i915_gem_request_assign(&obj->last_fenced_req, NULL); 2654 drm_gem_object_unreference(&obj->base); 2655 } 2656 2657 static int 2658 i915_gem_init_seqno(struct drm_device *dev, u32 seqno) 2659 { 2660 struct drm_i915_private *dev_priv = dev->dev_private; 2661 struct intel_engine_cs *engine; 2662 int ret; 2663 2664 /* Carefully retire all requests without writing to the rings */ 2665 for_each_engine(engine, dev_priv) { 2666 ret = intel_engine_idle(engine); 2667 if (ret) 2668 return ret; 2669 } 2670 i915_gem_retire_requests(dev); 2671 2672 /* Finally reset hw state */ 2673 for_each_engine(engine, dev_priv) 2674 intel_ring_init_seqno(engine, seqno); 2675 2676 return 0; 2677 } 2678 2679 int i915_gem_set_seqno(struct drm_device *dev, u32 seqno) 2680 { 2681 struct drm_i915_private *dev_priv = dev->dev_private; 2682 int ret; 2683 2684 if (seqno == 0) 2685 return -EINVAL; 2686 2687 /* The HWS page seqno must be set to one less than the 2688 * seqno we will inject into the ring 2689 */ 2690 ret = i915_gem_init_seqno(dev, seqno - 1); 2691 if (ret) 2692 return ret; 2693 2694 /* Carefully set the last_seqno value so that wrap 2695 * detection still works 2696 */ 2697 dev_priv->next_seqno = seqno; 2698 dev_priv->last_seqno = seqno - 1; 2699 if (dev_priv->last_seqno == 0) 2700 dev_priv->last_seqno--; 2701 2702 return 0; 2703 } 2704 2705 int 2706 i915_gem_get_seqno(struct drm_device *dev, u32 *seqno) 2707 { 2708 struct drm_i915_private *dev_priv = dev->dev_private; 2709 2710 /* reserve 0 for non-seqno */ 2711 if (dev_priv->next_seqno == 0) { 2712 int ret = i915_gem_init_seqno(dev, 0); 2713 if (ret) 2714 return ret; 2715 2716
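/* 0 is reserved (see above), so hand out seqnos starting at 1 */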
dev_priv->next_seqno = 1; 2717 } 2718 2719 *seqno = dev_priv->last_seqno = dev_priv->next_seqno++; 2720 return 0; 2721 } 2722 2723 /* 2724 * NB: This function is not allowed to fail. Doing so would mean the 2725 * request is not being tracked for completion but the work itself is 2726 * going to happen on the hardware. This would be a Bad Thing(tm). 2727 */ 2728 void __i915_add_request(struct drm_i915_gem_request *request, 2729 struct drm_i915_gem_object *obj, 2730 bool flush_caches) 2731 { 2732 struct intel_engine_cs *engine; 2733 struct drm_i915_private *dev_priv; 2734 struct intel_ringbuffer *ringbuf; 2735 u32 request_start; 2736 int ret; 2737 2738 if (WARN_ON(request == NULL)) 2739 return; 2740 2741 engine = request->engine; 2742 dev_priv = request->i915; 2743 ringbuf = request->ringbuf; 2744 2745 /* 2746 * To ensure that this call will not fail, space for its emissions 2747 * should already have been reserved in the ring buffer. Let the ring 2748 * know that it is time to use that space up. 2749 */ 2750 intel_ring_reserved_space_use(ringbuf); 2751 2752 request_start = intel_ring_get_tail(ringbuf); 2753 /* 2754 * Emit any outstanding flushes - execbuf can fail to emit the flush 2755 * after having emitted the batchbuffer command. Hence we need to fix 2756 * things up similar to emitting the lazy request. The difference here 2757 * is that the flush _must_ happen before the next request, no matter 2758 * what. 2759 */ 2760 if (flush_caches) { 2761 if (i915.enable_execlists) 2762 ret = logical_ring_flush_all_caches(request); 2763 else 2764 ret = intel_ring_flush_all_caches(request); 2765 /* Not allowed to fail! */ 2766 WARN(ret, "*_ring_flush_all_caches failed: %d!\n", ret); 2767 } 2768 2769 trace_i915_gem_request_add(request); 2770 2771 request->head = request_start; 2772 2773 /* Whilst this request exists, batch_obj will be on the 2774 * active_list, and so will hold the active reference. Only when this 2775 * request is retired will the batch_obj be moved onto the 2776 * inactive_list and lose its active reference. Hence we do not need 2777 * to explicitly hold another reference here. 2778 */ 2779 request->batch_obj = obj; 2780 2781 /* Seal the request and mark it as pending execution. Note that 2782 * we may inspect this state, without holding any locks, during 2783 * hangcheck. Hence we apply the barrier to ensure that we do not 2784 * see a more recent value in the hws than we are tracking. 2785 */ 2786 request->emitted_jiffies = jiffies; 2787 request->previous_seqno = engine->last_submitted_seqno; 2788 smp_store_mb(engine->last_submitted_seqno, request->seqno); 2789 list_add_tail(&request->list, &engine->request_list); 2790 2791 /* Record the position of the start of the request so that 2792 * should we detect the updated seqno part-way through the 2793 * GPU processing the request, we never over-estimate the 2794 * position of the head. 2795 */ 2796 request->postfix = intel_ring_get_tail(ringbuf); 2797 2798 if (i915.enable_execlists) 2799 ret = engine->emit_request(request); 2800 else { 2801 ret = engine->add_request(request); 2802 2803 request->tail = intel_ring_get_tail(ringbuf); 2804 } 2805 2806 /* Not allowed to fail! */ 2807 WARN(ret, "emit|add_request failed: %d!\n", ret); 2808 2809 i915_queue_hangcheck(engine->dev); 2810 2811 queue_delayed_work(dev_priv->wq, 2812 &dev_priv->mm.retire_work, 2813 round_jiffies_up_relative(HZ)); 2814 intel_mark_busy(dev_priv->dev); 2815 2816 /* Sanity check that the reserved size was large enough.
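* intel_ring_reserved_space_end() below should complain if the request consumed more ring space than was reserved for it.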
*/ 2817 intel_ring_reserved_space_end(ringbuf); 2818 } 2819 2820 static bool i915_context_is_banned(struct drm_i915_private *dev_priv, 2821 const struct intel_context *ctx) 2822 { 2823 unsigned long elapsed; 2824 2825 elapsed = get_seconds() - ctx->hang_stats.guilty_ts; 2826 2827 if (ctx->hang_stats.banned) 2828 return true; 2829 2830 if (ctx->hang_stats.ban_period_seconds && 2831 elapsed <= ctx->hang_stats.ban_period_seconds) { 2832 if (!i915_gem_context_is_default(ctx)) { 2833 DRM_DEBUG("context hanging too fast, banning!\n"); 2834 return true; 2835 } else if (i915_stop_ring_allow_ban(dev_priv)) { 2836 if (i915_stop_ring_allow_warn(dev_priv)) 2837 DRM_ERROR("gpu hanging too fast, banning!\n"); 2838 return true; 2839 } 2840 } 2841 2842 return false; 2843 } 2844 2845 static void i915_set_reset_status(struct drm_i915_private *dev_priv, 2846 struct intel_context *ctx, 2847 const bool guilty) 2848 { 2849 struct i915_ctx_hang_stats *hs; 2850 2851 if (WARN_ON(!ctx)) 2852 return; 2853 2854 hs = &ctx->hang_stats; 2855 2856 if (guilty) { 2857 hs->banned = i915_context_is_banned(dev_priv, ctx); 2858 hs->batch_active++; 2859 hs->guilty_ts = get_seconds(); 2860 } else { 2861 hs->batch_pending++; 2862 } 2863 } 2864 2865 void i915_gem_request_free(struct kref *req_ref) 2866 { 2867 struct drm_i915_gem_request *req = container_of(req_ref, 2868 typeof(*req), ref); 2869 struct intel_context *ctx = req->ctx; 2870 2871 if (req->file_priv) 2872 i915_gem_request_remove_from_client(req); 2873 2874 if (ctx) { 2875 if (i915.enable_execlists && ctx != req->i915->kernel_context) 2876 intel_lr_context_unpin(ctx, req->engine); 2877 2878 i915_gem_context_unreference(ctx); 2879 } 2880 2881 kfree(req); 2882 } 2883 2884 static inline int 2885 __i915_gem_request_alloc(struct intel_engine_cs *engine, 2886 struct intel_context *ctx, 2887 struct drm_i915_gem_request **req_out) 2888 { 2889 struct drm_i915_private *dev_priv = to_i915(engine->dev); 2890 unsigned reset_counter = i915_reset_counter(&dev_priv->gpu_error); 2891 struct drm_i915_gem_request *req; 2892 int ret; 2893 2894 if (!req_out) 2895 return -EINVAL; 2896 2897 *req_out = NULL; 2898 2899 /* ABI: Before userspace accesses the GPU (e.g. execbuffer), report 2900 * EIO if the GPU is already wedged, or EAGAIN to drop the struct_mutex 2901 * and restart. 2902 */ 2903 ret = i915_gem_check_wedge(reset_counter, dev_priv->mm.interruptible); 2904 if (ret) 2905 return ret; 2906 2907 req = kzalloc(sizeof(*req), GFP_KERNEL); 2908 if (req == NULL) 2909 return -ENOMEM; 2910 2911 ret = i915_gem_get_seqno(engine->dev, &req->seqno); 2912 if (ret) 2913 goto err; 2914 2915 kref_init(&req->ref); 2916 req->i915 = dev_priv; 2917 req->engine = engine; 2918 req->reset_counter = reset_counter; 2919 req->ctx = ctx; 2920 i915_gem_context_reference(req->ctx); 2921 2922 if (i915.enable_execlists) 2923 ret = intel_logical_ring_alloc_request_extras(req); 2924 else 2925 ret = intel_ring_alloc_request_extras(req); 2926 if (ret) { 2927 i915_gem_context_unreference(req->ctx); 2928 goto err; 2929 } 2930 2931 /* 2932 * Reserve space in the ring buffer for all the commands required to 2933 * eventually emit this request. This is to guarantee that the 2934 * i915_add_request() call can't fail. Note that the reserve may need 2935 * to be redone if the request is not actually submitted straight 2936 * away, e.g. because a GPU scheduler has deferred it. 
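* The reservation taken here is released again via intel_ring_reserved_space_cancel() in the error path below.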
2937 */ 2938 if (i915.enable_execlists) 2939 ret = intel_logical_ring_reserve_space(req); 2940 else 2941 ret = intel_ring_reserve_space(req); 2942 if (ret) { 2943 /* 2944 * At this point, the request is fully allocated even if not 2945 * fully prepared. Thus it can be cleaned up using the proper 2946 * free code. 2947 */ 2948 intel_ring_reserved_space_cancel(req->ringbuf); 2949 i915_gem_request_unreference(req); 2950 return ret; 2951 } 2952 2953 *req_out = req; 2954 return 0; 2955 2956 err: 2957 kfree(req); 2958 return ret; 2959 } 2960 2961 /** 2962 * i915_gem_request_alloc - allocate a request structure 2963 * 2964 * @engine: engine that we wish to issue the request on. 2965 * @ctx: context that the request will be associated with. 2966 * This can be NULL if the request is not directly related to 2967 * any specific user context, in which case this function will 2968 * choose an appropriate context to use. 2969 * 2970 * Returns a pointer to the allocated request if successful, 2971 * or an error code if not. 2972 */ 2973 struct drm_i915_gem_request * 2974 i915_gem_request_alloc(struct intel_engine_cs *engine, 2975 struct intel_context *ctx) 2976 { 2977 struct drm_i915_gem_request *req; 2978 int err; 2979 2980 if (ctx == NULL) 2981 ctx = to_i915(engine->dev)->kernel_context; 2982 err = __i915_gem_request_alloc(engine, ctx, &req); 2983 return err ? ERR_PTR(err) : req; 2984 } 2985 2986 struct drm_i915_gem_request * 2987 i915_gem_find_active_request(struct intel_engine_cs *engine) 2988 { 2989 struct drm_i915_gem_request *request; 2990 2991 list_for_each_entry(request, &engine->request_list, list) { 2992 if (i915_gem_request_completed(request, false)) 2993 continue; 2994 2995 return request; 2996 } 2997 2998 return NULL; 2999 } 3000 3001 static void i915_gem_reset_engine_status(struct drm_i915_private *dev_priv, 3002 struct intel_engine_cs *engine) 3003 { 3004 struct drm_i915_gem_request *request; 3005 bool ring_hung; 3006 3007 request = i915_gem_find_active_request(engine); 3008 3009 if (request == NULL) 3010 return; 3011 3012 ring_hung = engine->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG; 3013 3014 i915_set_reset_status(dev_priv, request->ctx, ring_hung); 3015 3016 list_for_each_entry_continue(request, &engine->request_list, list) 3017 i915_set_reset_status(dev_priv, request->ctx, false); 3018 } 3019 3020 static void i915_gem_reset_engine_cleanup(struct drm_i915_private *dev_priv, 3021 struct intel_engine_cs *engine) 3022 { 3023 struct intel_ringbuffer *buffer; 3024 3025 while (!list_empty(&engine->active_list)) { 3026 struct drm_i915_gem_object *obj; 3027 3028 obj = list_first_entry(&engine->active_list, 3029 struct drm_i915_gem_object, 3030 engine_list[engine->id]); 3031 3032 i915_gem_object_retire__read(obj, engine->id); 3033 } 3034 3035 /* 3036 * Clear the execlists queue up before freeing the requests, as those 3037 * are the ones that keep the context and ringbuffer backing objects 3038 * pinned in place. 3039 */ 3040 3041 if (i915.enable_execlists) { 3042 /* Ensure irq handler finishes or is cancelled. */ 3043 tasklet_kill(&engine->irq_tasklet); 3044 3045 spin_lock_bh(&engine->execlist_lock); 3046 /* list_splice_tail_init checks for empty lists */ 3047 list_splice_tail_init(&engine->execlist_queue, 3048 &engine->execlist_retired_req_list); 3049 spin_unlock_bh(&engine->execlist_lock); 3050 3051 intel_execlists_retire_requests(engine); 3052 } 3053 3054 /* 3055 * We must free the requests after all the corresponding objects have 3056 * been moved off active lists. 
This is the same order the normal 3057 * retire_requests function uses. This is important if objects hold 3058 * implicit references on things like e.g. ppgtt address spaces through 3059 * the request. 3060 */ 3061 while (!list_empty(&engine->request_list)) { 3062 struct drm_i915_gem_request *request; 3063 3064 request = list_first_entry(&engine->request_list, 3065 struct drm_i915_gem_request, 3066 list); 3067 3068 i915_gem_request_retire(request); 3069 } 3070 3071 /* Having flushed all requests from all queues, we know that all 3072 * ringbuffers must now be empty. However, since we do not reclaim 3073 * all space when retiring the request (to prevent HEADs colliding 3074 * with rapid ringbuffer wraparound) the amount of available space 3075 * upon reset is less than when we start. Do one more pass over 3076 * all the ringbuffers to reset last_retired_head. 3077 */ 3078 list_for_each_entry(buffer, &engine->buffers, link) { 3079 buffer->last_retired_head = buffer->tail; 3080 intel_ring_update_space(buffer); 3081 } 3082 3083 intel_ring_init_seqno(engine, engine->last_submitted_seqno); 3084 } 3085 3086 void i915_gem_reset(struct drm_device *dev) 3087 { 3088 struct drm_i915_private *dev_priv = dev->dev_private; 3089 struct intel_engine_cs *engine; 3090 3091 /* 3092 * Before we free the objects from the requests, we need to inspect 3093 * them for finding the guilty party. As the requests only borrow 3094 * their reference to the objects, the inspection must be done first. 3095 */ 3096 for_each_engine(engine, dev_priv) 3097 i915_gem_reset_engine_status(dev_priv, engine); 3098 3099 for_each_engine(engine, dev_priv) 3100 i915_gem_reset_engine_cleanup(dev_priv, engine); 3101 3102 i915_gem_context_reset(dev); 3103 3104 i915_gem_restore_fences(dev); 3105 3106 WARN_ON(i915_verify_lists(dev)); 3107 } 3108 3109 /** 3110 * This function clears the request list as sequence numbers are passed. 3111 */ 3112 void 3113 i915_gem_retire_requests_ring(struct intel_engine_cs *engine) 3114 { 3115 WARN_ON(i915_verify_lists(engine->dev)); 3116 3117 /* Retire requests first as we use it above for the early return. 3118 * If we retire requests last, we may use a later seqno and so clear 3119 * the requests lists without clearing the active list, leading to 3120 * confusion. 3121 */ 3122 while (!list_empty(&engine->request_list)) { 3123 struct drm_i915_gem_request *request; 3124 3125 request = list_first_entry(&engine->request_list, 3126 struct drm_i915_gem_request, 3127 list); 3128 3129 if (!i915_gem_request_completed(request, true)) 3130 break; 3131 3132 i915_gem_request_retire(request); 3133 } 3134 3135 /* Move any buffers on the active list that are no longer referenced 3136 * by the ringbuffer to the flushing/inactive lists as appropriate, 3137 * before we free the context associated with the requests.
3138 */ 3139 while (!list_empty(&engine->active_list)) { 3140 struct drm_i915_gem_object *obj; 3141 3142 obj = list_first_entry(&engine->active_list, 3143 struct drm_i915_gem_object, 3144 engine_list[engine->id]); 3145 3146 if (!list_empty(&obj->last_read_req[engine->id]->list)) 3147 break; 3148 3149 i915_gem_object_retire__read(obj, engine->id); 3150 } 3151 3152 if (unlikely(engine->trace_irq_req && 3153 i915_gem_request_completed(engine->trace_irq_req, true))) { 3154 engine->irq_put(engine); 3155 i915_gem_request_assign(&engine->trace_irq_req, NULL); 3156 } 3157 3158 WARN_ON(i915_verify_lists(engine->dev)); 3159 } 3160 3161 bool 3162 i915_gem_retire_requests(struct drm_device *dev) 3163 { 3164 struct drm_i915_private *dev_priv = dev->dev_private; 3165 struct intel_engine_cs *engine; 3166 bool idle = true; 3167 3168 for_each_engine(engine, dev_priv) { 3169 i915_gem_retire_requests_ring(engine); 3170 idle &= list_empty(&engine->request_list); 3171 if (i915.enable_execlists) { 3172 spin_lock_bh(&engine->execlist_lock); 3173 idle &= list_empty(&engine->execlist_queue); 3174 spin_unlock_bh(&engine->execlist_lock); 3175 3176 intel_execlists_retire_requests(engine); 3177 } 3178 } 3179 3180 if (idle) 3181 mod_delayed_work(dev_priv->wq, 3182 &dev_priv->mm.idle_work, 3183 msecs_to_jiffies(100)); 3184 3185 return idle; 3186 } 3187 3188 static void 3189 i915_gem_retire_work_handler(struct work_struct *work) 3190 { 3191 struct drm_i915_private *dev_priv = 3192 container_of(work, typeof(*dev_priv), mm.retire_work.work); 3193 struct drm_device *dev = dev_priv->dev; 3194 bool idle; 3195 3196 /* Come back later if the device is busy... */ 3197 idle = false; 3198 if (mutex_trylock(&dev->struct_mutex)) { 3199 idle = i915_gem_retire_requests(dev); 3200 mutex_unlock(&dev->struct_mutex); 3201 } 3202 if (!idle) 3203 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 3204 round_jiffies_up_relative(HZ)); 3205 } 3206 3207 static void 3208 i915_gem_idle_work_handler(struct work_struct *work) 3209 { 3210 struct drm_i915_private *dev_priv = 3211 container_of(work, typeof(*dev_priv), mm.idle_work.work); 3212 struct drm_device *dev = dev_priv->dev; 3213 struct intel_engine_cs *engine; 3214 3215 for_each_engine(engine, dev_priv) 3216 if (!list_empty(&engine->request_list)) 3217 return; 3218 3219 /* we probably should sync with hangcheck here, using cancel_work_sync. 3220 * Also locking seems to be fubar here, engine->request_list is protected 3221 * by dev->struct_mutex. */ 3222 3223 intel_mark_idle(dev); 3224 3225 if (mutex_trylock(&dev->struct_mutex)) { 3226 for_each_engine(engine, dev_priv) 3227 i915_gem_batch_pool_fini(&engine->batch_pool); 3228 3229 mutex_unlock(&dev->struct_mutex); 3230 } 3231 } 3232 3233 /** 3234 * Ensures that an object will eventually get non-busy by flushing any required 3235 * write domains, emitting any outstanding lazy request and retiring any 3236 * completed requests.
3237 */ 3238 static int 3239 i915_gem_object_flush_active(struct drm_i915_gem_object *obj) 3240 { 3241 int i; 3242 3243 if (!obj->active) 3244 return 0; 3245 3246 for (i = 0; i < I915_NUM_ENGINES; i++) { 3247 struct drm_i915_gem_request *req; 3248 3249 req = obj->last_read_req[i]; 3250 if (req == NULL) 3251 continue; 3252 3253 if (list_empty(&req->list)) 3254 goto retire; 3255 3256 if (i915_gem_request_completed(req, true)) { 3257 __i915_gem_request_retire__upto(req); 3258 retire: 3259 i915_gem_object_retire__read(obj, i); 3260 } 3261 } 3262 3263 return 0; 3264 } 3265 3266 /** 3267 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT 3268 * @DRM_IOCTL_ARGS: standard ioctl arguments 3269 * 3270 * Returns 0 if successful, else an error is returned with the remaining time in 3271 * the timeout parameter. 3272 * -ETIME: object is still busy after timeout 3273 * -ERESTARTSYS: signal interrupted the wait 3274 * -ENOENT: object doesn't exist 3275 * Also possible, but rare: 3276 * -EAGAIN: GPU wedged 3277 * -ENOMEM: damn 3278 * -ENODEV: Internal IRQ fail 3279 * -E?: The add request failed 3280 * 3281 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any 3282 * non-zero timeout parameter the wait ioctl will wait for the given number of 3283 * nanoseconds on an object becoming unbusy. Since the wait itself does so 3284 * without holding struct_mutex the object may become re-busied before this 3285 * function completes. A similar but shorter race condition exists in the busy 3286 * ioctl. 3287 */ 3288 int 3289 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) 3290 { 3291 struct drm_i915_gem_wait *args = data; 3292 struct drm_i915_gem_object *obj; 3293 struct drm_i915_gem_request *req[I915_NUM_ENGINES]; 3294 int i, n = 0; 3295 int ret; 3296 3297 if (args->flags != 0) 3298 return -EINVAL; 3299 3300 ret = i915_mutex_lock_interruptible(dev); 3301 if (ret) 3302 return ret; 3303 3304 obj = to_intel_bo(drm_gem_object_lookup(file, args->bo_handle)); 3305 if (&obj->base == NULL) { 3306 mutex_unlock(&dev->struct_mutex); 3307 return -ENOENT; 3308 } 3309 3310 /* Need to make sure the object gets inactive eventually. */ 3311 ret = i915_gem_object_flush_active(obj); 3312 if (ret) 3313 goto out; 3314 3315 if (!obj->active) 3316 goto out; 3317 3318 /* Do this after OLR check to make sure we make forward progress polling 3319 * on this IOCTL with a timeout == 0 (like busy ioctl) 3320 */ 3321 if (args->timeout_ns == 0) { 3322 ret = -ETIME; 3323 goto out; 3324 } 3325 3326 drm_gem_object_unreference(&obj->base); 3327 3328 for (i = 0; i < I915_NUM_ENGINES; i++) { 3329 if (obj->last_read_req[i] == NULL) 3330 continue; 3331 3332 req[n++] = i915_gem_request_reference(obj->last_read_req[i]); 3333 } 3334 3335 mutex_unlock(&dev->struct_mutex); 3336 3337 for (i = 0; i < n; i++) { 3338 if (ret == 0) 3339 ret = __i915_wait_request(req[i], true, 3340 args->timeout_ns > 0 ?
&args->timeout_ns : NULL, 3341 to_rps_client(file)); 3342 i915_gem_request_unreference__unlocked(req[i]); 3343 } 3344 return ret; 3345 3346 out: 3347 drm_gem_object_unreference(&obj->base); 3348 mutex_unlock(&dev->struct_mutex); 3349 return ret; 3350 } 3351 3352 static int 3353 __i915_gem_object_sync(struct drm_i915_gem_object *obj, 3354 struct intel_engine_cs *to, 3355 struct drm_i915_gem_request *from_req, 3356 struct drm_i915_gem_request **to_req) 3357 { 3358 struct intel_engine_cs *from; 3359 int ret; 3360 3361 from = i915_gem_request_get_engine(from_req); 3362 if (to == from) 3363 return 0; 3364 3365 if (i915_gem_request_completed(from_req, true)) 3366 return 0; 3367 3368 if (!i915_semaphore_is_enabled(obj->base.dev)) { 3369 struct drm_i915_private *i915 = to_i915(obj->base.dev); 3370 ret = __i915_wait_request(from_req, 3371 i915->mm.interruptible, 3372 NULL, 3373 &i915->rps.semaphores); 3374 if (ret) 3375 return ret; 3376 3377 i915_gem_object_retire_request(obj, from_req); 3378 } else { 3379 int idx = intel_ring_sync_index(from, to); 3380 u32 seqno = i915_gem_request_get_seqno(from_req); 3381 3382 WARN_ON(!to_req); 3383 3384 if (seqno <= from->semaphore.sync_seqno[idx]) 3385 return 0; 3386 3387 if (*to_req == NULL) { 3388 struct drm_i915_gem_request *req; 3389 3390 req = i915_gem_request_alloc(to, NULL); 3391 if (IS_ERR(req)) 3392 return PTR_ERR(req); 3393 3394 *to_req = req; 3395 } 3396 3397 trace_i915_gem_ring_sync_to(*to_req, from, from_req); 3398 ret = to->semaphore.sync_to(*to_req, from, seqno); 3399 if (ret) 3400 return ret; 3401 3402 /* We use last_read_req because sync_to() 3403 * might have just caused seqno wrap under 3404 * the radar. 3405 */ 3406 from->semaphore.sync_seqno[idx] = 3407 i915_gem_request_get_seqno(obj->last_read_req[from->id]); 3408 } 3409 3410 return 0; 3411 } 3412 3413 /** 3414 * i915_gem_object_sync - sync an object to a ring. 3415 * 3416 * @obj: object which may be in use on another ring. 3417 * @to: ring we wish to use the object on. May be NULL. 3418 * @to_req: request we wish to use the object for. See below. 3419 * This will be allocated and returned if a request is 3420 * required but not passed in. 3421 * 3422 * This code is meant to abstract object synchronization with the GPU. 3423 * Calling with NULL implies synchronizing the object with the CPU 3424 * rather than a particular GPU ring. Conceptually we serialise writes 3425 * between engines inside the GPU. We only allow one engine to write 3426 * into a buffer at any time, but multiple readers. To ensure each has 3427 * a coherent view of memory, we must: 3428 * 3429 * - If there is an outstanding write request to the object, the new 3430 * request must wait for it to complete (either CPU or in hw, requests 3431 * on the same ring will be naturally ordered). 3432 * 3433 * - If we are a write request (pending_write_domain is set), the new 3434 * request must wait for outstanding read requests to complete. 3435 * 3436 * For CPU synchronisation (NULL to) no request is required. For syncing with 3437 * rings to_req must be non-NULL. However, a request does not have to be 3438 * pre-allocated. If *to_req is NULL and sync commands will be emitted then a 3439 * request will be allocated automatically and returned through *to_req. Note 3440 * that it is not guaranteed that commands will be emitted (because the system 3441 * might already be idle). Hence there is no need to create a request that 3442 * might never have any work submitted. 
Note further that if a request is 3443 * returned in *to_req, it is the responsibility of the caller to submit 3444 * that request (after potentially adding more work to it). 3445 * 3446 * Returns 0 if successful, else propagates up the lower layer error. 3447 */ 3448 int 3449 i915_gem_object_sync(struct drm_i915_gem_object *obj, 3450 struct intel_engine_cs *to, 3451 struct drm_i915_gem_request **to_req) 3452 { 3453 const bool readonly = obj->base.pending_write_domain == 0; 3454 struct drm_i915_gem_request *req[I915_NUM_ENGINES]; 3455 int ret, i, n; 3456 3457 if (!obj->active) 3458 return 0; 3459 3460 if (to == NULL) 3461 return i915_gem_object_wait_rendering(obj, readonly); 3462 3463 n = 0; 3464 if (readonly) { 3465 if (obj->last_write_req) 3466 req[n++] = obj->last_write_req; 3467 } else { 3468 for (i = 0; i < I915_NUM_ENGINES; i++) 3469 if (obj->last_read_req[i]) 3470 req[n++] = obj->last_read_req[i]; 3471 } 3472 for (i = 0; i < n; i++) { 3473 ret = __i915_gem_object_sync(obj, to, req[i], to_req); 3474 if (ret) 3475 return ret; 3476 } 3477 3478 return 0; 3479 } 3480 3481 static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj) 3482 { 3483 u32 old_write_domain, old_read_domains; 3484 3485 /* Force a pagefault for domain tracking on next user access */ 3486 i915_gem_release_mmap(obj); 3487 3488 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) 3489 return; 3490 3491 old_read_domains = obj->base.read_domains; 3492 old_write_domain = obj->base.write_domain; 3493 3494 obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT; 3495 obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT; 3496 3497 trace_i915_gem_object_change_domain(obj, 3498 old_read_domains, 3499 old_write_domain); 3500 } 3501 3502 static int __i915_vma_unbind(struct i915_vma *vma, bool wait) 3503 { 3504 struct drm_i915_gem_object *obj = vma->obj; 3505 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 3506 int ret; 3507 3508 if (list_empty(&vma->obj_link)) 3509 return 0; 3510 3511 if (!drm_mm_node_allocated(&vma->node)) { 3512 i915_gem_vma_destroy(vma); 3513 return 0; 3514 } 3515 3516 if (vma->pin_count) 3517 return -EBUSY; 3518 3519 BUG_ON(obj->pages == NULL); 3520 3521 if (wait) { 3522 ret = i915_gem_object_wait_rendering(obj, false); 3523 if (ret) 3524 return ret; 3525 } 3526 3527 if (vma->is_ggtt && vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) { 3528 i915_gem_object_finish_gtt(obj); 3529 3530 /* release the fence reg _after_ flushing */ 3531 ret = i915_gem_object_put_fence(obj); 3532 if (ret) 3533 return ret; 3534 } 3535 3536 trace_i915_vma_unbind(vma); 3537 3538 vma->vm->unbind_vma(vma); 3539 vma->bound = 0; 3540 3541 list_del_init(&vma->vm_link); 3542 if (vma->is_ggtt) { 3543 if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) { 3544 obj->map_and_fenceable = false; 3545 } else if (vma->ggtt_view.pages) { 3546 sg_free_table(vma->ggtt_view.pages); 3547 kfree(vma->ggtt_view.pages); 3548 } 3549 vma->ggtt_view.pages = NULL; 3550 } 3551 3552 drm_mm_remove_node(&vma->node); 3553 i915_gem_vma_destroy(vma); 3554 3555 /* Since the unbound list is global, only move to that list if 3556 * no more VMAs exist. */ 3557 if (list_empty(&obj->vma_list)) 3558 list_move_tail(&obj->global_list, &dev_priv->mm.unbound_list); 3559 3560 /* And finally now the object is completely decoupled from this vma, 3561 * we can drop its hold on the backing storage and allow it to be 3562 * reaped by the shrinker. 
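* This unpin balances the i915_gem_object_pin_pages() call made when the VMA was bound in i915_gem_object_bind_to_vm().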
3563 */ 3564 i915_gem_object_unpin_pages(obj); 3565 3566 return 0; 3567 } 3568 3569 int i915_vma_unbind(struct i915_vma *vma) 3570 { 3571 return __i915_vma_unbind(vma, true); 3572 } 3573 3574 int __i915_vma_unbind_no_wait(struct i915_vma *vma) 3575 { 3576 return __i915_vma_unbind(vma, false); 3577 } 3578 3579 int i915_gpu_idle(struct drm_device *dev) 3580 { 3581 struct drm_i915_private *dev_priv = dev->dev_private; 3582 struct intel_engine_cs *engine; 3583 int ret; 3584 3585 /* Flush everything onto the inactive list. */ 3586 for_each_engine(engine, dev_priv) { 3587 if (!i915.enable_execlists) { 3588 struct drm_i915_gem_request *req; 3589 3590 req = i915_gem_request_alloc(engine, NULL); 3591 if (IS_ERR(req)) 3592 return PTR_ERR(req); 3593 3594 ret = i915_switch_context(req); 3595 i915_add_request_no_flush(req); 3596 if (ret) 3597 return ret; 3598 } 3599 3600 ret = intel_engine_idle(engine); 3601 if (ret) 3602 return ret; 3603 } 3604 3605 WARN_ON(i915_verify_lists(dev)); 3606 return 0; 3607 } 3608 3609 static bool i915_gem_valid_gtt_space(struct i915_vma *vma, 3610 unsigned long cache_level) 3611 { 3612 struct drm_mm_node *gtt_space = &vma->node; 3613 struct drm_mm_node *other; 3614 3615 /* 3616 * On some machines we have to be careful when putting differing types 3617 * of snoopable memory together to avoid the prefetcher crossing memory 3618 * domains and dying. During vm initialisation, we decide whether or not 3619 * these constraints apply and set the drm_mm.color_adjust 3620 * appropriately. 3621 */ 3622 if (vma->vm->mm.color_adjust == NULL) 3623 return true; 3624 3625 if (!drm_mm_node_allocated(gtt_space)) 3626 return true; 3627 3628 if (list_empty(&gtt_space->node_list)) 3629 return true; 3630 3631 other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list); 3632 if (other->allocated && !other->hole_follows && other->color != cache_level) 3633 return false; 3634 3635 other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list); 3636 if (other->allocated && !gtt_space->hole_follows && other->color != cache_level) 3637 return false; 3638 3639 return true; 3640 } 3641 3642 /** 3643 * Finds free space in the GTT aperture and binds the object or a view of it 3644 * there. 3645 */ 3646 static struct i915_vma * 3647 i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, 3648 struct i915_address_space *vm, 3649 const struct i915_ggtt_view *ggtt_view, 3650 unsigned alignment, 3651 uint64_t flags) 3652 { 3653 struct drm_device *dev = obj->base.dev; 3654 struct drm_i915_private *dev_priv = to_i915(dev); 3655 struct i915_ggtt *ggtt = &dev_priv->ggtt; 3656 u32 fence_alignment, unfenced_alignment; 3657 u32 search_flag, alloc_flag; 3658 u64 start, end; 3659 u64 size, fence_size; 3660 struct i915_vma *vma; 3661 int ret; 3662 3663 if (i915_is_ggtt(vm)) { 3664 u32 view_size; 3665 3666 if (WARN_ON(!ggtt_view)) 3667 return ERR_PTR(-EINVAL); 3668 3669 view_size = i915_ggtt_view_size(obj, ggtt_view); 3670 3671 fence_size = i915_gem_get_gtt_size(dev, 3672 view_size, 3673 obj->tiling_mode); 3674 fence_alignment = i915_gem_get_gtt_alignment(dev, 3675 view_size, 3676 obj->tiling_mode, 3677 true); 3678 unfenced_alignment = i915_gem_get_gtt_alignment(dev, 3679 view_size, 3680 obj->tiling_mode, 3681 false); 3682 size = flags & PIN_MAPPABLE ?
fence_size : view_size; 3683 } else { 3684 fence_size = i915_gem_get_gtt_size(dev, 3685 obj->base.size, 3686 obj->tiling_mode); 3687 fence_alignment = i915_gem_get_gtt_alignment(dev, 3688 obj->base.size, 3689 obj->tiling_mode, 3690 true); 3691 unfenced_alignment = 3692 i915_gem_get_gtt_alignment(dev, 3693 obj->base.size, 3694 obj->tiling_mode, 3695 false); 3696 size = flags & PIN_MAPPABLE ? fence_size : obj->base.size; 3697 } 3698 3699 start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0; 3700 end = vm->total; 3701 if (flags & PIN_MAPPABLE) 3702 end = min_t(u64, end, ggtt->mappable_end); 3703 if (flags & PIN_ZONE_4G) 3704 end = min_t(u64, end, (1ULL << 32) - PAGE_SIZE); 3705 3706 if (alignment == 0) 3707 alignment = flags & PIN_MAPPABLE ? fence_alignment : 3708 unfenced_alignment; 3709 if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) { 3710 DRM_DEBUG("Invalid object (view type=%u) alignment requested %u\n", 3711 ggtt_view ? ggtt_view->type : 0, 3712 alignment); 3713 return ERR_PTR(-EINVAL); 3714 } 3715 3716 /* If binding the object/GGTT view requires more space than the entire 3717 * aperture has, reject it early before evicting everything in a vain 3718 * attempt to find space. 3719 */ 3720 if (size > end) { 3721 DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: size=%llu > %s aperture=%llu\n", 3722 ggtt_view ? ggtt_view->type : 0, 3723 size, 3724 flags & PIN_MAPPABLE ? "mappable" : "total", 3725 end); 3726 return ERR_PTR(-E2BIG); 3727 } 3728 3729 ret = i915_gem_object_get_pages(obj); 3730 if (ret) 3731 return ERR_PTR(ret); 3732 3733 i915_gem_object_pin_pages(obj); 3734 3735 vma = ggtt_view ? i915_gem_obj_lookup_or_create_ggtt_vma(obj, ggtt_view) : 3736 i915_gem_obj_lookup_or_create_vma(obj, vm); 3737 3738 if (IS_ERR(vma)) 3739 goto err_unpin; 3740 3741 if (flags & PIN_OFFSET_FIXED) { 3742 uint64_t offset = flags & PIN_OFFSET_MASK; 3743 3744 if (offset & (alignment - 1) || offset + size > end) { 3745 ret = -EINVAL; 3746 goto err_free_vma; 3747 } 3748 vma->node.start = offset; 3749 vma->node.size = size; 3750 vma->node.color = obj->cache_level; 3751 ret = drm_mm_reserve_node(&vm->mm, &vma->node); 3752 if (ret) { 3753 ret = i915_gem_evict_for_vma(vma); 3754 if (ret == 0) 3755 ret = drm_mm_reserve_node(&vm->mm, &vma->node); 3756 } 3757 if (ret) 3758 goto err_free_vma; 3759 } else { 3760 if (flags & PIN_HIGH) { 3761 search_flag = DRM_MM_SEARCH_BELOW; 3762 alloc_flag = DRM_MM_CREATE_TOP; 3763 } else { 3764 search_flag = DRM_MM_SEARCH_DEFAULT; 3765 alloc_flag = DRM_MM_CREATE_DEFAULT; 3766 } 3767 3768 search_free: 3769 ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node, 3770 size, alignment, 3771 obj->cache_level, 3772 start, end, 3773 search_flag, 3774 alloc_flag); 3775 if (ret) { 3776 ret = i915_gem_evict_something(dev, vm, size, alignment, 3777 obj->cache_level, 3778 start, end, 3779 flags); 3780 if (ret == 0) 3781 goto search_free; 3782 3783 goto err_free_vma; 3784 } 3785 } 3786 if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) { 3787 ret = -EINVAL; 3788 goto err_remove_node; 3789 } 3790 3791 trace_i915_vma_bind(vma, flags); 3792 ret = i915_vma_bind(vma, obj->cache_level, flags); 3793 if (ret) 3794 goto err_remove_node; 3795 3796 list_move_tail(&obj->global_list, &dev_priv->mm.bound_list); 3797 list_add_tail(&vma->vm_link, &vm->inactive_list); 3798 3799 return vma; 3800 3801 err_remove_node: 3802 drm_mm_remove_node(&vma->node); 3803 err_free_vma: 3804 i915_gem_vma_destroy(vma); 3805 vma = ERR_PTR(ret); 3806 
err_unpin: 3807 i915_gem_object_unpin_pages(obj); 3808 return vma; 3809 } 3810 3811 bool 3812 i915_gem_clflush_object(struct drm_i915_gem_object *obj, 3813 bool force) 3814 { 3815 /* If we don't have a page list set up, then we're not pinned 3816 * to GPU, and we can ignore the cache flush because it'll happen 3817 * again at bind time. 3818 */ 3819 if (obj->pages == NULL) 3820 return false; 3821 3822 /* 3823 * Stolen memory is always coherent with the GPU as it is explicitly 3824 * marked as wc by the system, or the system is cache-coherent. 3825 */ 3826 if (obj->stolen || obj->phys_handle) 3827 return false; 3828 3829 /* If the GPU is snooping the contents of the CPU cache, 3830 * we do not need to manually clear the CPU cache lines. However, 3831 * the caches are only snooped when the render cache is 3832 * flushed/invalidated. As we always have to emit invalidations 3833 * and flushes when moving into and out of the RENDER domain, correct 3834 * snooping behaviour occurs naturally as the result of our domain 3835 * tracking. 3836 */ 3837 if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) { 3838 obj->cache_dirty = true; 3839 return false; 3840 } 3841 3842 trace_i915_gem_object_clflush(obj); 3843 drm_clflush_sg(obj->pages); 3844 obj->cache_dirty = false; 3845 3846 return true; 3847 } 3848 3849 /** Flushes the GTT write domain for the object if it's dirty. */ 3850 static void 3851 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) 3852 { 3853 uint32_t old_write_domain; 3854 3855 if (obj->base.write_domain != I915_GEM_DOMAIN_GTT) 3856 return; 3857 3858 /* No actual flushing is required for the GTT write domain. Writes 3859 * to it immediately go to main memory as far as we know, so there's 3860 * no chipset flush. It also doesn't land in render cache. 3861 * 3862 * However, we do have to enforce the order so that all writes through 3863 * the GTT land before any writes to the device, such as updates to 3864 * the GATT itself. 3865 */ 3866 wmb(); 3867 3868 old_write_domain = obj->base.write_domain; 3869 obj->base.write_domain = 0; 3870 3871 intel_fb_obj_flush(obj, false, ORIGIN_GTT); 3872 3873 trace_i915_gem_object_change_domain(obj, 3874 obj->base.read_domains, 3875 old_write_domain); 3876 } 3877 3878 /** Flushes the CPU write domain for the object if it's dirty. */ 3879 static void 3880 i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) 3881 { 3882 uint32_t old_write_domain; 3883 3884 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) 3885 return; 3886 3887 if (i915_gem_clflush_object(obj, obj->pin_display)) 3888 i915_gem_chipset_flush(obj->base.dev); 3889 3890 old_write_domain = obj->base.write_domain; 3891 obj->base.write_domain = 0; 3892 3893 intel_fb_obj_flush(obj, false, ORIGIN_CPU); 3894 3895 trace_i915_gem_object_change_domain(obj, 3896 obj->base.read_domains, 3897 old_write_domain); 3898 } 3899 3900 /** 3901 * Moves a single object to the GTT read, and possibly write domain. 3902 * 3903 * This function returns when the move is complete, including waiting on 3904 * flushes to occur. 
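* @obj: object to move into the GTT domain * @write: whether write access to the object is required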
3905 */ 3906 int 3907 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) 3908 { 3909 struct drm_device *dev = obj->base.dev; 3910 struct drm_i915_private *dev_priv = to_i915(dev); 3911 struct i915_ggtt *ggtt = &dev_priv->ggtt; 3912 uint32_t old_write_domain, old_read_domains; 3913 struct i915_vma *vma; 3914 int ret; 3915 3916 if (obj->base.write_domain == I915_GEM_DOMAIN_GTT) 3917 return 0; 3918 3919 ret = i915_gem_object_wait_rendering(obj, !write); 3920 if (ret) 3921 return ret; 3922 3923 /* Flush and acquire obj->pages so that we are coherent through 3924 * direct access in memory with previous cached writes through 3925 * shmemfs and that our cache domain tracking remains valid. 3926 * For example, if the obj->filp was moved to swap without us 3927 * being notified and releasing the pages, we would mistakenly 3928 * continue to assume that the obj remained out of the CPU cached 3929 * domain. 3930 */ 3931 ret = i915_gem_object_get_pages(obj); 3932 if (ret) 3933 return ret; 3934 3935 i915_gem_object_flush_cpu_write_domain(obj); 3936 3937 /* Serialise direct access to this object with the barriers for 3938 * coherent writes from the GPU, by effectively invalidating the 3939 * GTT domain upon first access. 3940 */ 3941 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) 3942 mb(); 3943 3944 old_write_domain = obj->base.write_domain; 3945 old_read_domains = obj->base.read_domains; 3946 3947 /* It should now be out of any other write domains, and we can update 3948 * the domain values for our changes. 3949 */ 3950 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0); 3951 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 3952 if (write) { 3953 obj->base.read_domains = I915_GEM_DOMAIN_GTT; 3954 obj->base.write_domain = I915_GEM_DOMAIN_GTT; 3955 obj->dirty = 1; 3956 } 3957 3958 trace_i915_gem_object_change_domain(obj, 3959 old_read_domains, 3960 old_write_domain); 3961 3962 /* And bump the LRU for this access */ 3963 vma = i915_gem_obj_to_ggtt(obj); 3964 if (vma && drm_mm_node_allocated(&vma->node) && !obj->active) 3965 list_move_tail(&vma->vm_link, 3966 &ggtt->base.inactive_list); 3967 3968 return 0; 3969 } 3970 3971 /** 3972 * Changes the cache-level of an object across all VMA. 3973 * 3974 * After this function returns, the object will be in the new cache-level 3975 * across all GTT and the contents of the backing storage will be coherent, 3976 * with respect to the new cache-level. In order to keep the backing storage 3977 * coherent for all users, we only allow a single cache level to be set 3978 * globally on the object and prevent it from being changed whilst the 3979 * hardware is reading from the object. That is if the object is currently 3980 * on the scanout it will be set to uncached (or equivalent display 3981 * cache coherency) and all non-MOCS GPU access will also be uncached so 3982 * that all direct access to the scanout remains coherent. 3983 */ 3984 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, 3985 enum i915_cache_level cache_level) 3986 { 3987 struct drm_device *dev = obj->base.dev; 3988 struct i915_vma *vma, *next; 3989 bool bound = false; 3990 int ret = 0; 3991 3992 if (obj->cache_level == cache_level) 3993 goto out; 3994 3995 /* Inspect the list of currently bound VMA and unbind any that would 3996 * be invalid given the new cache-level. This is principally to 3997 * catch the issue of the CS prefetch crossing page boundaries and 3998 * reading an invalid PTE on older architectures. 
/**
 * Changes the cache-level of an object across all VMA.
 *
 * After this function returns, the object will be in the new cache-level
 * across all GTT and the contents of the backing storage will be coherent
 * with respect to the new cache-level. In order to keep the backing storage
 * coherent for all users, we only allow a single cache level to be set
 * globally on the object and prevent it from being changed whilst the
 * hardware is reading from the object. That is, if the object is currently
 * on the scanout it will be set to uncached (or equivalent display
 * cache coherency) and all non-MOCS GPU access will also be uncached so
 * that all direct access to the scanout remains coherent.
 */
int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
				    enum i915_cache_level cache_level)
{
	struct drm_device *dev = obj->base.dev;
	struct i915_vma *vma, *next;
	bool bound = false;
	int ret = 0;

	if (obj->cache_level == cache_level)
		goto out;

	/* Inspect the list of currently bound VMA and unbind any that would
	 * be invalid given the new cache-level. This is principally to
	 * catch the issue of the CS prefetch crossing page boundaries and
	 * reading an invalid PTE on older architectures.
	 */
	list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) {
		if (!drm_mm_node_allocated(&vma->node))
			continue;

		if (vma->pin_count) {
			DRM_DEBUG("cannot change the cache level of pinned objects\n");
			return -EBUSY;
		}

		if (!i915_gem_valid_gtt_space(vma, cache_level)) {
			ret = i915_vma_unbind(vma);
			if (ret)
				return ret;
		} else
			bound = true;
	}

	/* We can reuse the existing drm_mm nodes but need to change the
	 * cache-level on the PTE. We could simply unbind them all and
	 * rebind with the correct cache-level on next use. However, since
	 * we already have a valid slot, dma mapping, pages etc, we may as
	 * well rewrite the PTE in the belief that doing so tramples upon
	 * less state and so involves less work.
	 */
	if (bound) {
		/* Before we change the PTE, the GPU must not be accessing it.
		 * If we wait upon the object, we know that all the bound
		 * VMA are no longer active.
		 */
		ret = i915_gem_object_wait_rendering(obj, false);
		if (ret)
			return ret;

		if (!HAS_LLC(dev) && cache_level != I915_CACHE_NONE) {
			/* Access to snoopable pages through the GTT is
			 * incoherent and on some machines causes a hard
			 * lockup. Relinquish the CPU mmapping to force
			 * userspace to refault in the pages and we can
			 * then double check if the GTT mapping is still
			 * valid for that pointer access.
			 */
			i915_gem_release_mmap(obj);

			/* As we no longer need a fence for GTT access,
			 * we can relinquish it now (and so prevent having
			 * to steal a fence from someone else on the next
			 * fence request). Note GPU activity would have
			 * dropped the fence as all snoopable access is
			 * supposed to be linear.
			 */
			ret = i915_gem_object_put_fence(obj);
			if (ret)
				return ret;
		} else {
			/* We either have incoherent backing store and
			 * so no GTT access or the architecture is fully
			 * coherent. In such cases, existing GTT mmaps
			 * ignore the cache bit in the PTE and we can
			 * rewrite it without confusing the GPU or having
			 * to force userspace to fault back in its mmaps.
			 */
		}

		list_for_each_entry(vma, &obj->vma_list, obj_link) {
			if (!drm_mm_node_allocated(&vma->node))
				continue;

			ret = i915_vma_bind(vma, cache_level, PIN_UPDATE);
			if (ret)
				return ret;
		}
	}

	list_for_each_entry(vma, &obj->vma_list, obj_link)
		vma->node.color = cache_level;
	obj->cache_level = cache_level;

out:
	/* Flush the dirty CPU caches to the backing storage so that the
	 * object is now coherent at its new cache level (with respect
	 * to the access domain).
	 */
	if (obj->cache_dirty &&
	    obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
	    cpu_write_needs_clflush(obj)) {
		if (i915_gem_clflush_object(obj, true))
			i915_gem_chipset_flush(obj->base.dev);
	}

	return 0;
}
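/*
 * Illustrative sketch: a kernel-internal caller switching an object to
 * LLC caching with the helper above; pinned objects refuse the change:
 *
 *	ret = i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
 *	if (ret == -EBUSY)
 *		return ret;	(a VMA is pinned, cache level unchanged)
 */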
int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
{
	struct drm_i915_gem_caching *args = data;
	struct drm_i915_gem_object *obj;

	obj = to_intel_bo(drm_gem_object_lookup(file, args->handle));
	if (&obj->base == NULL)
		return -ENOENT;

	switch (obj->cache_level) {
	case I915_CACHE_LLC:
	case I915_CACHE_L3_LLC:
		args->caching = I915_CACHING_CACHED;
		break;

	case I915_CACHE_WT:
		args->caching = I915_CACHING_DISPLAY;
		break;

	default:
		args->caching = I915_CACHING_NONE;
		break;
	}

	drm_gem_object_unreference_unlocked(&obj->base);
	return 0;
}

int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_caching *args = data;
	struct drm_i915_gem_object *obj;
	enum i915_cache_level level;
	int ret;

	switch (args->caching) {
	case I915_CACHING_NONE:
		level = I915_CACHE_NONE;
		break;
	case I915_CACHING_CACHED:
		/*
		 * Due to a HW issue on BXT A stepping, GPU stores via a
		 * snooped mapping may leave stale data in a corresponding CPU
		 * cacheline, whereas normally such cachelines would get
		 * invalidated.
		 */
		if (!HAS_LLC(dev) && !HAS_SNOOP(dev))
			return -ENODEV;

		level = I915_CACHE_LLC;
		break;
	case I915_CACHING_DISPLAY:
		level = HAS_WT(dev) ? I915_CACHE_WT : I915_CACHE_NONE;
		break;
	default:
		return -EINVAL;
	}

	intel_runtime_pm_get(dev_priv);

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		goto rpm_put;

	obj = to_intel_bo(drm_gem_object_lookup(file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	ret = i915_gem_object_set_cache_level(obj, level);

	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
rpm_put:
	intel_runtime_pm_put(dev_priv);

	return ret;
}
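/*
 * Illustrative sketch of the userspace side of the two ioctls above,
 * via libdrm ("fd" and "handle" are hypothetical here):
 *
 *	struct drm_i915_gem_caching arg = {
 *		.handle = handle,
 *		.caching = I915_CACHING_CACHED,
 *	};
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg);
 */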
/*
 * Prepare buffer for display plane (scanout, cursors, etc.).
 * Can be called from an uninterruptible phase (modesetting) and allows
 * any flushes to be pipelined (for pageflips).
 */
int
i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
				     u32 alignment,
				     const struct i915_ggtt_view *view)
{
	u32 old_read_domains, old_write_domain;
	int ret;

	/* Mark the pin_display early so that we account for the
	 * display coherency whilst setting up the cache domains.
	 */
	obj->pin_display++;

	/* The display engine is not coherent with the LLC cache on gen6. As
	 * a result, we make sure that the pinning that is about to occur is
	 * done with uncached PTEs. This is the lowest common denominator for
	 * all chipsets.
	 *
	 * However for gen6+, we could do better by using the GFDT bit instead
	 * of uncaching, which would allow us to flush all the LLC-cached data
	 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
	 */
	ret = i915_gem_object_set_cache_level(obj,
					      HAS_WT(obj->base.dev) ?
					      I915_CACHE_WT : I915_CACHE_NONE);
	if (ret)
		goto err_unpin_display;

	/* As the user may map the buffer once pinned in the display plane
	 * (e.g. libkms for the bootup splash), we have to ensure that we
	 * always use map_and_fenceable for all scanout buffers.
	 */
	ret = i915_gem_object_ggtt_pin(obj, view, alignment,
				       view->type == I915_GGTT_VIEW_NORMAL ?
				       PIN_MAPPABLE : 0);
	if (ret)
		goto err_unpin_display;

	i915_gem_object_flush_cpu_write_domain(obj);

	old_write_domain = obj->base.write_domain;
	old_read_domains = obj->base.read_domains;

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	obj->base.write_domain = 0;
	obj->base.read_domains |= I915_GEM_DOMAIN_GTT;

	trace_i915_gem_object_change_domain(obj,
					    old_read_domains,
					    old_write_domain);

	return 0;

err_unpin_display:
	obj->pin_display--;
	return ret;
}

void
i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj,
					 const struct i915_ggtt_view *view)
{
	if (WARN_ON(obj->pin_display == 0))
		return;

	i915_gem_object_ggtt_unpin_view(obj, view);

	obj->pin_display--;
}

/**
 * Moves a single object to the CPU read, and possibly write domain.
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
{
	uint32_t old_write_domain, old_read_domains;
	int ret;

	if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
		return 0;

	ret = i915_gem_object_wait_rendering(obj, !write);
	if (ret)
		return ret;

	i915_gem_object_flush_gtt_write_domain(obj);

	old_write_domain = obj->base.write_domain;
	old_read_domains = obj->base.read_domains;

	/* Flush the CPU cache if it's still invalid. */
	if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
		i915_gem_clflush_object(obj, false);

		obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
	}

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0);

	/* If we're writing through the CPU, then the GPU read domains will
	 * need to be invalidated at next use.
	 */
	if (write) {
		obj->base.read_domains = I915_GEM_DOMAIN_CPU;
		obj->base.write_domain = I915_GEM_DOMAIN_CPU;
	}

	trace_i915_gem_object_change_domain(obj,
					    old_read_domains,
					    old_write_domain);

	return 0;
}
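/*
 * Illustrative sketch: reading back GPU-written data with the CPU
 * domain mover above ("vaddr", "dst" and "len" are hypothetical):
 *
 *	ret = i915_gem_object_set_to_cpu_domain(obj, false);
 *	if (ret == 0)
 *		memcpy(dst, vaddr, len);
 *
 * The clflush performed by the move guarantees the copy sees the GPU's
 * writes rather than stale cachelines.
 */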
/* Throttle our rendering by waiting until the ring has completed our requests
 * emitted over 20 msec ago.
 *
 * Note that if we were to use the current jiffies each time around the loop,
 * we wouldn't escape the function with any frames outstanding if the time to
 * render a frame was over 20ms.
 *
 * This should get us reasonable parallelism between CPU and GPU but also
 * relatively low latency when blocking on a particular request to finish.
 */
static int
i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_file_private *file_priv = file->driver_priv;
	unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES;
	struct drm_i915_gem_request *request, *target = NULL;
	int ret;

	ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
	if (ret)
		return ret;

	/* ABI: return -EIO if already wedged */
	if (i915_terminally_wedged(&dev_priv->gpu_error))
		return -EIO;

	spin_lock(&file_priv->mm.lock);
	list_for_each_entry(request, &file_priv->mm.request_list, client_list) {
		if (time_after_eq(request->emitted_jiffies, recent_enough))
			break;

		/*
		 * Note that the request might not have been submitted yet,
		 * in which case emitted_jiffies will be zero.
		 */
		if (!request->emitted_jiffies)
			continue;

		target = request;
	}
	if (target)
		i915_gem_request_reference(target);
	spin_unlock(&file_priv->mm.lock);

	if (target == NULL)
		return 0;

	ret = __i915_wait_request(target, true, NULL, NULL);
	if (ret == 0)
		queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0);

	i915_gem_request_unreference__unlocked(target);

	return ret;
}

static bool
i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags)
{
	struct drm_i915_gem_object *obj = vma->obj;

	if (alignment &&
	    vma->node.start & (alignment - 1))
		return true;

	if (flags & PIN_MAPPABLE && !obj->map_and_fenceable)
		return true;

	if (flags & PIN_OFFSET_BIAS &&
	    vma->node.start < (flags & PIN_OFFSET_MASK))
		return true;

	if (flags & PIN_OFFSET_FIXED &&
	    vma->node.start != (flags & PIN_OFFSET_MASK))
		return true;

	return false;
}

void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
{
	struct drm_i915_gem_object *obj = vma->obj;
	bool mappable, fenceable;
	u32 fence_size, fence_alignment;

	fence_size = i915_gem_get_gtt_size(obj->base.dev,
					   obj->base.size,
					   obj->tiling_mode);
	fence_alignment = i915_gem_get_gtt_alignment(obj->base.dev,
						     obj->base.size,
						     obj->tiling_mode,
						     true);

	fenceable = (vma->node.size == fence_size &&
		     (vma->node.start & (fence_alignment - 1)) == 0);

	mappable = (vma->node.start + fence_size <=
		    to_i915(obj->base.dev)->ggtt.mappable_end);

	obj->map_and_fenceable = mappable && fenceable;
}
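/*
 * Illustrative sketch: the PIN_* flags tested by i915_vma_misplaced()
 * above; PIN_OFFSET_FIXED carries the required start address in the
 * PIN_OFFSET_MASK bits of the flags word ("offset" is hypothetical):
 *
 *	ret = i915_gem_object_ggtt_pin(obj, &i915_ggtt_view_normal, 0,
 *				       PIN_OFFSET_FIXED |
 *				       (offset & PIN_OFFSET_MASK));
 */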
static int
i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
		       struct i915_address_space *vm,
		       const struct i915_ggtt_view *ggtt_view,
		       uint32_t alignment,
		       uint64_t flags)
{
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
	struct i915_vma *vma;
	unsigned bound;
	int ret;

	if (WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base))
		return -ENODEV;

	if (WARN_ON(flags & (PIN_GLOBAL | PIN_MAPPABLE) && !i915_is_ggtt(vm)))
		return -EINVAL;

	if (WARN_ON((flags & (PIN_MAPPABLE | PIN_GLOBAL)) == PIN_MAPPABLE))
		return -EINVAL;

	if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view))
		return -EINVAL;

	vma = ggtt_view ? i915_gem_obj_to_ggtt_view(obj, ggtt_view) :
			  i915_gem_obj_to_vma(obj, vm);

	if (vma) {
		if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
			return -EBUSY;

		if (i915_vma_misplaced(vma, alignment, flags)) {
			WARN(vma->pin_count,
			     "bo is already pinned in %s with incorrect alignment:"
			     " offset=%08x %08x, req.alignment=%x, req.map_and_fenceable=%d,"
			     " obj->map_and_fenceable=%d\n",
			     ggtt_view ? "ggtt" : "ppgtt",
			     upper_32_bits(vma->node.start),
			     lower_32_bits(vma->node.start),
			     alignment,
			     !!(flags & PIN_MAPPABLE),
			     obj->map_and_fenceable);
			ret = i915_vma_unbind(vma);
			if (ret)
				return ret;

			vma = NULL;
		}
	}

	bound = vma ? vma->bound : 0;
	if (vma == NULL || !drm_mm_node_allocated(&vma->node)) {
		vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view, alignment,
						 flags);
		if (IS_ERR(vma))
			return PTR_ERR(vma);
	} else {
		ret = i915_vma_bind(vma, obj->cache_level, flags);
		if (ret)
			return ret;
	}

	if (ggtt_view && ggtt_view->type == I915_GGTT_VIEW_NORMAL &&
	    (bound ^ vma->bound) & GLOBAL_BIND) {
		__i915_vma_set_map_and_fenceable(vma);
		WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable);
	}

	vma->pin_count++;
	return 0;
}

int
i915_gem_object_pin(struct drm_i915_gem_object *obj,
		    struct i915_address_space *vm,
		    uint32_t alignment,
		    uint64_t flags)
{
	return i915_gem_object_do_pin(obj, vm,
				      i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL,
				      alignment, flags);
}

int
i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
			 const struct i915_ggtt_view *view,
			 uint32_t alignment,
			 uint64_t flags)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct i915_ggtt *ggtt = &dev_priv->ggtt;

	BUG_ON(!view);

	return i915_gem_object_do_pin(obj, &ggtt->base, view,
				      alignment, flags | PIN_GLOBAL);
}

void
i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj,
				const struct i915_ggtt_view *view)
{
	struct i915_vma *vma = i915_gem_obj_to_ggtt_view(obj, view);

	WARN_ON(vma->pin_count == 0);
	WARN_ON(!i915_gem_obj_ggtt_bound_view(obj, view));

	--vma->pin_count;
}
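/*
 * Illustrative sketch: pinning comes in view-matched pairs; whatever
 * GGTT view was pinned must be named again when unpinning:
 *
 *	ret = i915_gem_object_ggtt_pin(obj, &view, 0, PIN_MAPPABLE);
 *	if (ret == 0) {
 *		... access the object through the aperture ...
 *		i915_gem_object_ggtt_unpin_view(obj, &view);
 *	}
 */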
int
i915_gem_busy_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_gem_busy *args = data;
	struct drm_i915_gem_object *obj;
	int ret;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Count all active objects as busy, even if they are currently not used
	 * by the gpu. Users of this interface expect objects to eventually
	 * become non-busy without any further actions, therefore emit any
	 * necessary flushes here.
	 */
	ret = i915_gem_object_flush_active(obj);
	if (ret)
		goto unref;

	args->busy = 0;
	if (obj->active) {
		int i;

		for (i = 0; i < I915_NUM_ENGINES; i++) {
			struct drm_i915_gem_request *req;

			req = obj->last_read_req[i];
			if (req)
				args->busy |= 1 << (16 + req->engine->exec_id);
		}
		if (obj->last_write_req)
			args->busy |= obj->last_write_req->engine->exec_id;
	}

unref:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

int
i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
			struct drm_file *file_priv)
{
	return i915_gem_ring_throttle(dev, file_priv);
}

int
i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
		       struct drm_file *file_priv)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_madvise *args = data;
	struct drm_i915_gem_object *obj;
	int ret;

	switch (args->madv) {
	case I915_MADV_DONTNEED:
	case I915_MADV_WILLNEED:
		break;
	default:
		return -EINVAL;
	}

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(file_priv, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	if (i915_gem_obj_is_pinned(obj)) {
		ret = -EINVAL;
		goto out;
	}

	if (obj->pages &&
	    obj->tiling_mode != I915_TILING_NONE &&
	    dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
		if (obj->madv == I915_MADV_WILLNEED)
			i915_gem_object_unpin_pages(obj);
		if (args->madv == I915_MADV_WILLNEED)
			i915_gem_object_pin_pages(obj);
	}

	if (obj->madv != __I915_MADV_PURGED)
		obj->madv = args->madv;

	/* if the object is no longer attached, discard its backing storage */
	if (obj->madv == I915_MADV_DONTNEED && obj->pages == NULL)
		i915_gem_object_truncate(obj);

	args->retained = obj->madv != __I915_MADV_PURGED;

out:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}
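/*
 * Illustrative sketch of the userspace contract for the madvise ioctl
 * above: a buffer cache marks idle buffers purgeable, then checks
 * "retained" when reusing them ("fd" and "handle" are hypothetical):
 *
 *	struct drm_i915_gem_madvise arg = {
 *		.handle = handle,
 *		.madv = I915_MADV_DONTNEED,
 *	};
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_MADVISE, &arg);
 *	(later, with .madv = I915_MADV_WILLNEED: arg.retained == 0 means
 *	 the kernel already purged the backing pages)
 */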
void i915_gem_object_init(struct drm_i915_gem_object *obj,
			  const struct drm_i915_gem_object_ops *ops)
{
	int i;

	INIT_LIST_HEAD(&obj->global_list);
	for (i = 0; i < I915_NUM_ENGINES; i++)
		INIT_LIST_HEAD(&obj->engine_list[i]);
	INIT_LIST_HEAD(&obj->obj_exec_link);
	INIT_LIST_HEAD(&obj->vma_list);
	INIT_LIST_HEAD(&obj->batch_pool_link);

	obj->ops = ops;

	obj->fence_reg = I915_FENCE_REG_NONE;
	obj->madv = I915_MADV_WILLNEED;

	i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size);
}

static const struct drm_i915_gem_object_ops i915_gem_object_ops = {
	.flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE,
	.get_pages = i915_gem_object_get_pages_gtt,
	.put_pages = i915_gem_object_put_pages_gtt,
};

struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
						  size_t size)
{
	struct drm_i915_gem_object *obj;
#if 0
	struct address_space *mapping;
	gfp_t mask;
#endif

	obj = i915_gem_object_alloc(dev);
	if (obj == NULL)
		return NULL;

	if (drm_gem_object_init(dev, &obj->base, size) != 0) {
		i915_gem_object_free(obj);
		return NULL;
	}

#if 0
	mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
	if (IS_CRESTLINE(dev) || IS_BROADWATER(dev)) {
		/* 965gm cannot relocate objects above 4GiB. */
		mask &= ~__GFP_HIGHMEM;
		mask |= __GFP_DMA32;
	}

	mapping = file_inode(obj->base.filp)->i_mapping;
	mapping_set_gfp_mask(mapping, mask);
#endif

	i915_gem_object_init(obj, &i915_gem_object_ops);

	obj->base.write_domain = I915_GEM_DOMAIN_CPU;
	obj->base.read_domains = I915_GEM_DOMAIN_CPU;

	if (HAS_LLC(dev)) {
		/* On some devices, we can have the GPU use the LLC (the CPU
		 * cache) for about a 10% performance improvement
		 * compared to uncached. Graphics requests other than
		 * display scanout are coherent with the CPU in
		 * accessing this cache. This means in this mode we
		 * don't need to clflush on the CPU side, and on the
		 * GPU side we only need to flush internal caches to
		 * get data visible to the CPU.
		 *
		 * However, we maintain the display planes as UC, and so
		 * need to rebind when first used as such.
		 */
		obj->cache_level = I915_CACHE_LLC;
	} else
		obj->cache_level = I915_CACHE_NONE;

	trace_i915_gem_object_create(obj);

	return obj;
}
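/*
 * Illustrative sketch: the usual kernel-internal pattern built on
 * i915_gem_alloc_object() above for small driver-owned buffers:
 *
 *	obj = i915_gem_alloc_object(dev, PAGE_SIZE);
 *	if (obj == NULL)
 *		return -ENOMEM;
 *	ret = i915_gem_object_ggtt_pin(obj, &i915_ggtt_view_normal,
 *				       4096, PIN_MAPPABLE);
 */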
static bool discard_backing_storage(struct drm_i915_gem_object *obj)
{
	/* If we are the last user of the backing storage (be it shmemfs
	 * pages or stolen etc), we know that the pages are going to be
	 * immediately released. In this case, we can then skip copying
	 * back the contents from the GPU.
	 */

	if (obj->madv != I915_MADV_WILLNEED)
		return false;

	if (obj->base.filp == NULL)
		return true;

	/* At first glance, this looks racy, but then again so would be
	 * userspace racing mmap against close. However, the first external
	 * reference to the filp can only be obtained through the
	 * i915_gem_mmap_ioctl() which safeguards us against the user
	 * acquiring such a reference whilst we are in the middle of
	 * freeing the object.
	 */
#if 0
	return atomic_long_read(&obj->base.filp->f_count) == 1;
#else
	return false;
#endif
}

void i915_gem_free_object(struct drm_gem_object *gem_obj)
{
	struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct i915_vma *vma, *next;

	intel_runtime_pm_get(dev_priv);

	trace_i915_gem_object_destroy(obj);

	list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) {
		int ret;

		vma->pin_count = 0;
		ret = i915_vma_unbind(vma);
		if (WARN_ON(ret == -ERESTARTSYS)) {
			bool was_interruptible;

			was_interruptible = dev_priv->mm.interruptible;
			dev_priv->mm.interruptible = false;

			WARN_ON(i915_vma_unbind(vma));

			dev_priv->mm.interruptible = was_interruptible;
		}
	}

	/* Stolen objects don't hold a ref, but do hold pin count. Fix that up
	 * before progressing.
	 */
	if (obj->stolen)
		i915_gem_object_unpin_pages(obj);

	WARN_ON(obj->frontbuffer_bits);

	if (obj->pages && obj->madv == I915_MADV_WILLNEED &&
	    dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES &&
	    obj->tiling_mode != I915_TILING_NONE)
		i915_gem_object_unpin_pages(obj);

	if (WARN_ON(obj->pages_pin_count))
		obj->pages_pin_count = 0;
	if (discard_backing_storage(obj))
		obj->madv = I915_MADV_DONTNEED;
	i915_gem_object_put_pages(obj);
	i915_gem_object_free_mmap_offset(obj);

	BUG_ON(obj->pages);

#if 0
	if (obj->base.import_attach)
		drm_prime_gem_destroy(&obj->base, NULL);
#endif

	if (obj->ops->release)
		obj->ops->release(obj);

	drm_gem_object_release(&obj->base);
	i915_gem_info_remove_obj(dev_priv, obj->base.size);

	kfree(obj->bit_17);
	i915_gem_object_free(obj);

	intel_runtime_pm_put(dev_priv);
}

struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
				     struct i915_address_space *vm)
{
	struct i915_vma *vma;
	list_for_each_entry(vma, &obj->vma_list, obj_link) {
		if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL &&
		    vma->vm == vm)
			return vma;
	}
	return NULL;
}

struct i915_vma *i915_gem_obj_to_ggtt_view(struct drm_i915_gem_object *obj,
					   const struct i915_ggtt_view *view)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct i915_ggtt *ggtt = &dev_priv->ggtt;
	struct i915_vma *vma;

	BUG_ON(!view);

	list_for_each_entry(vma, &obj->vma_list, obj_link)
		if (vma->vm == &ggtt->base &&
		    i915_ggtt_view_equal(&vma->ggtt_view, view))
			return vma;
	return NULL;
}
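/*
 * Illustrative sketch: using the VMA lookup helpers above to ask where
 * (if anywhere) an object currently sits in the global GTT ("offset"
 * is hypothetical):
 *
 *	vma = i915_gem_obj_to_ggtt_view(obj, &i915_ggtt_view_normal);
 *	if (vma && drm_mm_node_allocated(&vma->node))
 *		offset = vma->node.start;
 */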
void i915_gem_vma_destroy(struct i915_vma *vma)
{
	WARN_ON(vma->node.allocated);

	/* Keep the vma as a placeholder in the execbuffer reservation lists */
	if (!list_empty(&vma->exec_list))
		return;

	if (!vma->is_ggtt)
		i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm));

	list_del(&vma->obj_link);

	kfree(vma);
}

static void
i915_gem_stop_engines(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *engine;

	for_each_engine(engine, dev_priv)
		dev_priv->gt.stop_engine(engine);
}

int
i915_gem_suspend(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret = 0;

	mutex_lock(&dev->struct_mutex);
	ret = i915_gpu_idle(dev);
	if (ret)
		goto err;

	i915_gem_retire_requests(dev);

	i915_gem_stop_engines(dev);
	mutex_unlock(&dev->struct_mutex);

	cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
	cancel_delayed_work_sync(&dev_priv->mm.retire_work);
#if 0
	flush_delayed_work(&dev_priv->mm.idle_work);
#endif

	/* Assert that we successfully flushed all the work and
	 * reset the GPU back to its idle, low power state.
	 */
	WARN_ON(dev_priv->mm.busy);

	return 0;

err:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice)
{
	struct intel_engine_cs *engine = req->engine;
	struct drm_device *dev = engine->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	u32 *remap_info = dev_priv->l3_parity.remap_info[slice];
	int i, ret;

	if (!HAS_L3_DPF(dev) || !remap_info)
		return 0;

	/* Three dwords per remap write: the LRI opcode, the register
	 * offset and the value to load.
	 */
	ret = intel_ring_begin(req, GEN7_L3LOG_SIZE / 4 * 3);
	if (ret)
		return ret;

	/*
	 * Note: We do not worry about the concurrent register cacheline hang
	 * here because no other code should access these registers other than
	 * at initialization time.
	 */
	for (i = 0; i < GEN7_L3LOG_SIZE / 4; i++) {
		intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(1));
		intel_ring_emit_reg(engine, GEN7_L3LOG(slice, i));
		intel_ring_emit(engine, remap_info[i]);
	}

	intel_ring_advance(engine);

	return ret;
}

void i915_gem_init_swizzling(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;

	if (INTEL_INFO(dev)->gen < 5 ||
	    dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
		return;

	I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
				 DISP_TILE_SURFACE_SWIZZLING);

	if (IS_GEN5(dev))
		return;

	I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
	if (IS_GEN6(dev))
		I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB));
	else if (IS_GEN7(dev))
		I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB));
	else if (IS_GEN8(dev))
		I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW));
	else
		BUG();
}

static void init_unused_ring(struct drm_device *dev, u32 base)
{
	struct drm_i915_private *dev_priv = dev->dev_private;

	I915_WRITE(RING_CTL(base), 0);
	I915_WRITE(RING_HEAD(base), 0);
	I915_WRITE(RING_TAIL(base), 0);
	I915_WRITE(RING_START(base), 0);
}

static void init_unused_rings(struct drm_device *dev)
{
	if (IS_I830(dev)) {
		init_unused_ring(dev, PRB1_BASE);
		init_unused_ring(dev, SRB0_BASE);
		init_unused_ring(dev, SRB1_BASE);
		init_unused_ring(dev, SRB2_BASE);
		init_unused_ring(dev, SRB3_BASE);
	} else if (IS_GEN2(dev)) {
		init_unused_ring(dev, SRB0_BASE);
		init_unused_ring(dev, SRB1_BASE);
	} else if (IS_GEN3(dev)) {
		init_unused_ring(dev, PRB1_BASE);
		init_unused_ring(dev, PRB2_BASE);
	}
}
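/*
 * Note on init_unused_ring() above: a ring is idle exactly when
 * RING_HEAD == RING_TAIL, so zeroing CTL/HEAD/TAIL/START forces the
 * idle state on rings that no kernel code will ever enable.
 */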
int i915_gem_init_engines(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret;

	ret = intel_init_render_ring_buffer(dev);
	if (ret)
		return ret;

	if (HAS_BSD(dev)) {
		ret = intel_init_bsd_ring_buffer(dev);
		if (ret)
			goto cleanup_render_ring;
	}

	if (HAS_BLT(dev)) {
		ret = intel_init_blt_ring_buffer(dev);
		if (ret)
			goto cleanup_bsd_ring;
	}

	if (HAS_VEBOX(dev)) {
		ret = intel_init_vebox_ring_buffer(dev);
		if (ret)
			goto cleanup_blt_ring;
	}

	if (HAS_BSD2(dev)) {
		ret = intel_init_bsd2_ring_buffer(dev);
		if (ret)
			goto cleanup_vebox_ring;
	}

	return 0;

cleanup_vebox_ring:
	intel_cleanup_engine(&dev_priv->engine[VECS]);
cleanup_blt_ring:
	intel_cleanup_engine(&dev_priv->engine[BCS]);
cleanup_bsd_ring:
	intel_cleanup_engine(&dev_priv->engine[VCS]);
cleanup_render_ring:
	intel_cleanup_engine(&dev_priv->engine[RCS]);

	return ret;
}

int
i915_gem_init_hw(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *engine;
	int ret, j;

	/* Double layer security blanket, see i915_gem_init() */
	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);

	if (HAS_EDRAM(dev) && INTEL_GEN(dev_priv) < 9)
		I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf));

	if (IS_HASWELL(dev))
		I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev) ?
			   LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED);

	if (HAS_PCH_NOP(dev)) {
		if (IS_IVYBRIDGE(dev)) {
			u32 temp = I915_READ(GEN7_MSG_CTL);
			temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK);
			I915_WRITE(GEN7_MSG_CTL, temp);
		} else if (INTEL_INFO(dev)->gen >= 7) {
			u32 temp = I915_READ(HSW_NDE_RSTWRN_OPT);
			temp &= ~RESET_PCH_HANDSHAKE_ENABLE;
			I915_WRITE(HSW_NDE_RSTWRN_OPT, temp);
		}
	}

	i915_gem_init_swizzling(dev);

	/*
	 * At least 830 can leave some of the unused rings
	 * "active" (i.e. head != tail) after resume which
	 * will prevent c3 entry. Make sure all unused rings
	 * are totally idle.
	 */
	init_unused_rings(dev);

	BUG_ON(!dev_priv->kernel_context);

	ret = i915_ppgtt_init_hw(dev);
	if (ret) {
		DRM_ERROR("PPGTT enable HW failed %d\n", ret);
		goto out;
	}

	/* Need to do basic initialisation of all rings first: */
	for_each_engine(engine, dev_priv) {
		ret = engine->init_hw(engine);
		if (ret)
			goto out;
	}

	intel_mocs_init_l3cc_table(dev);

	/* We can't enable contexts until all firmware is loaded */
	if (HAS_GUC_UCODE(dev)) {
		ret = intel_guc_ucode_load(dev);
		if (ret) {
			DRM_ERROR("Failed to initialize GuC, error %d\n", ret);
			ret = -EIO;
			goto out;
		}
	}

	/*
	 * Increment the next seqno by 0x100 so we have a visible break
	 * on re-initialisation
	 */
	ret = i915_gem_set_seqno(dev, dev_priv->next_seqno+0x100);
	if (ret)
		goto out;

	/* Now it is safe to go back round and do everything else: */
	for_each_engine(engine, dev_priv) {
		struct drm_i915_gem_request *req;

		req = i915_gem_request_alloc(engine, NULL);
		if (IS_ERR(req)) {
			ret = PTR_ERR(req);
			break;
		}

		if (engine->id == RCS) {
			for (j = 0; j < NUM_L3_SLICES(dev); j++) {
				ret = i915_gem_l3_remap(req, j);
				if (ret)
					goto err_request;
			}
		}

		ret = i915_ppgtt_init_ring(req);
		if (ret)
			goto err_request;

		ret = i915_gem_context_enable(req);
		if (ret)
			goto err_request;

err_request:
		i915_add_request_no_flush(req);
		if (ret) {
			DRM_ERROR("Failed to enable %s, error=%d\n",
				  engine->name, ret);
			i915_gem_cleanup_engines(dev);
			break;
		}
	}

out:
	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
	return ret;
}
int i915_gem_init(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret;

	i915.enable_execlists = intel_sanitize_enable_execlists(dev,
			i915.enable_execlists);

	mutex_lock(&dev->struct_mutex);

	if (!i915.enable_execlists) {
		dev_priv->gt.execbuf_submit = i915_gem_ringbuffer_submission;
		dev_priv->gt.init_engines = i915_gem_init_engines;
		dev_priv->gt.cleanup_engine = intel_cleanup_engine;
		dev_priv->gt.stop_engine = intel_stop_engine;
	} else {
		dev_priv->gt.execbuf_submit = intel_execlists_submission;
		dev_priv->gt.init_engines = intel_logical_rings_init;
		dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup;
		dev_priv->gt.stop_engine = intel_logical_ring_stop;
	}

	/* This is just a security blanket to placate dragons.
	 * On some systems, we very sporadically observe that the first TLBs
	 * used by the CS may be stale, despite us poking the TLB reset. If
	 * we hold the forcewake during initialisation these problems
	 * just magically go away.
	 */
	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);

	ret = i915_gem_init_userptr(dev);
	if (ret)
		goto out_unlock;

	i915_gem_init_ggtt(dev);

	ret = i915_gem_context_init(dev);
	if (ret)
		goto out_unlock;

	ret = dev_priv->gt.init_engines(dev);
	if (ret)
		goto out_unlock;

	ret = i915_gem_init_hw(dev);
	if (ret == -EIO) {
		/* Allow ring initialisation to fail by marking the GPU as
		 * wedged. But we only want to do this where the GPU is angry,
		 * for all other failure, such as an allocation failure, bail.
		 */
		DRM_ERROR("Failed to initialize GPU, declaring it wedged\n");
		atomic_or(I915_WEDGED, &dev_priv->gpu_error.reset_counter);
		ret = 0;
	}

out_unlock:
	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
	mutex_unlock(&dev->struct_mutex);

	return ret;
}
void
i915_gem_cleanup_engines(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *engine;

	for_each_engine(engine, dev_priv)
		dev_priv->gt.cleanup_engine(engine);

	if (i915.enable_execlists)
		/*
		 * Neither the BIOS, ourselves nor any other kernel
		 * expects the system to be in execlists mode on startup,
		 * so we need to reset the GPU back to legacy mode.
		 */
		intel_gpu_reset(dev, ALL_ENGINES);
}

static void
init_engine_lists(struct intel_engine_cs *engine)
{
	INIT_LIST_HEAD(&engine->active_list);
	INIT_LIST_HEAD(&engine->request_list);
}

void
i915_gem_load_init_fences(struct drm_i915_private *dev_priv)
{
	struct drm_device *dev = dev_priv->dev;

	if (INTEL_INFO(dev_priv)->gen >= 7 && !IS_VALLEYVIEW(dev_priv) &&
	    !IS_CHERRYVIEW(dev_priv))
		dev_priv->num_fence_regs = 32;
	else if (INTEL_INFO(dev_priv)->gen >= 4 || IS_I945G(dev_priv) ||
		 IS_I945GM(dev_priv) || IS_G33(dev_priv))
		dev_priv->num_fence_regs = 16;
	else
		dev_priv->num_fence_regs = 8;

	if (intel_vgpu_active(dev))
		dev_priv->num_fence_regs =
				I915_READ(vgtif_reg(avail_rs.fence_num));

	/* Initialize fence registers to zero */
	i915_gem_restore_fences(dev);

	i915_gem_detect_bit_6_swizzle(dev);
}

void
i915_gem_load_init(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int i;

	INIT_LIST_HEAD(&dev_priv->vm_list);
	INIT_LIST_HEAD(&dev_priv->context_list);
	INIT_LIST_HEAD(&dev_priv->mm.unbound_list);
	INIT_LIST_HEAD(&dev_priv->mm.bound_list);
	INIT_LIST_HEAD(&dev_priv->mm.fence_list);
	for (i = 0; i < I915_NUM_ENGINES; i++)
		init_engine_lists(&dev_priv->engine[i]);
	for (i = 0; i < I915_MAX_NUM_FENCES; i++)
		INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
	INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
			  i915_gem_retire_work_handler);
	INIT_DELAYED_WORK(&dev_priv->mm.idle_work,
			  i915_gem_idle_work_handler);
	init_waitqueue_head(&dev_priv->gpu_error.reset_queue);

	dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL;

	/*
	 * Set initial sequence number for requests.
	 * Using this number allows the wraparound to happen early,
	 * catching any obvious problems.
	 */
	dev_priv->next_seqno = ((u32)~0 - 0x1100);
	dev_priv->last_seqno = ((u32)~0 - 0x1101);

	init_waitqueue_head(&dev_priv->pending_flip_queue);

	dev_priv->mm.interruptible = true;

	lockinit(&dev_priv->fb_tracking.lock, "drmftl", 0, LK_CANRECURSE);
}

void i915_gem_load_cleanup(struct drm_device *dev)
{
#if 0
	struct drm_i915_private *dev_priv = to_i915(dev);

	kmem_cache_destroy(dev_priv->requests);
	kmem_cache_destroy(dev_priv->vmas);
	kmem_cache_destroy(dev_priv->objects);
#endif
}
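/*
 * Worked example for the seqno initialisation in i915_gem_load_init()
 * above: next_seqno starts at (u32)~0 - 0x1100 = 0xffffeeff, so the
 * 32-bit seqno wraps after only 0x1100 (4352) requests and any
 * wraparound bug surfaces within seconds instead of after billions of
 * requests.
 */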
void i915_gem_release(struct drm_device *dev, struct drm_file *file)
{
	struct drm_i915_file_private *file_priv = file->driver_priv;

	/* Clean up our request list when the client is going away, so that
	 * later retire_requests won't dereference our soon-to-be-gone
	 * file_priv.
	 */
	spin_lock(&file_priv->mm.lock);
	while (!list_empty(&file_priv->mm.request_list)) {
		struct drm_i915_gem_request *request;

		request = list_first_entry(&file_priv->mm.request_list,
					   struct drm_i915_gem_request,
					   client_list);
		list_del(&request->client_list);
		request->file_priv = NULL;
	}
	spin_unlock(&file_priv->mm.lock);

	if (!list_empty(&file_priv->rps.link)) {
		lockmgr(&to_i915(dev)->rps.client_lock, LK_EXCLUSIVE);
		list_del(&file_priv->rps.link);
		lockmgr(&to_i915(dev)->rps.client_lock, LK_RELEASE);
	}
}

int
i915_gem_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
		    vm_ooffset_t foff, struct ucred *cred, u_short *color)
{
	*color = 0; /* XXXKIB */
	return (0);
}

void
i915_gem_pager_dtor(void *handle)
{
	struct drm_gem_object *obj;
	struct drm_device *dev;

	obj = handle;
	dev = obj->dev;

	mutex_lock(&dev->struct_mutex);
	drm_gem_free_mmap_offset(obj);
	i915_gem_release_mmap(to_intel_bo(obj));
	drm_gem_object_unreference(obj);
	mutex_unlock(&dev->struct_mutex);
}

int i915_gem_open(struct drm_device *dev, struct drm_file *file)
{
	struct drm_i915_file_private *file_priv;
	int ret;

	DRM_DEBUG_DRIVER("\n");

	file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
	if (!file_priv)
		return -ENOMEM;

	file->driver_priv = file_priv;
	file_priv->dev_priv = dev->dev_private;
	file_priv->file = file;
	INIT_LIST_HEAD(&file_priv->rps.link);

	spin_init(&file_priv->mm.lock, "i915_priv");
	INIT_LIST_HEAD(&file_priv->mm.request_list);

	file_priv->bsd_ring = -1;

	ret = i915_gem_context_open(dev, file);
	if (ret)
		kfree(file_priv);

	return ret;
}

/**
 * i915_gem_track_fb - update frontbuffer tracking
 * @old: current GEM buffer for the frontbuffer slots
 * @new: new GEM buffer for the frontbuffer slots
 * @frontbuffer_bits: bitmask of frontbuffer slots
 *
 * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them
 * from @old and setting them in @new. Both @old and @new can be NULL.
 */
void i915_gem_track_fb(struct drm_i915_gem_object *old,
		       struct drm_i915_gem_object *new,
		       unsigned frontbuffer_bits)
{
	if (old) {
		WARN_ON(!mutex_is_locked(&old->base.dev->struct_mutex));
		WARN_ON(!(old->frontbuffer_bits & frontbuffer_bits));
		old->frontbuffer_bits &= ~frontbuffer_bits;
	}

	if (new) {
		WARN_ON(!mutex_is_locked(&new->base.dev->struct_mutex));
		WARN_ON(new->frontbuffer_bits & frontbuffer_bits);
		new->frontbuffer_bits |= frontbuffer_bits;
	}
}
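/*
 * Illustrative sketch: a flip handler handing the frontbuffer bits from
 * the outgoing to the incoming object with i915_gem_track_fb() above
 * ("old_obj", "new_obj" and "fb_bit" are hypothetical):
 *
 *	mutex_lock(&dev->struct_mutex);
 *	i915_gem_track_fb(old_obj, new_obj, fb_bit);
 *	mutex_unlock(&dev->struct_mutex);
 */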
"global" : "ppgtt"); 5427 return -1; 5428 } 5429 5430 u64 i915_gem_obj_ggtt_offset_view(struct drm_i915_gem_object *o, 5431 const struct i915_ggtt_view *view) 5432 { 5433 struct drm_i915_private *dev_priv = to_i915(o->base.dev); 5434 struct i915_ggtt *ggtt = &dev_priv->ggtt; 5435 struct i915_vma *vma; 5436 5437 list_for_each_entry(vma, &o->vma_list, obj_link) 5438 if (vma->vm == &ggtt->base && 5439 i915_ggtt_view_equal(&vma->ggtt_view, view)) 5440 return vma->node.start; 5441 5442 WARN(1, "global vma for this object not found. (view=%u)\n", view->type); 5443 return -1; 5444 } 5445 5446 bool i915_gem_obj_bound(struct drm_i915_gem_object *o, 5447 struct i915_address_space *vm) 5448 { 5449 struct i915_vma *vma; 5450 5451 list_for_each_entry(vma, &o->vma_list, obj_link) { 5452 if (vma->is_ggtt && 5453 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) 5454 continue; 5455 if (vma->vm == vm && drm_mm_node_allocated(&vma->node)) 5456 return true; 5457 } 5458 5459 return false; 5460 } 5461 5462 bool i915_gem_obj_ggtt_bound_view(struct drm_i915_gem_object *o, 5463 const struct i915_ggtt_view *view) 5464 { 5465 struct drm_i915_private *dev_priv = to_i915(o->base.dev); 5466 struct i915_ggtt *ggtt = &dev_priv->ggtt; 5467 struct i915_vma *vma; 5468 5469 list_for_each_entry(vma, &o->vma_list, obj_link) 5470 if (vma->vm == &ggtt->base && 5471 i915_ggtt_view_equal(&vma->ggtt_view, view) && 5472 drm_mm_node_allocated(&vma->node)) 5473 return true; 5474 5475 return false; 5476 } 5477 5478 bool i915_gem_obj_bound_any(struct drm_i915_gem_object *o) 5479 { 5480 struct i915_vma *vma; 5481 5482 list_for_each_entry(vma, &o->vma_list, obj_link) 5483 if (drm_mm_node_allocated(&vma->node)) 5484 return true; 5485 5486 return false; 5487 } 5488 5489 unsigned long i915_gem_obj_size(struct drm_i915_gem_object *o, 5490 struct i915_address_space *vm) 5491 { 5492 struct drm_i915_private *dev_priv = o->base.dev->dev_private; 5493 struct i915_vma *vma; 5494 5495 WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base); 5496 5497 BUG_ON(list_empty(&o->vma_list)); 5498 5499 list_for_each_entry(vma, &o->vma_list, obj_link) { 5500 if (vma->is_ggtt && 5501 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) 5502 continue; 5503 if (vma->vm == vm) 5504 return vma->node.size; 5505 } 5506 return 0; 5507 } 5508 5509 bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj) 5510 { 5511 struct i915_vma *vma; 5512 list_for_each_entry(vma, &obj->vma_list, obj_link) 5513 if (vma->pin_count > 0) 5514 return true; 5515 5516 return false; 5517 } 5518 5519 /* Like i915_gem_object_get_page(), but mark the returned page dirty */ 5520 struct page * 5521 i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, int n) 5522 { 5523 struct page *page; 5524 5525 /* Only default objects have per-page dirty tracking */ 5526 if (WARN_ON((obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE) == 0)) 5527 return NULL; 5528 5529 page = i915_gem_object_get_page(obj, n); 5530 set_page_dirty(page); 5531 return page; 5532 } 5533 5534 /* Allocate a new GEM object and fill it with the supplied data */ 5535 struct drm_i915_gem_object * 5536 i915_gem_object_create_from_data(struct drm_device *dev, 5537 const void *data, size_t size) 5538 { 5539 struct drm_i915_gem_object *obj; 5540 struct sg_table *sg; 5541 size_t bytes; 5542 int ret; 5543 5544 obj = i915_gem_alloc_object(dev, round_up(size, PAGE_SIZE)); 5545 if (IS_ERR_OR_NULL(obj)) 5546 return obj; 5547 5548 ret = i915_gem_object_set_to_cpu_domain(obj, true); 5549 if (ret) 5550 goto fail; 5551 5552 ret = 
/* Allocate a new GEM object and fill it with the supplied data */
struct drm_i915_gem_object *
i915_gem_object_create_from_data(struct drm_device *dev,
				 const void *data, size_t size)
{
	struct drm_i915_gem_object *obj;
	struct sg_table *sg;
	size_t bytes;
	int ret;

	obj = i915_gem_alloc_object(dev, round_up(size, PAGE_SIZE));
	if (IS_ERR_OR_NULL(obj))
		return obj;

	ret = i915_gem_object_set_to_cpu_domain(obj, true);
	if (ret)
		goto fail;

	ret = i915_gem_object_get_pages(obj);
	if (ret)
		goto fail;

	i915_gem_object_pin_pages(obj);
	sg = obj->pages;
	bytes = sg_copy_from_buffer(sg->sgl, sg->nents, data, size);
	obj->dirty = 1;		/* Backing store is now out of date */
	i915_gem_object_unpin_pages(obj);

	if (WARN_ON(bytes != size)) {
		DRM_ERROR("Incomplete copy, wrote %zu of %zu", bytes, size);
		ret = -EFAULT;
		goto fail;
	}

	return obj;

fail:
	drm_gem_object_unreference(&obj->base);
	return ERR_PTR(ret);
}