/*
 * Copyright © 2008-2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */

#include <drm/drmP.h>
#include <drm/drm_vma_manager.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "i915_vgpu.h"
#include "i915_trace.h"
#include "intel_drv.h"
#include "intel_mocs.h"
#include <linux/shmem_fs.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/pci.h>

#include <sys/mman.h>
#include <vm/vm_map.h>
#include <vm/vm_param.h>

static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
static void
i915_gem_object_retire__write(struct drm_i915_gem_object *obj);
static void
i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring);

static bool cpu_cache_is_coherent(struct drm_device *dev,
				  enum i915_cache_level level)
{
	return HAS_LLC(dev) || level != I915_CACHE_NONE;
}

static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
	if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
		return true;

	return obj->pin_display;
}

/* some bookkeeping */
static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
				  size_t size)
{
	spin_lock(&dev_priv->mm.object_stat_lock);
	dev_priv->mm.object_count++;
	dev_priv->mm.object_memory += size;
	spin_unlock(&dev_priv->mm.object_stat_lock);
}

static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
				     size_t size)
{
	spin_lock(&dev_priv->mm.object_stat_lock);
	dev_priv->mm.object_count--;
	dev_priv->mm.object_memory -= size;
	spin_unlock(&dev_priv->mm.object_stat_lock);
}

static int
i915_gem_wait_for_error(struct i915_gpu_error *error)
{
	int ret;

	if (!i915_reset_in_progress(error))
		return 0;

	/*
	 * Only wait 10 seconds for the gpu reset to complete to avoid hanging
	 * userspace. If it takes that long something really bad is going on and
	 * we should simply try to bail out and fail as gracefully as possible.
	 */
	ret = wait_event_interruptible_timeout(error->reset_queue,
					       !i915_reset_in_progress(error),
					       10*HZ);
	if (ret == 0) {
		DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
		return -EIO;
	} else if (ret < 0) {
		return ret;
	} else {
		return 0;
	}
}

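/*
 * Note (added for clarity): the three-way check above follows the
 * wait_event_interruptible_timeout() convention: it returns 0 if the
 * timeout elapsed, a negative errno (e.g. -ERESTARTSYS) if the wait was
 * interrupted by a signal, and the remaining jiffies (>= 1) once the
 * condition became true.
 */
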
int i915_mutex_lock_interruptible(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret;

	ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
	if (ret)
		return ret;

	ret = mutex_lock_interruptible(&dev->struct_mutex);
	if (ret)
		return ret;

	WARN_ON(i915_verify_lists(dev));
	return 0;
}

int
i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
			    struct drm_file *file)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct i915_ggtt *ggtt = &dev_priv->ggtt;
	struct drm_i915_gem_get_aperture *args = data;
	struct i915_vma *vma;
	size_t pinned;

	pinned = 0;
	mutex_lock(&dev->struct_mutex);
	list_for_each_entry(vma, &ggtt->base.active_list, vm_link)
		if (vma->pin_count)
			pinned += vma->node.size;
	list_for_each_entry(vma, &ggtt->base.inactive_list, vm_link)
		if (vma->pin_count)
			pinned += vma->node.size;
	mutex_unlock(&dev->struct_mutex);

	args->aper_size = ggtt->base.total;
	args->aper_available_size = args->aper_size - pinned;

	return 0;
}

#if 0
static int
i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
{
	struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
	char *vaddr = obj->phys_handle->vaddr;
	struct sg_table *st;
	struct scatterlist *sg;
	int i;

	if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
		return -EINVAL;

	for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
		struct page *page;
		char *src;

		page = shmem_read_mapping_page(mapping, i);
		if (IS_ERR(page))
			return PTR_ERR(page);

		src = kmap_atomic(page);
		memcpy(vaddr, src, PAGE_SIZE);
		drm_clflush_virt_range(vaddr, PAGE_SIZE);
		kunmap_atomic(src);

		put_page(page);
		vaddr += PAGE_SIZE;
	}

	i915_gem_chipset_flush(obj->base.dev);

	st = kmalloc(sizeof(*st), GFP_KERNEL);
	if (st == NULL)
		return -ENOMEM;

	if (sg_alloc_table(st, 1, GFP_KERNEL)) {
		kfree(st);
		return -ENOMEM;
	}

	sg = st->sgl;
	sg->offset = 0;
	sg->length = obj->base.size;

	sg_dma_address(sg) = obj->phys_handle->busaddr;
	sg_dma_len(sg) = obj->base.size;

	obj->pages = st;
	return 0;
}

static void
i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj)
{
	int ret;

	BUG_ON(obj->madv == __I915_MADV_PURGED);

	ret = i915_gem_object_set_to_cpu_domain(obj, true);
	if (WARN_ON(ret)) {
		/* In the event of a disaster, abandon all caches and
		 * hope for the best.
		 */
		obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
	}

	if (obj->madv == I915_MADV_DONTNEED)
		obj->dirty = 0;

	if (obj->dirty) {
		struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
		char *vaddr = obj->phys_handle->vaddr;
		int i;

		for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
			struct page *page;
			char *dst;

			page = shmem_read_mapping_page(mapping, i);
			if (IS_ERR(page))
				continue;

			dst = kmap_atomic(page);
			drm_clflush_virt_range(vaddr, PAGE_SIZE);
			memcpy(dst, vaddr, PAGE_SIZE);
			kunmap_atomic(dst);

			set_page_dirty(page);
			if (obj->madv == I915_MADV_WILLNEED)
				mark_page_accessed(page);
			put_page(page);
			vaddr += PAGE_SIZE;
		}
		obj->dirty = 0;
	}

	sg_free_table(obj->pages);
	kfree(obj->pages);
}

static void
i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
{
	drm_pci_free(obj->base.dev, obj->phys_handle);
}

static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
	.get_pages = i915_gem_object_get_pages_phys,
	.put_pages = i915_gem_object_put_pages_phys,
	.release = i915_gem_object_release_phys,
};
#endif

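/*
 * Note (added; an observation about this port): the physical-object ops
 * table above is compiled out, and i915_gem_object_attach_phys() below
 * keeps the matching obj->ops assignment under #if 0 as well, so phys
 * objects here keep the default shmem-backed ops and only gain a
 * phys_handle for CPU access.
 */
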
static int
drop_pages(struct drm_i915_gem_object *obj)
{
	struct i915_vma *vma, *next;
	int ret;

	drm_gem_object_reference(&obj->base);
	list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link)
		if (i915_vma_unbind(vma))
			break;

	ret = i915_gem_object_put_pages(obj);
	drm_gem_object_unreference(&obj->base);

	return ret;
}

int
i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
			    int align)
{
	drm_dma_handle_t *phys;
	int ret;

	if (obj->phys_handle) {
		if ((unsigned long)obj->phys_handle->vaddr & (align - 1))
			return -EBUSY;

		return 0;
	}

	if (obj->madv != I915_MADV_WILLNEED)
		return -EFAULT;

#if 0
	if (obj->base.filp == NULL)
		return -EINVAL;
#endif

	ret = drop_pages(obj);
	if (ret)
		return ret;

	/* create a new object */
	phys = drm_pci_alloc(obj->base.dev, obj->base.size, align);
	if (!phys)
		return -ENOMEM;

	obj->phys_handle = phys;
#if 0
	obj->ops = &i915_gem_phys_ops;
#endif

	return i915_gem_object_get_pages(obj);
}

static int
i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pwrite *args,
		     struct drm_file *file_priv)
{
	struct drm_device *dev = obj->base.dev;
	void *vaddr = (char *)obj->phys_handle->vaddr + args->offset;
	char __user *user_data = u64_to_user_ptr(args->data_ptr);
	int ret = 0;

	/* We manually control the domain here and pretend that it
	 * remains coherent i.e. in the GTT domain, like shmem_pwrite.
	 */
	ret = i915_gem_object_wait_rendering(obj, false);
	if (ret)
		return ret;

	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
	if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) {
		unsigned long unwritten;

		/* The physical object once assigned is fixed for the lifetime
		 * of the obj, so we can safely drop the lock and continue
		 * to access vaddr.
		 */
		mutex_unlock(&dev->struct_mutex);
		unwritten = copy_from_user(vaddr, user_data, args->size);
		mutex_lock(&dev->struct_mutex);
		if (unwritten) {
			ret = -EFAULT;
			goto out;
		}
	}

	drm_clflush_virt_range(vaddr, args->size);
	i915_gem_chipset_flush(dev);

out:
	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
	return ret;
}

void *i915_gem_object_alloc(struct drm_device *dev)
{
	return kmalloc(sizeof(struct drm_i915_gem_object),
	    M_DRM, M_WAITOK | M_ZERO);
}

void i915_gem_object_free(struct drm_i915_gem_object *obj)
{
	kfree(obj);
}

static int
i915_gem_create(struct drm_file *file,
		struct drm_device *dev,
		uint64_t size,
		uint32_t *handle_p)
{
	struct drm_i915_gem_object *obj;
	int ret;
	u32 handle;

	size = roundup(size, PAGE_SIZE);
	if (size == 0)
		return -EINVAL;

	/* Allocate the new object */
	obj = i915_gem_alloc_object(dev, size);
	if (obj == NULL)
		return -ENOMEM;

	ret = drm_gem_handle_create(file, &obj->base, &handle);
	/* drop reference from allocate - handle holds it now */
	drm_gem_object_unreference_unlocked(&obj->base);
	if (ret)
		return ret;

	*handle_p = handle;
	return 0;
}

int
i915_gem_dumb_create(struct drm_file *file,
		     struct drm_device *dev,
		     struct drm_mode_create_dumb *args)
{
	/* have to work out size/pitch and return them */
	args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64);
	args->size = args->pitch * args->height;
	return i915_gem_create(file, dev,
			       args->size, &args->handle);
}

/**
 * Creates a new mm object and returns a handle to it.
 */
int
i915_gem_create_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_gem_create *args = data;

	return i915_gem_create(file, dev,
			       args->size, &args->handle);
}

static inline int
__copy_to_user_swizzled(char __user *cpu_vaddr,
			const char *gpu_vaddr, int gpu_offset,
			int length)
{
	int ret, cpu_offset = 0;

	while (length > 0) {
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
		int this_length = min(cacheline_end - gpu_offset, length);
		int swizzled_gpu_offset = gpu_offset ^ 64;

		ret = __copy_to_user(cpu_vaddr + cpu_offset,
				     gpu_vaddr + swizzled_gpu_offset,
				     this_length);
		if (ret)
			return ret + length;

		cpu_offset += this_length;
		gpu_offset += this_length;
		length -= this_length;
	}

	return 0;
}

static inline int
__copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
			  const char __user *cpu_vaddr,
			  int length)
{
	int ret, cpu_offset = 0;

	while (length > 0) {
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
		int this_length = min(cacheline_end - gpu_offset, length);
		int swizzled_gpu_offset = gpu_offset ^ 64;

		ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
				       cpu_vaddr + cpu_offset,
				       this_length);
		if (ret)
			return ret + length;

		cpu_offset += this_length;
		gpu_offset += this_length;
		length -= this_length;
	}

	return 0;
}

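/*
 * Note (added for clarity): the helpers above implement bit-17 swizzle
 * compensation. XORing the offset with 64 flips bit 6, i.e. swaps each
 * 64-byte cacheline with its partner inside a 128-byte pair; for example
 * bytes at 0x1000 are copied from/to 0x1040 and vice versa. The loop is
 * chunked on cacheline boundaries so every chunk stays within a single
 * (possibly swapped) cacheline.
 */
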
/*
 * Pins the specified object's pages and synchronizes the object with
 * GPU accesses. Sets needs_clflush to non-zero if the caller should
 * flush the object from the CPU cache.
 */
int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
				    int *needs_clflush)
{
	int ret;

	*needs_clflush = 0;

#if 0
	if (WARN_ON((obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE) == 0))
		return -EINVAL;
#endif

	if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) {
		/* If we're not in the cpu read domain, set ourself into the gtt
		 * read domain and manually flush cachelines (if required). This
		 * optimizes for the case when the gpu will dirty the data
		 * anyway again before the next pread happens. */
		*needs_clflush = !cpu_cache_is_coherent(obj->base.dev,
							obj->cache_level);
		ret = i915_gem_object_wait_rendering(obj, true);
		if (ret)
			return ret;
	}

	ret = i915_gem_object_get_pages(obj);
	if (ret)
		return ret;

	i915_gem_object_pin_pages(obj);

	return ret;
}

/* Per-page copy function for the shmem pread fastpath.
 * Flushes invalid cachelines before reading the target if
 * needs_clflush is set. */
static int
shmem_pread_fast(struct page *page, int shmem_page_offset, int page_length,
		 char __user *user_data,
		 bool page_do_bit17_swizzling, bool needs_clflush)
{
	char *vaddr;
	int ret;

	if (unlikely(page_do_bit17_swizzling))
		return -EINVAL;

	vaddr = kmap_atomic(page);
	if (needs_clflush)
		drm_clflush_virt_range(vaddr + shmem_page_offset,
				       page_length);
	ret = __copy_to_user_inatomic(user_data,
				      vaddr + shmem_page_offset,
				      page_length);
	kunmap_atomic(vaddr);

	return ret ? -EFAULT : 0;
}

static void
shmem_clflush_swizzled_range(char *addr, unsigned long length,
			     bool swizzled)
{
	if (unlikely(swizzled)) {
		unsigned long start = (unsigned long) addr;
		unsigned long end = (unsigned long) addr + length;

		/* For swizzling simply ensure that we always flush both
		 * channels. Lame, but simple and it works. Swizzled
		 * pwrite/pread is far from a hotpath - current userspace
		 * doesn't use it at all. */
		start = round_down(start, 128);
		end = round_up(end, 128);

		drm_clflush_virt_range((void *)start, end - start);
	} else {
		drm_clflush_virt_range(addr, length);
	}
}

/* Only difference to the fast-path function is that this can handle bit17
 * and uses non-atomic copy and kmap functions. */
static int
shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length,
		 char __user *user_data,
		 bool page_do_bit17_swizzling, bool needs_clflush)
{
	char *vaddr;
	int ret;

	vaddr = kmap(page);
	if (needs_clflush)
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
					     page_length,
					     page_do_bit17_swizzling);

	if (page_do_bit17_swizzling)
		ret = __copy_to_user_swizzled(user_data,
					      vaddr, shmem_page_offset,
					      page_length);
	else
		ret = __copy_to_user(user_data,
				     vaddr + shmem_page_offset,
				     page_length);
	kunmap(page);

	return ret ? -EFAULT : 0;
}

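/*
 * Note (added for clarity): the pread loop below first tries the fast
 * per-page copy (atomic kmap, no page faults allowed) while holding
 * struct_mutex; only when that copy faults does it drop the mutex,
 * prefault the user buffer and retry with the sleepable slow path above.
 */
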
static int
i915_gem_shmem_pread(struct drm_device *dev,
		     struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pread *args,
		     struct drm_file *file)
{
	char __user *user_data;
	ssize_t remain;
	loff_t offset;
	int shmem_page_offset, page_length, ret = 0;
	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
	int prefaulted = 0;
	int needs_clflush = 0;
	struct sg_page_iter sg_iter;

	user_data = u64_to_user_ptr(args->data_ptr);
	remain = args->size;

	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);

	ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
	if (ret)
		return ret;

	offset = args->offset;

	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
			 offset >> PAGE_SHIFT) {
		struct page *page = sg_page_iter_page(&sg_iter);

		if (remain <= 0)
			break;

		/* Operation in this page
		 *
		 * shmem_page_offset = offset within page in shmem file
		 * page_length = bytes to copy for this page
		 */
		shmem_page_offset = offset_in_page(offset);
		page_length = remain;
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - shmem_page_offset;

		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
			(page_to_phys(page) & (1 << 17)) != 0;

		ret = shmem_pread_fast(page, shmem_page_offset, page_length,
				       user_data, page_do_bit17_swizzling,
				       needs_clflush);
		if (ret == 0)
			goto next_page;

		mutex_unlock(&dev->struct_mutex);

		if (likely(!i915.prefault_disable) && !prefaulted) {
			ret = fault_in_multipages_writeable(user_data, remain);
			/* Userspace is tricking us, but we've already clobbered
			 * its pages with the prefault and promised to write the
			 * data up to the first fault. Hence ignore any errors
			 * and just continue. */
			(void)ret;
			prefaulted = 1;
		}

		ret = shmem_pread_slow(page, shmem_page_offset, page_length,
				       user_data, page_do_bit17_swizzling,
				       needs_clflush);

		mutex_lock(&dev->struct_mutex);

		if (ret)
			goto out;

next_page:
		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

out:
	i915_gem_object_unpin_pages(obj);

	return ret;
}

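/*
 * Illustrative userspace usage (added; not part of the driver): the ioctl
 * below is reached via DRM_IOCTL_I915_GEM_PREAD, e.g.:
 *
 *	struct drm_i915_gem_pread pread = {
 *		.handle   = handle,		// GEM handle from GEM_CREATE
 *		.offset   = 0,			// byte offset into the object
 *		.size     = length,		// bytes to read
 *		.data_ptr = (uintptr_t)buf,	// destination buffer
 *	};
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_PREAD, &pread);
 */
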
/**
 * Reads data from the object referenced by handle.
 *
 * On error, the contents of *data are undefined.
 */
int
i915_gem_pread_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file)
{
	struct drm_i915_gem_pread *args = data;
	struct drm_i915_gem_object *obj;
	int ret = 0;

	if (args->size == 0)
		return 0;

#if 0
	if (!access_ok(VERIFY_WRITE,
		       u64_to_user_ptr(args->data_ptr),
		       args->size))
		return -EFAULT;
#endif

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Bounds check source. */
	if (args->offset > obj->base.size ||
	    args->size > obj->base.size - args->offset) {
		ret = -EINVAL;
		goto out;
	}

	/* prime objects have no backing filp to GEM pread/pwrite
	 * pages from.
	 */

	trace_i915_gem_object_pread(obj, args->offset, args->size);

	ret = i915_gem_shmem_pread(dev, obj, args, file);

out:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/* This is the fast write path which cannot handle
 * page faults in the source data
 */

static inline int
fast_user_write(struct io_mapping *mapping,
		loff_t page_base, int page_offset,
		char __user *user_data,
		int length)
{
	void __iomem *vaddr_atomic;
	void *vaddr;
	unsigned long unwritten;

	vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
	/* We can use the cpu mem copy function because this is X86. */
	vaddr = (char __force*)vaddr_atomic + page_offset;
	unwritten = __copy_from_user_inatomic_nocache(vaddr,
						      user_data, length);
	io_mapping_unmap_atomic(vaddr_atomic);
	return unwritten;
}

/**
 * This is the fast pwrite path, where we copy the data directly from the
 * user into the GTT, uncached.
 */
static int
i915_gem_gtt_pwrite_fast(struct drm_device *dev,
			 struct drm_i915_gem_object *obj,
			 struct drm_i915_gem_pwrite *args,
			 struct drm_file *file)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct i915_ggtt *ggtt = &dev_priv->ggtt;
	ssize_t remain;
	loff_t offset, page_base;
	char __user *user_data;
	int page_offset, page_length, ret;

	ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK);
	if (ret)
		goto out;

	ret = i915_gem_object_set_to_gtt_domain(obj, true);
	if (ret)
		goto out_unpin;

	ret = i915_gem_object_put_fence(obj);
	if (ret)
		goto out_unpin;

	user_data = u64_to_user_ptr(args->data_ptr);
	remain = args->size;

	offset = i915_gem_obj_ggtt_offset(obj) + args->offset;

	intel_fb_obj_invalidate(obj, ORIGIN_GTT);

	while (remain > 0) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		page_base = offset & ~PAGE_MASK;
		page_offset = offset_in_page(offset);
		page_length = remain;
		if ((page_offset + remain) > PAGE_SIZE)
			page_length = PAGE_SIZE - page_offset;

		/* If we get a fault while copying data, then (presumably) our
		 * source page isn't available. Return the error and we'll
		 * retry in the slow path.
		 */
		if (fast_user_write(ggtt->mappable, page_base,
				    page_offset, user_data, page_length)) {
			ret = -EFAULT;
			goto out_flush;
		}

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

out_flush:
	intel_fb_obj_flush(obj, false, ORIGIN_GTT);
out_unpin:
	i915_gem_object_ggtt_unpin(obj);
out:
	return ret;
}

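/*
 * Note (added; an assumption about this port): in the loop above,
 * "offset & ~PAGE_MASK" yields the page-aligned GTT address because
 * PAGE_MASK on the BSD side is (PAGE_SIZE - 1), the inverse of the
 * Linux convention.
 *
 * The shmem pwrite fast/slow pair below mirrors the pread pair: a
 * clflush may be needed *before* a copy that only partially overwrites
 * a cacheline, and *after* the copy when the write domain bypasses the
 * CPU cache.
 */
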
/* Per-page copy function for the shmem pwrite fastpath.
 * Flushes invalid cachelines before writing to the target if
 * needs_clflush_before is set and flushes out any written cachelines after
 * writing if needs_clflush is set.
 */
static int
shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length,
		  char __user *user_data,
		  bool page_do_bit17_swizzling,
		  bool needs_clflush_before,
		  bool needs_clflush_after)
{
	char *vaddr;
	int ret;

	if (unlikely(page_do_bit17_swizzling))
		return -EINVAL;

	vaddr = kmap_atomic(page);
	if (needs_clflush_before)
		drm_clflush_virt_range(vaddr + shmem_page_offset,
				       page_length);
	ret = __copy_from_user_inatomic(vaddr + shmem_page_offset,
					user_data, page_length);
	if (needs_clflush_after)
		drm_clflush_virt_range(vaddr + shmem_page_offset,
				       page_length);
	kunmap_atomic(vaddr);

	return ret ? -EFAULT : 0;
}

/* Only difference to the fast-path function is that this can handle bit17
 * and uses non-atomic copy and kmap functions. */
static int
shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
		  char __user *user_data,
		  bool page_do_bit17_swizzling,
		  bool needs_clflush_before,
		  bool needs_clflush_after)
{
	char *vaddr;
	int ret;

	vaddr = kmap(page);
	if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
					     page_length,
					     page_do_bit17_swizzling);
	if (page_do_bit17_swizzling)
		ret = __copy_from_user_swizzled(vaddr, shmem_page_offset,
						user_data,
						page_length);
	else
		ret = __copy_from_user(vaddr + shmem_page_offset,
				       user_data,
				       page_length);
	if (needs_clflush_after)
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
					     page_length,
					     page_do_bit17_swizzling);
	kunmap(page);

	return ret ? -EFAULT : 0;
}

static int
i915_gem_shmem_pwrite(struct drm_device *dev,
		      struct drm_i915_gem_object *obj,
		      struct drm_i915_gem_pwrite *args,
		      struct drm_file *file)
{
	ssize_t remain;
	loff_t offset;
	char __user *user_data;
	int shmem_page_offset, page_length, ret = 0;
	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
	int hit_slowpath = 0;
	int needs_clflush_after = 0;
	int needs_clflush_before = 0;
	struct sg_page_iter sg_iter;

	user_data = u64_to_user_ptr(args->data_ptr);
	remain = args->size;

	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);

	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
		/* If we're not in the cpu write domain, set ourself into the gtt
		 * write domain and manually flush cachelines (if required). This
		 * optimizes for the case when the gpu will use the data
		 * right away and we therefore have to clflush anyway. */
		needs_clflush_after = cpu_write_needs_clflush(obj);
		ret = i915_gem_object_wait_rendering(obj, false);
		if (ret)
			return ret;
	}
	/* Same trick applies to invalidate partially written cachelines read
	 * before writing.
	 */
	if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0)
		needs_clflush_before =
			!cpu_cache_is_coherent(dev, obj->cache_level);

	ret = i915_gem_object_get_pages(obj);
	if (ret)
		return ret;

	intel_fb_obj_invalidate(obj, ORIGIN_CPU);

	i915_gem_object_pin_pages(obj);

	offset = args->offset;
	obj->dirty = 1;

	VM_OBJECT_LOCK(obj->base.vm_obj);
	vm_object_pip_add(obj->base.vm_obj, 1);

	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
			 offset >> PAGE_SHIFT) {
		struct page *page = sg_page_iter_page(&sg_iter);
		int partial_cacheline_write;

		if (remain <= 0)
			break;

		/* Operation in this page
		 *
		 * shmem_page_offset = offset within page in shmem file
		 * page_length = bytes to copy for this page
		 */
		shmem_page_offset = offset_in_page(offset);

		page_length = remain;
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - shmem_page_offset;

		/* If we don't overwrite a cacheline completely we need to be
		 * careful to have up-to-date data by first clflushing. Don't
		 * overcomplicate things and flush the entire span being
		 * written. */
		partial_cacheline_write = needs_clflush_before &&
			((shmem_page_offset | page_length)
				& (cpu_clflush_line_size - 1));

		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
			(page_to_phys(page) & (1 << 17)) != 0;

		ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
					user_data, page_do_bit17_swizzling,
					partial_cacheline_write,
					needs_clflush_after);
		if (ret == 0)
			goto next_page;

		hit_slowpath = 1;
		mutex_unlock(&dev->struct_mutex);
		ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
					user_data, page_do_bit17_swizzling,
					partial_cacheline_write,
					needs_clflush_after);

		mutex_lock(&dev->struct_mutex);

		if (ret)
			goto out;

next_page:
		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}
	vm_object_pip_wakeup(obj->base.vm_obj);
	VM_OBJECT_UNLOCK(obj->base.vm_obj);

out:
	i915_gem_object_unpin_pages(obj);

	if (hit_slowpath) {
		/*
		 * Fixup: Flush cpu caches in case we didn't flush the dirty
		 * cachelines in-line while writing and the object moved
		 * out of the cpu write domain while we've dropped the lock.
		 */
		if (!needs_clflush_after &&
		    obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
			if (i915_gem_clflush_object(obj, obj->pin_display))
				needs_clflush_after = true;
		}
	}

	if (needs_clflush_after)
		i915_gem_chipset_flush(dev);
	else
		obj->cache_dirty = true;

	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
	return ret;
}

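/*
 * Note (added for clarity): pwrite dispatch order in the ioctl below is:
 * GTT fast path for untiled, non-CPU-domain objects that need a clflush
 * anyway; on -EFAULT/-ENOSPC it falls back to i915_gem_phys_pwrite() for
 * objects with a phys_handle, else to the shmem path above.
 */
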
/**
 * Writes data to the object referenced by handle.
 *
 * On error, the contents of the buffer that were to be modified are undefined.
 */
int
i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_pwrite *args = data;
	struct drm_i915_gem_object *obj;
	int ret;

	if (args->size == 0)
		return 0;

#if 0
	if (!access_ok(VERIFY_READ,
		       u64_to_user_ptr(args->data_ptr),
		       args->size))
		return -EFAULT;
#endif

	if (likely(!i915.prefault_disable)) {
		ret = fault_in_multipages_readable(u64_to_user_ptr(args->data_ptr),
						   args->size);
		if (ret)
			return -EFAULT;
	}

	intel_runtime_pm_get(dev_priv);

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		goto put_rpm;

	obj = to_intel_bo(drm_gem_object_lookup(file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Bounds check destination. */
	if (args->offset > obj->base.size ||
	    args->size > obj->base.size - args->offset) {
		ret = -EINVAL;
		goto out;
	}

	/* prime objects have no backing filp to GEM pread/pwrite
	 * pages from.
	 */

	trace_i915_gem_object_pwrite(obj, args->offset, args->size);

	ret = -EFAULT;
	/* We can only do the GTT pwrite on untiled buffers, as otherwise
	 * it would end up going through the fenced access, and we'll get
	 * different detiling behavior between reading and writing.
	 * pread/pwrite currently are reading and writing from the CPU
	 * perspective, requiring manual detiling by the client.
	 */
	if (obj->tiling_mode == I915_TILING_NONE &&
	    obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
	    cpu_write_needs_clflush(obj)) {
		ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
		/* Note that the gtt paths might fail with non-page-backed user
		 * pointers (e.g. gtt mappings when moving data between
		 * textures). Fallback to the shmem path in that case. */
	}

	if (ret == -EFAULT || ret == -ENOSPC) {
		if (obj->phys_handle)
			ret = i915_gem_phys_pwrite(obj, args, file);
		else
			ret = i915_gem_shmem_pwrite(dev, obj, args, file);
	}

out:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
put_rpm:
	intel_runtime_pm_put(dev_priv);

	return ret;
}

static int
i915_gem_check_wedge(unsigned reset_counter, bool interruptible)
{
	if (__i915_terminally_wedged(reset_counter))
		return -EIO;

	if (__i915_reset_in_progress(reset_counter)) {
		/* Non-interruptible callers can't handle -EAGAIN, hence return
		 * -EIO unconditionally for these. */
		if (!interruptible)
			return -EIO;

		return -EAGAIN;
	}

	return 0;
}

static void fake_irq(unsigned long data)
{
	wakeup_one((void *)data);
}

static bool missed_irq(struct drm_i915_private *dev_priv,
		       struct intel_engine_cs *engine)
{
	return test_bit(engine->id, &dev_priv->gpu_error.missed_irq_rings);
}

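/*
 * Note (added; an observation about this port): the optimistic busy-wait
 * below is compiled out, and it references helpers (local_clock_us(),
 * busywait_stop()) that are not defined elsewhere in this file, so it
 * cannot simply be re-enabled as-is.
 */
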
#if 0
static int __i915_spin_request(struct drm_i915_gem_request *req, int state)
{
	unsigned long timeout;
	unsigned cpu;

	/* When waiting for high frequency requests, e.g. during synchronous
	 * rendering split between the CPU and GPU, the finite amount of time
	 * required to set up the irq and wait upon it limits the response
	 * rate. By busywaiting on the request completion for a short while we
	 * can service the high frequency waits as quick as possible. However,
	 * if it is a slow request, we want to sleep as quickly as possible.
	 * The tradeoff between waiting and sleeping is roughly the time it
	 * takes to sleep on a request, on the order of a microsecond.
	 */

	if (req->engine->irq_refcount)
		return -EBUSY;

	/* Only spin if we know the GPU is processing this request */
	if (!i915_gem_request_started(req, true))
		return -EAGAIN;

	timeout = local_clock_us(&cpu) + 5;
	while (!need_resched()) {
		if (i915_gem_request_completed(req, true))
			return 0;

		if (signal_pending_state(state, current))
			break;

		if (busywait_stop(timeout, cpu))
			break;

		cpu_relax_lowlatency();
	}

	if (i915_gem_request_completed(req, false))
		return 0;

	return -EAGAIN;
}
#endif

/**
 * __i915_wait_request - wait until execution of request has finished
 * @req: the request to wait upon
 * @interruptible: do an interruptible wait (normally yes)
 * @timeout: in - how long to wait (NULL forever); out - how much time remaining
 *
 * Note: It is of utmost importance that the passed in seqno and reset_counter
 * values have been read by the caller in an smp safe manner. Where read-side
 * locks are involved, it is sufficient to read the reset_counter before
 * unlocking the lock that protects the seqno. For lockless tricks, the
 * reset_counter _must_ be read before, and an appropriate smp_rmb must be
 * inserted.
 *
 * Returns 0 if the request was found within the allotted time. Else returns the
 * errno with remaining time filled in timeout argument.
 */
int __i915_wait_request(struct drm_i915_gem_request *req,
			bool interruptible,
			s64 *timeout,
			struct intel_rps_client *rps)
{
	struct intel_engine_cs *engine = i915_gem_request_get_engine(req);
	struct drm_device *dev = engine->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	const bool irq_test_in_progress =
		ACCESS_ONCE(dev_priv->gpu_error.test_irq_rings) & intel_engine_flag(engine);
	unsigned long timeout_expire;
	s64 before = 0; /* Only to silence a compiler warning. */
	int ret, sl_timeout = 1;

	WARN(!intel_irqs_enabled(dev_priv), "IRQs disabled");

	if (list_empty(&req->list))
		return 0;

	if (i915_gem_request_completed(req, true))
		return 0;

	timeout_expire = 0;
	if (timeout) {
		if (WARN_ON(*timeout < 0))
			return -EINVAL;

		if (*timeout == 0)
			return -ETIME;

		timeout_expire = jiffies + nsecs_to_jiffies_timeout(*timeout);

		/*
		 * Record current time in case interrupted by signal, or wedged.
		 */
		before = ktime_get_raw_ns();
	}

	if (INTEL_INFO(dev_priv)->gen >= 6)
		gen6_rps_boost(dev_priv, rps, req->emitted_jiffies);

	trace_i915_gem_request_wait_begin(req);

	/* Optimistic spin for the next jiffie before touching IRQs */
#if 0
	ret = __i915_spin_request(req);
	if (ret == 0)
		goto out;
#endif

	if (!irq_test_in_progress && WARN_ON(!engine->irq_get(engine))) {
		ret = -ENODEV;
		goto out;
	}

	lockmgr(&engine->irq_queue.lock, LK_EXCLUSIVE);
	for (;;) {
		struct timer_list timer;

		/* We need to check whether any gpu reset happened in between
		 * the request being submitted and now. If a reset has occurred,
		 * the request is effectively complete (we either are in the
		 * process of or have discarded the rendering and completely
		 * reset the GPU. The results of the request are lost and we
		 * are free to continue on with the original operation.
		 */
		if (req->reset_counter != i915_reset_counter(&dev_priv->gpu_error)) {
			ret = 0;
			break;
		}

		if (i915_gem_request_completed(req, false)) {
			ret = 0;
			break;
		}

		if (interruptible && signal_pending(curthread->td_lwp)) {
			ret = -ERESTARTSYS;
			break;
		}

		if (timeout && time_after_eq(jiffies, timeout_expire)) {
			ret = -ETIME;
			break;
		}

		timer.function = NULL;
		if (timeout || missed_irq(dev_priv, engine)) {
			unsigned long expire;

			setup_timer_on_stack(&timer, fake_irq, (unsigned long)&engine->irq_queue);
			expire = missed_irq(dev_priv, engine) ? jiffies + 1 : timeout_expire;
			sl_timeout = expire - jiffies;
			if (sl_timeout < 1)
				sl_timeout = 1;
			mod_timer(&timer, expire);
		}

#if 0
		io_schedule();
#endif

		if (timer.function) {
			del_singleshot_timer_sync(&timer);
			destroy_timer_on_stack(&timer);
		}

		lksleep(&engine->irq_queue, &engine->irq_queue.lock,
			interruptible ? PCATCH : 0, "lwe", sl_timeout);
	}
	lockmgr(&engine->irq_queue.lock, LK_RELEASE);
	if (!irq_test_in_progress)
		engine->irq_put(engine);

out:
	trace_i915_gem_request_wait_end(req);

	if (timeout) {
		s64 tres = *timeout - (ktime_get_raw_ns() - before);

		*timeout = tres < 0 ? 0 : tres;

		/*
		 * Apparently ktime isn't accurate enough and occasionally has a
		 * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
		 * things up to make the test happy. We allow up to 1 jiffy.
		 *
		 * This is a regression from the timespec->ktime conversion.
		 */
		if (ret == -ETIME && *timeout < jiffies_to_usecs(1)*1000)
			*timeout = 0;
	}

	return ret;
}

int i915_gem_request_add_to_client(struct drm_i915_gem_request *req,
				   struct drm_file *file)
{
	struct drm_i915_file_private *file_priv;

	WARN_ON(!req || !file || req->file_priv);

	if (!req || !file)
		return -EINVAL;

	if (req->file_priv)
		return -EINVAL;

	file_priv = file->driver_priv;

	spin_lock(&file_priv->mm.lock);
	req->file_priv = file_priv;
	list_add_tail(&req->client_list, &file_priv->mm.request_list);
	spin_unlock(&file_priv->mm.lock);

	req->pid = curproc->p_pid;

	return 0;
}

static inline void
i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
{
	struct drm_i915_file_private *file_priv = request->file_priv;

	if (!file_priv)
		return;

	spin_lock(&file_priv->mm.lock);
	list_del(&request->client_list);
	request->file_priv = NULL;
	spin_unlock(&file_priv->mm.lock);

#if 0
	put_pid(request->pid);
	request->pid = NULL;
#endif
}

static void i915_gem_request_retire(struct drm_i915_gem_request *request)
{
	trace_i915_gem_request_retire(request);

	/* We know the GPU must have read the request to have
	 * sent us the seqno + interrupt, so use the position
	 * of tail of the request to update the last known position
	 * of the GPU head.
	 *
	 * Note this requires that we are always called in request
	 * completion order.
	 */
	request->ringbuf->last_retired_head = request->postfix;

	list_del_init(&request->list);
	i915_gem_request_remove_from_client(request);

	i915_gem_request_unreference(request);
}

static void
__i915_gem_request_retire__upto(struct drm_i915_gem_request *req)
{
	struct intel_engine_cs *engine = req->engine;
	struct drm_i915_gem_request *tmp;

	lockdep_assert_held(&engine->dev->struct_mutex);

	if (list_empty(&req->list))
		return;

	do {
		tmp = list_first_entry(&engine->request_list,
				       typeof(*tmp), list);

		i915_gem_request_retire(tmp);
	} while (tmp != req);

	WARN_ON(i915_verify_lists(engine->dev));
}

/**
 * Waits for a request to be signaled, and cleans up the
 * request and object lists appropriately for that event.
 */
int
i915_wait_request(struct drm_i915_gem_request *req)
{
	struct drm_i915_private *dev_priv = req->i915;
	bool interruptible;
	int ret;

	interruptible = dev_priv->mm.interruptible;

	BUG_ON(!mutex_is_locked(&dev_priv->dev->struct_mutex));

	ret = __i915_wait_request(req, interruptible, NULL, NULL);
	if (ret)
		return ret;

	/* If the GPU hung, we want to keep the requests to find the guilty. */
	if (req->reset_counter == i915_reset_counter(&dev_priv->gpu_error))
		__i915_gem_request_retire__upto(req);

	return 0;
}

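/*
 * Note (added for clarity): the wait/retire helpers below rely on the
 * object's activity tracking: an object keeps one last_read_req per
 * engine plus a single last_write_req, so a read-only wait only needs
 * the last writer to finish while a full wait must drain every engine.
 */
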
/**
 * Ensures that all rendering to the object has completed and the object is
 * safe to unbind from the GTT or access from the CPU.
 */
int
i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
			       bool readonly)
{
	int ret, i;

	if (!obj->active)
		return 0;

	if (readonly) {
		if (obj->last_write_req != NULL) {
			ret = i915_wait_request(obj->last_write_req);
			if (ret)
				return ret;

			i = obj->last_write_req->engine->id;
			if (obj->last_read_req[i] == obj->last_write_req)
				i915_gem_object_retire__read(obj, i);
			else
				i915_gem_object_retire__write(obj);
		}
	} else {
		for (i = 0; i < I915_NUM_ENGINES; i++) {
			if (obj->last_read_req[i] == NULL)
				continue;

			ret = i915_wait_request(obj->last_read_req[i]);
			if (ret)
				return ret;

			i915_gem_object_retire__read(obj, i);
		}
		GEM_BUG_ON(obj->active);
	}

	return 0;
}

static void
i915_gem_object_retire_request(struct drm_i915_gem_object *obj,
			       struct drm_i915_gem_request *req)
{
	int ring = req->engine->id;

	if (obj->last_read_req[ring] == req)
		i915_gem_object_retire__read(obj, ring);
	else if (obj->last_write_req == req)
		i915_gem_object_retire__write(obj);

	if (req->reset_counter == i915_reset_counter(&req->i915->gpu_error))
		__i915_gem_request_retire__upto(req);
}

/* A nonblocking variant of the above wait. This is a highly dangerous routine
 * as the object state may change during this call.
 */
static __must_check int
i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
					    struct intel_rps_client *rps,
					    bool readonly)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_request *requests[I915_NUM_ENGINES];
	int ret, i, n = 0;

	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
	BUG_ON(!dev_priv->mm.interruptible);

	if (!obj->active)
		return 0;

	if (readonly) {
		struct drm_i915_gem_request *req;

		req = obj->last_write_req;
		if (req == NULL)
			return 0;

		requests[n++] = i915_gem_request_reference(req);
	} else {
		for (i = 0; i < I915_NUM_ENGINES; i++) {
			struct drm_i915_gem_request *req;

			req = obj->last_read_req[i];
			if (req == NULL)
				continue;

			requests[n++] = i915_gem_request_reference(req);
		}
	}

	mutex_unlock(&dev->struct_mutex);
	ret = 0;
	for (i = 0; ret == 0 && i < n; i++)
		ret = __i915_wait_request(requests[i], true, NULL, rps);
	mutex_lock(&dev->struct_mutex);

	for (i = 0; i < n; i++) {
		if (ret == 0)
			i915_gem_object_retire_request(obj, requests[i]);
		i915_gem_request_unreference(requests[i]);
	}

	return ret;
}

static struct intel_rps_client *to_rps_client(struct drm_file *file)
{
	struct drm_i915_file_private *fpriv = file->driver_priv;
	return &fpriv->rps;
}

/**
 * Called when user space prepares to use an object with the CPU, either
 * through the mmap ioctl's mapping or a GTT mapping.
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file)
{
	struct drm_i915_gem_set_domain *args = data;
	struct drm_i915_gem_object *obj;
	uint32_t read_domains = args->read_domains;
	uint32_t write_domain = args->write_domain;
	int ret;

	/* Only handle setting domains to types used by the CPU. */
	if (write_domain & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	if (read_domains & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	/* Having something in the write domain implies it's in the read
	 * domain, and only that read domain. Enforce that in the request.
	 */
	if (write_domain != 0 && read_domains != write_domain)
		return -EINVAL;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Try to flush the object off the GPU without holding the lock.
	 * We will repeat the flush holding the lock in the normal manner
	 * to catch cases where we are gazumped.
	 */
	ret = i915_gem_object_wait_rendering__nonblocking(obj,
							  to_rps_client(file),
							  !write_domain);
	if (ret)
		goto unref;

	if (read_domains & I915_GEM_DOMAIN_GTT)
		ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
	else
		ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);

	if (write_domain != 0)
		intel_fb_obj_invalidate(obj,
					write_domain == I915_GEM_DOMAIN_GTT ?
					ORIGIN_GTT : ORIGIN_CPU);

unref:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/**
 * Called when user space has done writes to this buffer
 */
int
i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file)
{
	struct drm_i915_gem_sw_finish *args = data;
	struct drm_i915_gem_object *obj;
	int ret = 0;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Pinned buffers may be scanout, so flush the cache */
	if (obj->pin_display)
		i915_gem_object_flush_cpu_write_domain(obj);

	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/**
 * Maps the contents of an object, returning the address it is mapped
 * into.
 *
 * While the mapping holds a reference on the contents of the object, it doesn't
 * imply a ref on the object itself.
 *
 * IMPORTANT:
 *
 * DRM driver writers who look at this function as an example for how to do GEM
 * mmap support, please don't implement mmap support like here. The modern way
 * to implement DRM mmap support is with an mmap offset ioctl (like
 * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly.
 * That way debug tooling like valgrind will understand what's going on, hiding
 * the mmap call in a driver private ioctl will break that. The i915 driver only
 * does cpu mmaps this way because we didn't know better.
 */
int
i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_gem_mmap *args = data;
	struct drm_gem_object *obj;
	unsigned long addr;

	struct proc *p = curproc;
	vm_map_t map = &p->p_vmspace->vm_map;
	vm_size_t size;
	int error = 0, rv;

	if (args->flags & ~(I915_MMAP_WC))
		return -EINVAL;

	obj = drm_gem_object_lookup(file, args->handle);
	if (obj == NULL)
		return -ENOENT;

	if (args->size == 0)
		goto out;

	size = round_page(args->size);
	if (map->size + size > p->p_rlimit[RLIMIT_VMEM].rlim_cur) {
		error = -ENOMEM;
		goto out;
	}

	/* prime objects have no backing filp to GEM mmap
	 * pages from.
	 */

	/*
	 * Call hint to ensure that NULL is not returned as a valid address
	 * and to reduce vm_map traversals. XXX causes instability, use a
	 * fixed low address as the start point instead to avoid the NULL
	 * return issue.
	 */
	addr = PAGE_SIZE;

	/*
	 * Use 256KB alignment. It is unclear why this matters for a
	 * virtual address but it appears to fix a number of application/X
	 * crashes and kms console switching is much faster.
	 */
	vm_object_hold(obj->vm_obj);
	vm_object_reference_locked(obj->vm_obj);
	vm_object_drop(obj->vm_obj);

	rv = vm_map_find(map, obj->vm_obj, NULL,
			 args->offset, &addr, args->size,
			 256 * 1024, /* align */
			 TRUE, /* fitit */
			 VM_MAPTYPE_NORMAL, VM_SUBSYS_DRM_GEM,
			 VM_PROT_READ | VM_PROT_WRITE, /* prot */
			 VM_PROT_READ | VM_PROT_WRITE, /* max */
			 MAP_SHARED /* cow */);
	if (rv != KERN_SUCCESS) {
		vm_object_deallocate(obj->vm_obj);
		error = -vm_mmap_to_errno(rv);
	} else {
		args->addr_ptr = (uint64_t)addr;
	}
out:
	drm_gem_object_unreference(obj);
	return (error);
}

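/*
 * Note (added for clarity): the fault handler below backs the *GTT*
 * mapping path (set up by drm_gem_mmap() via the mmap_gtt offset), as
 * opposed to the direct CPU mmap implemented above; it is invoked by
 * the VM system with vm_obj locked.
 */
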
/**
 * i915_gem_fault - fault a page into the GTT
 *
 * vm_obj is locked on entry and expected to be locked on return.
 *
 * The vm_pager has placemarked the object with an anonymous memory page
 * which we must replace atomically to avoid races against concurrent faults
 * on the same page. XXX we currently are unable to do this atomically.
 *
 * If we are to return an error we should not touch the anonymous page,
 * the caller will deallocate it.
 *
 * XXX Most GEM calls appear to be interruptible, but we can't hard loop
 * in that case. Release all resources and wait 1 tick before retrying.
 * This is a huge problem which needs to be fixed by getting rid of most
 * of the interruptibility. The linux code does not retry but does appear
 * to have some sort of mechanism (VM_FAULT_NOPAGE ?) for the higher level
 * to be able to retry.
 *
 * --
 * @vma: VMA in question
 * @vmf: fault info
 *
 * The fault handler is set up by drm_gem_mmap() when an object is GTT mapped
 * from userspace. The fault handler takes care of binding the object to
 * the GTT (if needed), allocating and programming a fence register (again,
 * only if needed based on whether the old reg is still valid or the object
 * is tiled) and inserting a new PTE into the faulting process.
 *
 * Note that the faulting process may involve evicting existing objects
 * from the GTT and/or fence registers to make room. So performance may
 * suffer if the GTT working set is large or there are few fence registers
 * left.
 *
 * vm_obj is locked on entry and expected to be locked on return. The VM
 * pager has placed an anonymous memory page at (obj,offset) which we have
 * to replace.
 */
int i915_gem_fault(vm_object_t vm_obj, vm_ooffset_t offset, int prot, vm_page_t *mres)
{
	struct drm_i915_gem_object *obj = to_intel_bo(vm_obj->handle);
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct i915_ggtt *ggtt = &dev_priv->ggtt;
	struct i915_ggtt_view view = i915_ggtt_view_normal;
	unsigned long page_offset;
	vm_page_t m;
	int ret = 0;
	bool write = !!(prot & VM_PROT_WRITE);

	intel_runtime_pm_get(dev_priv);

	/* We don't use vmf->pgoff since that has the fake offset */
	page_offset = (unsigned long)offset;

	/*
	 * vm_fault() has supplied us with a busied page placeholding
	 * the operation. This presents a lock order reversal issue
	 * against i915_gem_release_mmap() for our device mutex.
	 *
	 * Deal with the problem by getting rid of the placeholder now,
	 * and then dealing with the potential for a new placeholder when
	 * we try to insert later.
	 */
	if (*mres != NULL) {
		m = *mres;
		*mres = NULL;
		if ((m->busy_count & PBUSY_LOCKED) == 0)
			kprintf("i915_gem_fault: Page was not busy\n");
		else
			vm_page_remove(m);
		vm_page_free(m);
	}

	m = NULL;

retry:
	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		goto out;

	trace_i915_gem_object_fault(obj, page_offset, true, write);

	/* Try to flush the object off the GPU first without holding the lock.
	 * Upon reacquiring the lock, we will perform our sanity checks and then
	 * repeat the flush holding the lock in the normal manner to catch cases
	 * where we are gazumped.
	 */
	ret = i915_gem_object_wait_rendering__nonblocking(obj, NULL, !write);
	if (ret)
		goto unlock;

	/* Access to snoopable pages through the GTT is incoherent. */
	if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) {
		ret = -EFAULT;
		goto unlock;
	}

	/* Use a partial view if the object is bigger than the aperture. */
	if (obj->base.size >= ggtt->mappable_end &&
	    obj->tiling_mode == I915_TILING_NONE) {
#if 0
		static const unsigned int chunk_size = 256; // 1 MiB

		memset(&view, 0, sizeof(view));
		view.type = I915_GGTT_VIEW_PARTIAL;
		view.params.partial.offset = rounddown(page_offset, chunk_size);
		view.params.partial.size =
			min_t(unsigned int,
			      chunk_size,
			      (vma->vm_end - vma->vm_start)/PAGE_SIZE -
			      view.params.partial.offset);
#endif
	}

	/* Now pin it into the GTT if needed */
	ret = i915_gem_object_ggtt_pin(obj, &view, 0, PIN_MAPPABLE);
	if (ret)
		goto unlock;

	ret = i915_gem_object_set_to_gtt_domain(obj, write);
	if (ret)
		goto unpin;

	ret = i915_gem_object_get_fence(obj);
	if (ret)
		goto unpin;

	/*
	 * START FREEBSD MAGIC
	 *
	 * Add a pip count to avoid destruction and certain other
	 * complex operations (such as collapses?) while unlocked.
	 */
1887 */ 1888 vm_object_pip_add(vm_obj, 1); 1889 1890 ret = 0; 1891 m = NULL; 1892 1893 /* 1894 * Since the object lock was dropped, another thread might have 1895 * faulted on the same GTT address and instantiated the mapping. 1896 * Recheck. 1897 */ 1898 m = vm_page_lookup(vm_obj, OFF_TO_IDX(offset)); 1899 if (m != NULL) { 1900 /* 1901 * Try to busy the page, retry on failure (non-zero ret). 1902 */ 1903 if (vm_page_busy_try(m, false)) { 1904 kprintf("i915_gem_fault: BUSY\n"); 1905 ret = -EINTR; 1906 goto unlock; 1907 } 1908 goto have_page; 1909 } 1910 /* 1911 * END FREEBSD MAGIC 1912 */ 1913 1914 obj->fault_mappable = true; 1915 1916 /* Finally, remap it using the new GTT offset */ 1917 m = vm_phys_fictitious_to_vm_page(ggtt->mappable_base + 1918 i915_gem_obj_ggtt_offset_view(obj, &view) + offset); 1919 if (m == NULL) { 1920 ret = -EFAULT; 1921 goto unpin; 1922 } 1923 KASSERT((m->flags & PG_FICTITIOUS) != 0, ("not fictitious %p", m)); 1924 KASSERT(m->wire_count == 1, ("wire_count not 1 %p", m)); 1925 1926 /* 1927 * Try to busy the page. Fails on non-zero return. 1928 */ 1929 if (vm_page_busy_try(m, false)) { 1930 kprintf("i915_gem_fault: BUSY(2)\n"); 1931 ret = -EINTR; 1932 goto unpin; 1933 } 1934 m->valid = VM_PAGE_BITS_ALL; 1935 1936 #if 1 1937 /* 1938 * This should always work since we already checked via a lookup 1939 * above. 1940 */ 1941 if (vm_page_insert(m, vm_obj, OFF_TO_IDX(offset)) == FALSE) { 1942 kprintf("i915:gem_fault: page %p,%jd already in object\n", 1943 vm_obj, 1944 OFF_TO_IDX(offset)); 1945 vm_page_wakeup(m); 1946 ret = -EINTR; 1947 goto unpin; 1948 } 1949 #else 1950 /* NOT COMPILED ATM */ 1951 if (unlikely(view.type == I915_GGTT_VIEW_PARTIAL)) { 1952 /* Overriding existing pages in partial view does not cause 1953 * us any trouble as TLBs are still valid because the fault 1954 * is due to userspace losing part of the mapping or never 1955 * having accessed it before (at this partials' range). 1956 */ 1957 unsigned long base = vma->vm_start + 1958 (view.params.partial.offset << PAGE_SHIFT); 1959 unsigned int i; 1960 1961 for (i = 0; i < view.params.partial.size; i++) { 1962 ret = vm_insert_pfn(vma, base + i * PAGE_SIZE, pfn + i); 1963 if (ret) 1964 break; 1965 } 1966 1967 obj->fault_mappable = true; 1968 } else { 1969 if (!obj->fault_mappable) { 1970 unsigned long size = min_t(unsigned long, 1971 vma->vm_end - vma->vm_start, 1972 obj->base.size); 1973 int i; 1974 1975 for (i = 0; i < size >> PAGE_SHIFT; i++) { 1976 ret = vm_insert_pfn(vma, 1977 (unsigned long)vma->vm_start + i * PAGE_SIZE, 1978 pfn + i); 1979 if (ret) 1980 break; 1981 } 1982 1983 obj->fault_mappable = true; 1984 } else 1985 ret = vm_insert_pfn(vma, 1986 (unsigned long)vmf->virtual_address, 1987 pfn + page_offset); 1988 } 1989 #endif 1990 1991 have_page: 1992 *mres = m; 1993 1994 i915_gem_object_ggtt_unpin_view(obj, &view); 1995 mutex_unlock(&dev->struct_mutex); 1996 ret = VM_PAGER_OK; 1997 goto done; 1998 1999 /* 2000 * ALTERNATIVE ERROR RETURN. 2001 * 2002 * OBJECT EXPECTED TO BE LOCKED. 2003 */ 2004 unpin: 2005 i915_gem_object_ggtt_unpin_view(obj, &view); 2006 unlock: 2007 mutex_unlock(&dev->struct_mutex); 2008 out: 2009 switch (ret) { 2010 case -EIO: 2011 /* 2012 * We eat errors when the gpu is terminally wedged to avoid 2013 * userspace unduly crashing (gl has no provisions for mmaps to 2014 * fail). But any other -EIO isn't ours (e.g. swap in failure) 2015 * and so needs to be reported. 
2016 */ 2017 if (!i915_terminally_wedged(&dev_priv->gpu_error)) { 2018 // ret = VM_FAULT_SIGBUS; 2019 break; 2020 } 2021 case -EAGAIN: 2022 /* 2023 * EAGAIN means the gpu is hung and we'll wait for the error 2024 * handler to reset everything when re-faulting in 2025 * i915_mutex_lock_interruptible. 2026 */ 2027 case -ERESTARTSYS: 2028 case -EINTR: 2029 VM_OBJECT_UNLOCK(vm_obj); 2030 int dummy; 2031 tsleep(&dummy, 0, "delay", 1); /* XXX */ 2032 VM_OBJECT_LOCK(vm_obj); 2033 goto retry; 2034 default: 2035 WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret); 2036 ret = VM_PAGER_ERROR; 2037 break; 2038 } 2039 2040 done: 2041 vm_object_pip_wakeup(vm_obj); 2042 2043 intel_runtime_pm_put(dev_priv); 2044 return ret; 2045 } 2046 2047 /** 2048 * i915_gem_release_mmap - remove physical page mappings 2049 * @obj: obj in question 2050 * 2051 * Preserve the reservation of the mmapping with the DRM core code, but 2052 * relinquish ownership of the pages back to the system. 2053 * 2054 * It is vital that we remove the page mapping if we have mapped a tiled 2055 * object through the GTT and then lose the fence register due to 2056 * resource pressure. Similarly if the object has been moved out of the 2057 * aperture, then pages mapped into userspace must be revoked. Removing the 2058 * mapping will then trigger a page fault on the next user access, allowing 2059 * fixup by i915_gem_fault(). 2060 */ 2061 void 2062 i915_gem_release_mmap(struct drm_i915_gem_object *obj) 2063 { 2064 vm_object_t devobj; 2065 vm_page_t m; 2066 int i, page_count; 2067 2068 /* Serialisation between user GTT access and our code depends upon 2069 * revoking the CPU's PTE whilst the mutex is held. The next user 2070 * pagefault then has to wait until we release the mutex. 2071 */ 2072 lockdep_assert_held(&obj->base.dev->struct_mutex); 2073 2074 if (!obj->fault_mappable) 2075 return; 2076 2077 devobj = cdev_pager_lookup(obj); 2078 if (devobj != NULL) { 2079 page_count = OFF_TO_IDX(obj->base.size); 2080 2081 VM_OBJECT_LOCK(devobj); 2082 for (i = 0; i < page_count; i++) { 2083 m = vm_page_lookup_busy_wait(devobj, i, TRUE, "915unm"); 2084 if (m == NULL) 2085 continue; 2086 cdev_pager_free_page(devobj, m); 2087 } 2088 VM_OBJECT_UNLOCK(devobj); 2089 vm_object_deallocate(devobj); 2090 } 2091 2092 /* Ensure that the CPU's PTEs are revoked and there are no outstanding 2093 * memory transactions from userspace before we return. The TLB 2094 * flushing implied by changing the PTE above *should* be 2095 * sufficient, an extra barrier here just provides us with a bit 2096 * of paranoid documentation about our requirement to serialise 2097 * memory writes before touching registers / GSM.
2098 */ 2099 wmb(); 2100 2101 obj->fault_mappable = false; 2102 } 2103 2104 void 2105 i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv) 2106 { 2107 struct drm_i915_gem_object *obj; 2108 2109 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) 2110 i915_gem_release_mmap(obj); 2111 } 2112 2113 uint32_t 2114 i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode) 2115 { 2116 uint32_t gtt_size; 2117 2118 if (INTEL_INFO(dev)->gen >= 4 || 2119 tiling_mode == I915_TILING_NONE) 2120 return size; 2121 2122 /* Previous chips need a power-of-two fence region when tiling */ 2123 if (INTEL_INFO(dev)->gen == 3) 2124 gtt_size = 1024*1024; 2125 else 2126 gtt_size = 512*1024; 2127 2128 while (gtt_size < size) 2129 gtt_size <<= 1; 2130 2131 return gtt_size; 2132 } 2133 2134 /** 2135 * i915_gem_get_gtt_alignment - return required GTT alignment for an object 2136 * @obj: object to check 2137 * 2138 * Return the required GTT alignment for an object, taking into account 2139 * potential fence register mapping. 2140 */ 2141 uint32_t 2142 i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size, 2143 int tiling_mode, bool fenced) 2144 { 2145 /* 2146 * Minimum alignment is 4k (GTT page size), but might be greater 2147 * if a fence register is needed for the object. 2148 */ 2149 if (INTEL_INFO(dev)->gen >= 4 || (!fenced && IS_G33(dev)) || 2150 tiling_mode == I915_TILING_NONE) 2151 return 4096; 2152 2153 /* 2154 * Previous chips need to be aligned to the size of the smallest 2155 * fence register that can contain the object. 2156 */ 2157 return i915_gem_get_gtt_size(dev, size, tiling_mode); 2158 } 2159 2160 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj) 2161 { 2162 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2163 int ret; 2164 2165 #if 0 2166 if (drm_vma_node_has_offset(&obj->base.vma_node)) 2167 return 0; 2168 #endif 2169 2170 dev_priv->mm.shrinker_no_lock_stealing = true; 2171 2172 ret = drm_gem_create_mmap_offset(&obj->base); 2173 if (ret != -ENOSPC) 2174 goto out; 2175 2176 /* Badly fragmented mmap space? The only way we can recover 2177 * space is by destroying unwanted objects. We can't randomly release 2178 * mmap_offsets as userspace expects them to be persistent for the 2179 * lifetime of the objects. The closest we can do is to release the 2180 * offsets on purgeable objects by truncating them and marking them 2181 * purged, which prevents userspace from ever using those objects again.
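 *
 * The fallback sequence below therefore escalates: retry the allocation
 * after reclaiming purgeable objects via i915_gem_shrink(), and only then
 * resort to the heavyweight i915_gem_shrink_all() before giving up.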
2182 */ 2183 i915_gem_shrink(dev_priv, 2184 obj->base.size >> PAGE_SHIFT, 2185 I915_SHRINK_BOUND | 2186 I915_SHRINK_UNBOUND | 2187 I915_SHRINK_PURGEABLE); 2188 ret = drm_gem_create_mmap_offset(&obj->base); 2189 if (ret != -ENOSPC) 2190 goto out; 2191 2192 i915_gem_shrink_all(dev_priv); 2193 ret = drm_gem_create_mmap_offset(&obj->base); 2194 out: 2195 dev_priv->mm.shrinker_no_lock_stealing = false; 2196 2197 return ret; 2198 } 2199 2200 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj) 2201 { 2202 drm_gem_free_mmap_offset(&obj->base); 2203 } 2204 2205 int 2206 i915_gem_mmap_gtt(struct drm_file *file, 2207 struct drm_device *dev, 2208 uint32_t handle, 2209 uint64_t *offset) 2210 { 2211 struct drm_i915_gem_object *obj; 2212 int ret; 2213 2214 ret = i915_mutex_lock_interruptible(dev); 2215 if (ret) 2216 return ret; 2217 2218 obj = to_intel_bo(drm_gem_object_lookup(file, handle)); 2219 if (&obj->base == NULL) { 2220 ret = -ENOENT; 2221 goto unlock; 2222 } 2223 2224 if (obj->madv != I915_MADV_WILLNEED) { 2225 DRM_DEBUG("Attempting to mmap a purgeable buffer\n"); 2226 ret = -EFAULT; 2227 goto out; 2228 } 2229 2230 ret = i915_gem_object_create_mmap_offset(obj); 2231 if (ret) 2232 goto out; 2233 2234 *offset = DRM_GEM_MAPPING_OFF(obj->base.map_list.key) | 2235 DRM_GEM_MAPPING_KEY; 2236 2237 out: 2238 drm_gem_object_unreference(&obj->base); 2239 unlock: 2240 mutex_unlock(&dev->struct_mutex); 2241 return ret; 2242 } 2243 2244 /** 2245 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing 2246 * @dev: DRM device 2247 * @data: GTT mapping ioctl data 2248 * @file: GEM object info 2249 * 2250 * Simply returns the fake offset to userspace so it can mmap it. 2251 * The mmap call will end up in drm_gem_mmap(), which will set things 2252 * up so we can get faults in the handler above. 2253 * 2254 * The fault handler will take care of binding the object into the GTT 2255 * (since it may have been evicted to make room for something), allocating 2256 * a fence register, and mapping the appropriate aperture address into 2257 * userspace. 
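 *
 * As a rough sketch of the userspace side (hypothetical fd/handle/size
 * variables, using the generic libdrm drmIoctl() wrapper):
 *
 *	struct drm_i915_gem_mmap_gtt arg = { .handle = handle };
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &arg);
 *	ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
 *		   fd, arg.offset);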
2258 */ 2259 int 2260 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, 2261 struct drm_file *file) 2262 { 2263 struct drm_i915_gem_mmap_gtt *args = data; 2264 2265 return i915_gem_mmap_gtt(file, dev, args->handle, (uint64_t *)&args->offset); 2266 } 2267 2268 /* Immediately discard the backing storage */ 2269 static void 2270 i915_gem_object_truncate(struct drm_i915_gem_object *obj) 2271 { 2272 vm_object_t vm_obj; 2273 2274 vm_obj = obj->base.vm_obj; 2275 VM_OBJECT_LOCK(vm_obj); 2276 vm_object_page_remove(vm_obj, 0, 0, false); 2277 VM_OBJECT_UNLOCK(vm_obj); 2278 2279 obj->madv = __I915_MADV_PURGED; 2280 } 2281 2282 /* Try to discard unwanted pages */ 2283 static void 2284 i915_gem_object_invalidate(struct drm_i915_gem_object *obj) 2285 { 2286 #if 0 2287 struct address_space *mapping; 2288 #endif 2289 2290 switch (obj->madv) { 2291 case I915_MADV_DONTNEED: 2292 i915_gem_object_truncate(obj); 2293 case __I915_MADV_PURGED: 2294 return; 2295 } 2296 2297 #if 0 2298 if (obj->base.filp == NULL) 2299 return; 2300 2301 mapping = file_inode(obj->base.filp)->i_mapping, 2302 invalidate_mapping_pages(mapping, 0, (loff_t)-1); 2303 #endif 2304 } 2305 2306 static void 2307 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj) 2308 { 2309 struct sg_page_iter sg_iter; 2310 int ret; 2311 2312 BUG_ON(obj->madv == __I915_MADV_PURGED); 2313 2314 ret = i915_gem_object_set_to_cpu_domain(obj, true); 2315 if (WARN_ON(ret)) { 2316 /* In the event of a disaster, abandon all caches and 2317 * hope for the best. 2318 */ 2319 i915_gem_clflush_object(obj, true); 2320 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU; 2321 } 2322 2323 i915_gem_gtt_finish_object(obj); 2324 2325 if (i915_gem_object_needs_bit17_swizzle(obj)) 2326 i915_gem_object_save_bit_17_swizzle(obj); 2327 2328 if (obj->madv == I915_MADV_DONTNEED) 2329 obj->dirty = 0; 2330 2331 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) { 2332 struct page *page = sg_page_iter_page(&sg_iter); 2333 2334 if (obj->dirty) 2335 set_page_dirty(page); 2336 2337 if (obj->madv == I915_MADV_WILLNEED) 2338 mark_page_accessed(page); 2339 2340 vm_page_busy_wait((struct vm_page *)page, FALSE, "i915gem"); 2341 vm_page_unwire((struct vm_page *)page, 1); 2342 vm_page_wakeup((struct vm_page *)page); 2343 } 2344 obj->dirty = 0; 2345 2346 sg_free_table(obj->pages); 2347 kfree(obj->pages); 2348 } 2349 2350 int 2351 i915_gem_object_put_pages(struct drm_i915_gem_object *obj) 2352 { 2353 const struct drm_i915_gem_object_ops *ops = obj->ops; 2354 2355 if (obj->pages == NULL) 2356 return 0; 2357 2358 if (obj->pages_pin_count) 2359 return -EBUSY; 2360 2361 BUG_ON(i915_gem_obj_bound_any(obj)); 2362 2363 /* ->put_pages might need to allocate memory for the bit17 swizzle 2364 * array, hence protect them from being reaped by removing them from gtt 2365 * lists early. 
*/ 2366 list_del(&obj->global_list); 2367 2368 if (obj->mapping) { 2369 if (is_vmalloc_addr(obj->mapping)) 2370 vunmap(obj->mapping); 2371 else 2372 kunmap(kmap_to_page(obj->mapping)); 2373 obj->mapping = NULL; 2374 } 2375 2376 ops->put_pages(obj); 2377 obj->pages = NULL; 2378 2379 i915_gem_object_invalidate(obj); 2380 2381 return 0; 2382 } 2383 2384 static int 2385 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) 2386 { 2387 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2388 int page_count, i; 2389 vm_object_t vm_obj; 2390 struct sg_table *st; 2391 struct scatterlist *sg; 2392 struct sg_page_iter sg_iter; 2393 struct page *page; 2394 unsigned long last_pfn = 0; /* suppress gcc warning */ 2395 int ret; 2396 2397 /* Assert that the object is not currently in any GPU domain. As it 2398 * wasn't in the GTT, there shouldn't be any way it could have been in 2399 * a GPU cache 2400 */ 2401 BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS); 2402 BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS); 2403 2404 st = kmalloc(sizeof(*st), M_DRM, M_WAITOK); 2405 if (st == NULL) 2406 return -ENOMEM; 2407 2408 page_count = obj->base.size / PAGE_SIZE; 2409 if (sg_alloc_table(st, page_count, GFP_KERNEL)) { 2410 kfree(st); 2411 return -ENOMEM; 2412 } 2413 2414 /* Get the list of pages out of our struct file. They'll be pinned 2415 * at this point until we release them. 2416 * 2417 * Fail silently without starting the shrinker 2418 */ 2419 vm_obj = obj->base.vm_obj; 2420 VM_OBJECT_LOCK(vm_obj); 2421 sg = st->sgl; 2422 st->nents = 0; 2423 for (i = 0; i < page_count; i++) { 2424 page = shmem_read_mapping_page(vm_obj, i); 2425 if (IS_ERR(page)) { 2426 i915_gem_shrink(dev_priv, 2427 page_count, 2428 I915_SHRINK_BOUND | 2429 I915_SHRINK_UNBOUND | 2430 I915_SHRINK_PURGEABLE); 2431 page = shmem_read_mapping_page(vm_obj, i); 2432 } 2433 if (IS_ERR(page)) { 2434 /* We've tried hard to allocate the memory by reaping 2435 * our own buffer, now let the real VM do its job and 2436 * go down in flames if truly OOM. 2437 */ 2438 i915_gem_shrink_all(dev_priv); 2439 page = shmem_read_mapping_page(vm_obj, i); 2440 if (IS_ERR(page)) { 2441 ret = PTR_ERR(page); 2442 goto err_pages; 2443 } 2444 } 2445 #ifdef CONFIG_SWIOTLB 2446 if (swiotlb_nr_tbl()) { 2447 st->nents++; 2448 sg_set_page(sg, page, PAGE_SIZE, 0); 2449 sg = sg_next(sg); 2450 continue; 2451 } 2452 #endif 2453 if (!i || page_to_pfn(page) != last_pfn + 1) { 2454 if (i) 2455 sg = sg_next(sg); 2456 st->nents++; 2457 sg_set_page(sg, page, PAGE_SIZE, 0); 2458 } else { 2459 sg->length += PAGE_SIZE; 2460 } 2461 last_pfn = page_to_pfn(page); 2462 2463 /* Check that the i965g/gm workaround works. 
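 * (Upstream Linux pairs this comment with a WARN_ON that, when __GFP_DMA32
 * is in effect, last_pfn stays below the 4GiB boundary; that check was not
 * carried over into this port.)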
*/ 2464 } 2465 #ifdef CONFIG_SWIOTLB 2466 if (!swiotlb_nr_tbl()) 2467 #endif 2468 sg_mark_end(sg); 2469 obj->pages = st; 2470 VM_OBJECT_UNLOCK(vm_obj); 2471 2472 ret = i915_gem_gtt_prepare_object(obj); 2473 if (ret) 2474 goto err_pages; 2475 2476 if (i915_gem_object_needs_bit17_swizzle(obj)) 2477 i915_gem_object_do_bit_17_swizzle(obj); 2478 2479 if (obj->tiling_mode != I915_TILING_NONE && 2480 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) 2481 i915_gem_object_pin_pages(obj); 2482 2483 return 0; 2484 2485 err_pages: 2486 sg_mark_end(sg); 2487 for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) { 2488 struct vm_page *vmp = (struct vm_page *)sg_page_iter_page(&sg_iter); 2489 vm_page_busy_wait(vmp, FALSE, "i915gem"); 2490 vm_page_unwire(vmp, 0); 2491 vm_page_wakeup(vmp); 2492 } 2493 VM_OBJECT_UNLOCK(vm_obj); 2494 sg_free_table(st); 2495 kfree(st); 2496 2497 /* shmemfs first checks if there is enough memory to allocate the page 2498 * and reports ENOSPC should there be insufficient, along with the usual 2499 * ENOMEM for a genuine allocation failure. 2500 * 2501 * We use ENOSPC in our driver to mean that we have run out of aperture 2502 * space and so want to translate the error from shmemfs back to our 2503 * usual understanding of ENOMEM. 2504 */ 2505 if (ret == -ENOSPC) 2506 ret = -ENOMEM; 2507 2508 return ret; 2509 } 2510 2511 /* Ensure that the associated pages are gathered from the backing storage 2512 * and pinned into our object. i915_gem_object_get_pages() may be called 2513 * multiple times before they are released by a single call to 2514 * i915_gem_object_put_pages() - once the pages are no longer referenced 2515 * either as a result of memory pressure (reaping pages under the shrinker) 2516 * or as the object is itself released. 2517 */ 2518 int 2519 i915_gem_object_get_pages(struct drm_i915_gem_object *obj) 2520 { 2521 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2522 const struct drm_i915_gem_object_ops *ops = obj->ops; 2523 int ret; 2524 2525 if (obj->pages) 2526 return 0; 2527 2528 if (obj->madv != I915_MADV_WILLNEED) { 2529 DRM_DEBUG("Attempting to obtain a purgeable object\n"); 2530 return -EFAULT; 2531 } 2532 2533 BUG_ON(obj->pages_pin_count); 2534 2535 ret = ops->get_pages(obj); 2536 if (ret) 2537 return ret; 2538 2539 list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list); 2540 2541 obj->get_page.sg = obj->pages->sgl; 2542 obj->get_page.last = 0; 2543 2544 return 0; 2545 } 2546 2547 void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj) 2548 { 2549 int ret; 2550 2551 lockdep_assert_held(&obj->base.dev->struct_mutex); 2552 2553 ret = i915_gem_object_get_pages(obj); 2554 if (ret) 2555 return ERR_PTR(ret); 2556 2557 i915_gem_object_pin_pages(obj); 2558 2559 if (obj->mapping == NULL) { 2560 struct page **pages; 2561 2562 pages = NULL; 2563 if (obj->base.size == PAGE_SIZE) 2564 obj->mapping = kmap(sg_page(obj->pages->sgl)); 2565 else 2566 pages = drm_malloc_gfp(obj->base.size >> PAGE_SHIFT, 2567 sizeof(*pages), 2568 GFP_TEMPORARY); 2569 if (pages != NULL) { 2570 struct sg_page_iter sg_iter; 2571 int n; 2572 2573 n = 0; 2574 for_each_sg_page(obj->pages->sgl, &sg_iter, 2575 obj->pages->nents, 0) 2576 pages[n++] = sg_page_iter_page(&sg_iter); 2577 2578 obj->mapping = vmap(pages, n, 0, PAGE_KERNEL); 2579 drm_free_large(pages); 2580 } 2581 if (obj->mapping == NULL) { 2582 i915_gem_object_unpin_pages(obj); 2583 return ERR_PTR(-ENOMEM); 2584 } 2585 } 2586 2587 return obj->mapping; 2588 } 2589 2590 void i915_vma_move_to_active(struct i915_vma *vma, 2591 
struct drm_i915_gem_request *req) 2592 { 2593 struct drm_i915_gem_object *obj = vma->obj; 2594 struct intel_engine_cs *engine; 2595 2596 engine = i915_gem_request_get_engine(req); 2597 2598 /* Add a reference if we're newly entering the active list. */ 2599 if (obj->active == 0) 2600 drm_gem_object_reference(&obj->base); 2601 obj->active |= intel_engine_flag(engine); 2602 2603 list_move_tail(&obj->engine_list[engine->id], &engine->active_list); 2604 i915_gem_request_assign(&obj->last_read_req[engine->id], req); 2605 2606 list_move_tail(&vma->vm_link, &vma->vm->active_list); 2607 } 2608 2609 static void 2610 i915_gem_object_retire__write(struct drm_i915_gem_object *obj) 2611 { 2612 GEM_BUG_ON(obj->last_write_req == NULL); 2613 GEM_BUG_ON(!(obj->active & intel_engine_flag(obj->last_write_req->engine))); 2614 2615 i915_gem_request_assign(&obj->last_write_req, NULL); 2616 intel_fb_obj_flush(obj, true, ORIGIN_CS); 2617 } 2618 2619 static void 2620 i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring) 2621 { 2622 struct i915_vma *vma; 2623 2624 GEM_BUG_ON(obj->last_read_req[ring] == NULL); 2625 GEM_BUG_ON(!(obj->active & (1 << ring))); 2626 2627 list_del_init(&obj->engine_list[ring]); 2628 i915_gem_request_assign(&obj->last_read_req[ring], NULL); 2629 2630 if (obj->last_write_req && obj->last_write_req->engine->id == ring) 2631 i915_gem_object_retire__write(obj); 2632 2633 obj->active &= ~(1 << ring); 2634 if (obj->active) 2635 return; 2636 2637 /* Bump our place on the bound list to keep it roughly in LRU order 2638 * so that we don't steal from recently used but inactive objects 2639 * (unless we are forced to ofc!) 2640 */ 2641 list_move_tail(&obj->global_list, 2642 &to_i915(obj->base.dev)->mm.bound_list); 2643 2644 list_for_each_entry(vma, &obj->vma_list, obj_link) { 2645 if (!list_empty(&vma->vm_link)) 2646 list_move_tail(&vma->vm_link, &vma->vm->inactive_list); 2647 } 2648 2649 i915_gem_request_assign(&obj->last_fenced_req, NULL); 2650 drm_gem_object_unreference(&obj->base); 2651 } 2652 2653 static int 2654 i915_gem_init_seqno(struct drm_device *dev, u32 seqno) 2655 { 2656 struct drm_i915_private *dev_priv = dev->dev_private; 2657 struct intel_engine_cs *engine; 2658 int ret; 2659 2660 /* Carefully retire all requests without writing to the rings */ 2661 for_each_engine(engine, dev_priv) { 2662 ret = intel_engine_idle(engine); 2663 if (ret) 2664 return ret; 2665 } 2666 i915_gem_retire_requests(dev); 2667 2668 /* Finally reset hw state */ 2669 for_each_engine(engine, dev_priv) 2670 intel_ring_init_seqno(engine, seqno); 2671 2672 return 0; 2673 } 2674 2675 int i915_gem_set_seqno(struct drm_device *dev, u32 seqno) 2676 { 2677 struct drm_i915_private *dev_priv = dev->dev_private; 2678 int ret; 2679 2680 if (seqno == 0) 2681 return -EINVAL; 2682 2683 /* HWS page needs to be set less than what we 2684 * will inject to ring 2685 */ 2686 ret = i915_gem_init_seqno(dev, seqno - 1); 2687 if (ret) 2688 return ret; 2689 2690 /* Carefully set the last_seqno value so that wrap 2691 * detection still works 2692 */ 2693 dev_priv->next_seqno = seqno; 2694 dev_priv->last_seqno = seqno - 1; 2695 if (dev_priv->last_seqno == 0) 2696 dev_priv->last_seqno--; 2697 2698 return 0; 2699 } 2700 2701 int 2702 i915_gem_get_seqno(struct drm_device *dev, u32 *seqno) 2703 { 2704 struct drm_i915_private *dev_priv = dev->dev_private; 2705 2706 /* reserve 0 for non-seqno */ 2707 if (dev_priv->next_seqno == 0) { 2708 int ret = i915_gem_init_seqno(dev, 0); 2709 if (ret) 2710 return ret; 2711 2712 
dev_priv->next_seqno = 1; 2713 } 2714 2715 *seqno = dev_priv->last_seqno = dev_priv->next_seqno++; 2716 return 0; 2717 } 2718 2719 /* 2720 * NB: This function is not allowed to fail. Doing so would mean that the 2721 * request is not being tracked for completion but the work itself is 2722 * going to happen on the hardware. This would be a Bad Thing(tm). 2723 */ 2724 void __i915_add_request(struct drm_i915_gem_request *request, 2725 struct drm_i915_gem_object *obj, 2726 bool flush_caches) 2727 { 2728 struct intel_engine_cs *engine; 2729 struct drm_i915_private *dev_priv; 2730 struct intel_ringbuffer *ringbuf; 2731 u32 request_start; 2732 int ret; 2733 2734 if (WARN_ON(request == NULL)) 2735 return; 2736 2737 engine = request->engine; 2738 dev_priv = request->i915; 2739 ringbuf = request->ringbuf; 2740 2741 /* 2742 * To ensure that this call will not fail, space for its emissions 2743 * should already have been reserved in the ring buffer. Let the ring 2744 * know that it is time to use that space up. 2745 */ 2746 intel_ring_reserved_space_use(ringbuf); 2747 2748 request_start = intel_ring_get_tail(ringbuf); 2749 /* 2750 * Emit any outstanding flushes - execbuf can fail to emit the flush 2751 * after having emitted the batchbuffer command. Hence we need to fix 2752 * things up similar to emitting the lazy request. The difference here 2753 * is that the flush _must_ happen before the next request, no matter 2754 * what. 2755 */ 2756 if (flush_caches) { 2757 if (i915.enable_execlists) 2758 ret = logical_ring_flush_all_caches(request); 2759 else 2760 ret = intel_ring_flush_all_caches(request); 2761 /* Not allowed to fail! */ 2762 WARN(ret, "*_ring_flush_all_caches failed: %d!\n", ret); 2763 } 2764 2765 trace_i915_gem_request_add(request); 2766 2767 request->head = request_start; 2768 2769 /* Whilst this request exists, batch_obj will be on the 2770 * active_list, and so will hold the active reference. Only when this 2771 * request is retired will the batch_obj be moved onto the 2772 * inactive_list and lose its active reference. Hence we do not need 2773 * to explicitly hold another reference here. 2774 */ 2775 request->batch_obj = obj; 2776 2777 /* Seal the request and mark it as pending execution. Note that 2778 * we may inspect this state, without holding any locks, during 2779 * hangcheck. Hence we apply the barrier to ensure that we do not 2780 * see a more recent value in the hws than we are tracking. 2781 */ 2782 request->emitted_jiffies = jiffies; 2783 request->previous_seqno = engine->last_submitted_seqno; 2784 smp_store_mb(engine->last_submitted_seqno, request->seqno); 2785 list_add_tail(&request->list, &engine->request_list); 2786 2787 /* Record the position of the start of the request so that 2788 * should we detect the updated seqno part-way through the 2789 * GPU processing the request, we never over-estimate the 2790 * position of the head. 2791 */ 2792 request->postfix = intel_ring_get_tail(ringbuf); 2793 2794 if (i915.enable_execlists) 2795 ret = engine->emit_request(request); 2796 else { 2797 ret = engine->add_request(request); 2798 2799 request->tail = intel_ring_get_tail(ringbuf); 2800 } 2801 2802 /* Not allowed to fail! */ 2803 WARN(ret, "emit|add_request failed: %d!\n", ret); 2804 2805 i915_queue_hangcheck(engine->dev); 2806 2807 queue_delayed_work(dev_priv->wq, 2808 &dev_priv->mm.retire_work, 2809 round_jiffies_up_relative(HZ)); 2810 intel_mark_busy(dev_priv->dev); 2811 2812 /* Sanity check that the reserved size was large enough.
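 * intel_ring_reserved_space_end() below closes the window opened by
 * intel_ring_reserved_space_use() at the top of this function and checks
 * that the request did not emit more than the space reserved for it.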
*/ 2813 intel_ring_reserved_space_end(ringbuf); 2814 } 2815 2816 static bool i915_context_is_banned(struct drm_i915_private *dev_priv, 2817 const struct intel_context *ctx) 2818 { 2819 unsigned long elapsed; 2820 2821 elapsed = get_seconds() - ctx->hang_stats.guilty_ts; 2822 2823 if (ctx->hang_stats.banned) 2824 return true; 2825 2826 if (ctx->hang_stats.ban_period_seconds && 2827 elapsed <= ctx->hang_stats.ban_period_seconds) { 2828 if (!i915_gem_context_is_default(ctx)) { 2829 DRM_DEBUG("context hanging too fast, banning!\n"); 2830 return true; 2831 } else if (i915_stop_ring_allow_ban(dev_priv)) { 2832 if (i915_stop_ring_allow_warn(dev_priv)) 2833 DRM_ERROR("gpu hanging too fast, banning!\n"); 2834 return true; 2835 } 2836 } 2837 2838 return false; 2839 } 2840 2841 static void i915_set_reset_status(struct drm_i915_private *dev_priv, 2842 struct intel_context *ctx, 2843 const bool guilty) 2844 { 2845 struct i915_ctx_hang_stats *hs; 2846 2847 if (WARN_ON(!ctx)) 2848 return; 2849 2850 hs = &ctx->hang_stats; 2851 2852 if (guilty) { 2853 hs->banned = i915_context_is_banned(dev_priv, ctx); 2854 hs->batch_active++; 2855 hs->guilty_ts = get_seconds(); 2856 } else { 2857 hs->batch_pending++; 2858 } 2859 } 2860 2861 void i915_gem_request_free(struct kref *req_ref) 2862 { 2863 struct drm_i915_gem_request *req = container_of(req_ref, 2864 typeof(*req), ref); 2865 struct intel_context *ctx = req->ctx; 2866 2867 if (req->file_priv) 2868 i915_gem_request_remove_from_client(req); 2869 2870 if (ctx) { 2871 if (i915.enable_execlists && ctx != req->i915->kernel_context) 2872 intel_lr_context_unpin(ctx, req->engine); 2873 2874 i915_gem_context_unreference(ctx); 2875 } 2876 2877 kfree(req); 2878 } 2879 2880 static inline int 2881 __i915_gem_request_alloc(struct intel_engine_cs *engine, 2882 struct intel_context *ctx, 2883 struct drm_i915_gem_request **req_out) 2884 { 2885 struct drm_i915_private *dev_priv = to_i915(engine->dev); 2886 unsigned reset_counter = i915_reset_counter(&dev_priv->gpu_error); 2887 struct drm_i915_gem_request *req; 2888 int ret; 2889 2890 if (!req_out) 2891 return -EINVAL; 2892 2893 *req_out = NULL; 2894 2895 /* ABI: Before userspace accesses the GPU (e.g. execbuffer), report 2896 * EIO if the GPU is already wedged, or EAGAIN to drop the struct_mutex 2897 * and restart. 2898 */ 2899 ret = i915_gem_check_wedge(reset_counter, dev_priv->mm.interruptible); 2900 if (ret) 2901 return ret; 2902 2903 req = kzalloc(sizeof(*req), GFP_KERNEL); 2904 if (req == NULL) 2905 return -ENOMEM; 2906 2907 ret = i915_gem_get_seqno(engine->dev, &req->seqno); 2908 if (ret) 2909 goto err; 2910 2911 kref_init(&req->ref); 2912 req->i915 = dev_priv; 2913 req->engine = engine; 2914 req->reset_counter = reset_counter; 2915 req->ctx = ctx; 2916 i915_gem_context_reference(req->ctx); 2917 2918 if (i915.enable_execlists) 2919 ret = intel_logical_ring_alloc_request_extras(req); 2920 else 2921 ret = intel_ring_alloc_request_extras(req); 2922 if (ret) { 2923 i915_gem_context_unreference(req->ctx); 2924 goto err; 2925 } 2926 2927 /* 2928 * Reserve space in the ring buffer for all the commands required to 2929 * eventually emit this request. This is to guarantee that the 2930 * i915_add_request() call can't fail. Note that the reserve may need 2931 * to be redone if the request is not actually submitted straight 2932 * away, e.g. because a GPU scheduler has deferred it. 
2933 */ 2934 if (i915.enable_execlists) 2935 ret = intel_logical_ring_reserve_space(req); 2936 else 2937 ret = intel_ring_reserve_space(req); 2938 if (ret) { 2939 /* 2940 * At this point, the request is fully allocated even if not 2941 * fully prepared. Thus it can be cleaned up using the proper 2942 * free code. 2943 */ 2944 intel_ring_reserved_space_cancel(req->ringbuf); 2945 i915_gem_request_unreference(req); 2946 return ret; 2947 } 2948 2949 *req_out = req; 2950 return 0; 2951 2952 err: 2953 kfree(req); 2954 return ret; 2955 } 2956 2957 /** 2958 * i915_gem_request_alloc - allocate a request structure 2959 * 2960 * @engine: engine that we wish to issue the request on. 2961 * @ctx: context that the request will be associated with. 2962 * This can be NULL if the request is not directly related to 2963 * any specific user context, in which case this function will 2964 * choose an appropriate context to use. 2965 * 2966 * Returns a pointer to the allocated request if successful, 2967 * or an error code if not. 2968 */ 2969 struct drm_i915_gem_request * 2970 i915_gem_request_alloc(struct intel_engine_cs *engine, 2971 struct intel_context *ctx) 2972 { 2973 struct drm_i915_gem_request *req; 2974 int err; 2975 2976 if (ctx == NULL) 2977 ctx = to_i915(engine->dev)->kernel_context; 2978 err = __i915_gem_request_alloc(engine, ctx, &req); 2979 return err ? ERR_PTR(err) : req; 2980 } 2981 2982 struct drm_i915_gem_request * 2983 i915_gem_find_active_request(struct intel_engine_cs *engine) 2984 { 2985 struct drm_i915_gem_request *request; 2986 2987 list_for_each_entry(request, &engine->request_list, list) { 2988 if (i915_gem_request_completed(request, false)) 2989 continue; 2990 2991 return request; 2992 } 2993 2994 return NULL; 2995 } 2996 2997 static void i915_gem_reset_engine_status(struct drm_i915_private *dev_priv, 2998 struct intel_engine_cs *engine) 2999 { 3000 struct drm_i915_gem_request *request; 3001 bool ring_hung; 3002 3003 request = i915_gem_find_active_request(engine); 3004 3005 if (request == NULL) 3006 return; 3007 3008 ring_hung = engine->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG; 3009 3010 i915_set_reset_status(dev_priv, request->ctx, ring_hung); 3011 3012 list_for_each_entry_continue(request, &engine->request_list, list) 3013 i915_set_reset_status(dev_priv, request->ctx, false); 3014 } 3015 3016 static void i915_gem_reset_engine_cleanup(struct drm_i915_private *dev_priv, 3017 struct intel_engine_cs *engine) 3018 { 3019 struct intel_ringbuffer *buffer; 3020 3021 while (!list_empty(&engine->active_list)) { 3022 struct drm_i915_gem_object *obj; 3023 3024 obj = list_first_entry(&engine->active_list, 3025 struct drm_i915_gem_object, 3026 engine_list[engine->id]); 3027 3028 i915_gem_object_retire__read(obj, engine->id); 3029 } 3030 3031 /* 3032 * Clear the execlists queue up before freeing the requests, as those 3033 * are the ones that keep the context and ringbuffer backing objects 3034 * pinned in place. 3035 */ 3036 3037 if (i915.enable_execlists) { 3038 /* Ensure irq handler finishes or is cancelled. */ 3039 tasklet_kill(&engine->irq_tasklet); 3040 3041 spin_lock_bh(&engine->execlist_lock); 3042 /* list_splice_tail_init checks for empty lists */ 3043 list_splice_tail_init(&engine->execlist_queue, 3044 &engine->execlist_retired_req_list); 3045 spin_unlock_bh(&engine->execlist_lock); 3046 3047 intel_execlists_retire_requests(engine); 3048 } 3049 3050 /* 3051 * We must free the requests after all the corresponding objects have 3052 * been moved off active lists. 
Which is the same order as the normal 3053 * retire_requests function does. This is important if objects hold 3054 * implicit references on things like e.g. ppgtt address spaces through 3055 * the request. 3056 */ 3057 while (!list_empty(&engine->request_list)) { 3058 struct drm_i915_gem_request *request; 3059 3060 request = list_first_entry(&engine->request_list, 3061 struct drm_i915_gem_request, 3062 list); 3063 3064 i915_gem_request_retire(request); 3065 } 3066 3067 /* Having flushed all requests from all queues, we know that all 3068 * ringbuffers must now be empty. However, since we do not reclaim 3069 * all space when retiring the request (to prevent HEADs colliding 3070 * with rapid ringbuffer wraparound) the amount of available space 3071 * upon reset is less than when we start. Do one more pass over 3072 * all the ringbuffers to reset last_retired_head. 3073 */ 3074 list_for_each_entry(buffer, &engine->buffers, link) { 3075 buffer->last_retired_head = buffer->tail; 3076 intel_ring_update_space(buffer); 3077 } 3078 3079 intel_ring_init_seqno(engine, engine->last_submitted_seqno); 3080 } 3081 3082 void i915_gem_reset(struct drm_device *dev) 3083 { 3084 struct drm_i915_private *dev_priv = dev->dev_private; 3085 struct intel_engine_cs *engine; 3086 3087 /* 3088 * Before we free the objects from the requests, we need to inspect 3089 * them for finding the guilty party. As the requests only borrow 3090 * their reference to the objects, the inspection must be done first. 3091 */ 3092 for_each_engine(engine, dev_priv) 3093 i915_gem_reset_engine_status(dev_priv, engine); 3094 3095 for_each_engine(engine, dev_priv) 3096 i915_gem_reset_engine_cleanup(dev_priv, engine); 3097 3098 i915_gem_context_reset(dev); 3099 3100 i915_gem_restore_fences(dev); 3101 3102 WARN_ON(i915_verify_lists(dev)); 3103 } 3104 3105 /** 3106 * This function clears the request list as sequence numbers are passed. 3107 */ 3108 void 3109 i915_gem_retire_requests_ring(struct intel_engine_cs *engine) 3110 { 3111 WARN_ON(i915_verify_lists(engine->dev)); 3112 3113 /* Retire requests first as we use it above for the early return. 3114 * If we retire requests last, we may use a later seqno and so clear 3115 * the requests lists without clearing the active list, leading to 3116 * confusion. 3117 */ 3118 while (!list_empty(&engine->request_list)) { 3119 struct drm_i915_gem_request *request; 3120 3121 request = list_first_entry(&engine->request_list, 3122 struct drm_i915_gem_request, 3123 list); 3124 3125 if (!i915_gem_request_completed(request, true)) 3126 break; 3127 3128 i915_gem_request_retire(request); 3129 } 3130 3131 /* Move any buffers on the active list that are no longer referenced 3132 * by the ringbuffer to the flushing/inactive lists as appropriate, 3133 * before we free the context associated with the requests.
3134 */ 3135 while (!list_empty(&engine->active_list)) { 3136 struct drm_i915_gem_object *obj; 3137 3138 obj = list_first_entry(&engine->active_list, 3139 struct drm_i915_gem_object, 3140 engine_list[engine->id]); 3141 3142 if (!list_empty(&obj->last_read_req[engine->id]->list)) 3143 break; 3144 3145 i915_gem_object_retire__read(obj, engine->id); 3146 } 3147 3148 if (unlikely(engine->trace_irq_req && 3149 i915_gem_request_completed(engine->trace_irq_req, true))) { 3150 engine->irq_put(engine); 3151 i915_gem_request_assign(&engine->trace_irq_req, NULL); 3152 } 3153 3154 WARN_ON(i915_verify_lists(engine->dev)); 3155 } 3156 3157 bool 3158 i915_gem_retire_requests(struct drm_device *dev) 3159 { 3160 struct drm_i915_private *dev_priv = dev->dev_private; 3161 struct intel_engine_cs *engine; 3162 bool idle = true; 3163 3164 for_each_engine(engine, dev_priv) { 3165 i915_gem_retire_requests_ring(engine); 3166 idle &= list_empty(&engine->request_list); 3167 if (i915.enable_execlists) { 3168 spin_lock_bh(&engine->execlist_lock); 3169 idle &= list_empty(&engine->execlist_queue); 3170 spin_unlock_bh(&engine->execlist_lock); 3171 3172 intel_execlists_retire_requests(engine); 3173 } 3174 } 3175 3176 if (idle) 3177 mod_delayed_work(dev_priv->wq, 3178 &dev_priv->mm.idle_work, 3179 msecs_to_jiffies(100)); 3180 3181 return idle; 3182 } 3183 3184 static void 3185 i915_gem_retire_work_handler(struct work_struct *work) 3186 { 3187 struct drm_i915_private *dev_priv = 3188 container_of(work, typeof(*dev_priv), mm.retire_work.work); 3189 struct drm_device *dev = dev_priv->dev; 3190 bool idle; 3191 3192 /* Come back later if the device is busy... */ 3193 idle = false; 3194 if (mutex_trylock(&dev->struct_mutex)) { 3195 idle = i915_gem_retire_requests(dev); 3196 mutex_unlock(&dev->struct_mutex); 3197 } 3198 if (!idle) 3199 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 3200 round_jiffies_up_relative(HZ)); 3201 } 3202 3203 static void 3204 i915_gem_idle_work_handler(struct work_struct *work) 3205 { 3206 struct drm_i915_private *dev_priv = 3207 container_of(work, typeof(*dev_priv), mm.idle_work.work); 3208 struct drm_device *dev = dev_priv->dev; 3209 struct intel_engine_cs *engine; 3210 3211 for_each_engine(engine, dev_priv) 3212 if (!list_empty(&engine->request_list)) 3213 return; 3214 3215 /* we probably should sync with hangcheck here, using cancel_work_sync. 3216 * Also locking seems to be fubar here, engine->request_list is protected 3217 * by dev->struct_mutex. */ 3218 3219 intel_mark_idle(dev); 3220 3221 if (mutex_trylock(&dev->struct_mutex)) { 3222 for_each_engine(engine, dev_priv) 3223 i915_gem_batch_pool_fini(&engine->batch_pool); 3224 3225 mutex_unlock(&dev->struct_mutex); 3226 } 3227 } 3228 3229 /** 3230 * Ensures that an object will eventually get non-busy by flushing any required 3231 * write domains, emitting any outstanding lazy request and retiring any 3232 * completed requests.
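 * Called with struct_mutex held; the wait ioctl below relies on this to
 * make forward progress when polling with a zero timeout.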
3233 */ 3234 static int 3235 i915_gem_object_flush_active(struct drm_i915_gem_object *obj) 3236 { 3237 int i; 3238 3239 if (!obj->active) 3240 return 0; 3241 3242 for (i = 0; i < I915_NUM_ENGINES; i++) { 3243 struct drm_i915_gem_request *req; 3244 3245 req = obj->last_read_req[i]; 3246 if (req == NULL) 3247 continue; 3248 3249 if (list_empty(&req->list)) 3250 goto retire; 3251 3252 if (i915_gem_request_completed(req, true)) { 3253 __i915_gem_request_retire__upto(req); 3254 retire: 3255 i915_gem_object_retire__read(obj, i); 3256 } 3257 } 3258 3259 return 0; 3260 } 3261 3262 /** 3263 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT 3264 * @DRM_IOCTL_ARGS: standard ioctl arguments 3265 * 3266 * Returns 0 if successful, else an error is returned with the remaining time in 3267 * the timeout parameter. 3268 * -ETIME: object is still busy after timeout 3269 * -ERESTARTSYS: signal interrupted the wait 3270 * -ENOENT: object doesn't exist 3271 * Also possible, but rare: 3272 * -EAGAIN: GPU wedged 3273 * -ENOMEM: damn 3274 * -ENODEV: Internal IRQ fail 3275 * -E?: The add request failed 3276 * 3277 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any 3278 * non-zero timeout parameter the wait ioctl will wait for the given number of 3279 * nanoseconds on an object becoming unbusy. Since the wait itself does so 3280 * without holding struct_mutex the object may become re-busied before this 3281 * function completes. A similar but shorter race condition exists in the busy 3282 * ioctl. 3283 */ 3284 int 3285 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) 3286 { 3287 struct drm_i915_gem_wait *args = data; 3288 struct drm_i915_gem_object *obj; 3289 struct drm_i915_gem_request *req[I915_NUM_ENGINES]; 3290 int i, n = 0; 3291 int ret; 3292 3293 if (args->flags != 0) 3294 return -EINVAL; 3295 3296 ret = i915_mutex_lock_interruptible(dev); 3297 if (ret) 3298 return ret; 3299 3300 obj = to_intel_bo(drm_gem_object_lookup(file, args->bo_handle)); 3301 if (&obj->base == NULL) { 3302 mutex_unlock(&dev->struct_mutex); 3303 return -ENOENT; 3304 } 3305 3306 /* Need to make sure the object gets inactive eventually. */ 3307 ret = i915_gem_object_flush_active(obj); 3308 if (ret) 3309 goto out; 3310 3311 if (!obj->active) 3312 goto out; 3313 3314 /* Do this after OLR check to make sure we make forward progress polling 3315 * on this IOCTL with a timeout == 0 (like busy ioctl) 3316 */ 3317 if (args->timeout_ns == 0) { 3318 ret = -ETIME; 3319 goto out; 3320 } 3321 3322 drm_gem_object_unreference(&obj->base); 3323 3324 for (i = 0; i < I915_NUM_ENGINES; i++) { 3325 if (obj->last_read_req[i] == NULL) 3326 continue; 3327 3328 req[n++] = i915_gem_request_reference(obj->last_read_req[i]); 3329 } 3330 3331 mutex_unlock(&dev->struct_mutex); 3332 3333 for (i = 0; i < n; i++) { 3334 if (ret == 0) 3335 ret = __i915_wait_request(req[i], true, 3336 args->timeout_ns > 0 ?
&args->timeout_ns : NULL, 3337 to_rps_client(file)); 3338 i915_gem_request_unreference__unlocked(req[i]); 3339 } 3340 return ret; 3341 3342 out: 3343 drm_gem_object_unreference(&obj->base); 3344 mutex_unlock(&dev->struct_mutex); 3345 return ret; 3346 } 3347 3348 static int 3349 __i915_gem_object_sync(struct drm_i915_gem_object *obj, 3350 struct intel_engine_cs *to, 3351 struct drm_i915_gem_request *from_req, 3352 struct drm_i915_gem_request **to_req) 3353 { 3354 struct intel_engine_cs *from; 3355 int ret; 3356 3357 from = i915_gem_request_get_engine(from_req); 3358 if (to == from) 3359 return 0; 3360 3361 if (i915_gem_request_completed(from_req, true)) 3362 return 0; 3363 3364 if (!i915_semaphore_is_enabled(obj->base.dev)) { 3365 struct drm_i915_private *i915 = to_i915(obj->base.dev); 3366 ret = __i915_wait_request(from_req, 3367 i915->mm.interruptible, 3368 NULL, 3369 &i915->rps.semaphores); 3370 if (ret) 3371 return ret; 3372 3373 i915_gem_object_retire_request(obj, from_req); 3374 } else { 3375 int idx = intel_ring_sync_index(from, to); 3376 u32 seqno = i915_gem_request_get_seqno(from_req); 3377 3378 WARN_ON(!to_req); 3379 3380 if (seqno <= from->semaphore.sync_seqno[idx]) 3381 return 0; 3382 3383 if (*to_req == NULL) { 3384 struct drm_i915_gem_request *req; 3385 3386 req = i915_gem_request_alloc(to, NULL); 3387 if (IS_ERR(req)) 3388 return PTR_ERR(req); 3389 3390 *to_req = req; 3391 } 3392 3393 trace_i915_gem_ring_sync_to(*to_req, from, from_req); 3394 ret = to->semaphore.sync_to(*to_req, from, seqno); 3395 if (ret) 3396 return ret; 3397 3398 /* We use last_read_req because sync_to() 3399 * might have just caused seqno wrap under 3400 * the radar. 3401 */ 3402 from->semaphore.sync_seqno[idx] = 3403 i915_gem_request_get_seqno(obj->last_read_req[from->id]); 3404 } 3405 3406 return 0; 3407 } 3408 3409 /** 3410 * i915_gem_object_sync - sync an object to a ring. 3411 * 3412 * @obj: object which may be in use on another ring. 3413 * @to: ring we wish to use the object on. May be NULL. 3414 * @to_req: request we wish to use the object for. See below. 3415 * This will be allocated and returned if a request is 3416 * required but not passed in. 3417 * 3418 * This code is meant to abstract object synchronization with the GPU. 3419 * Calling with NULL implies synchronizing the object with the CPU 3420 * rather than a particular GPU ring. Conceptually we serialise writes 3421 * between engines inside the GPU. We only allow one engine to write 3422 * into a buffer at any time, but multiple readers. To ensure each has 3423 * a coherent view of memory, we must: 3424 * 3425 * - If there is an outstanding write request to the object, the new 3426 * request must wait for it to complete (either CPU or in hw, requests 3427 * on the same ring will be naturally ordered). 3428 * 3429 * - If we are a write request (pending_write_domain is set), the new 3430 * request must wait for outstanding read requests to complete. 3431 * 3432 * For CPU synchronisation (NULL to) no request is required. For syncing with 3433 * rings to_req must be non-NULL. However, a request does not have to be 3434 * pre-allocated. If *to_req is NULL and sync commands will be emitted then a 3435 * request will be allocated automatically and returned through *to_req. Note 3436 * that it is not guaranteed that commands will be emitted (because the system 3437 * might already be idle). Hence there is no need to create a request that 3438 * might never have any work submitted. 
Note further that if a request is 3439 * returned in *to_req, it is the responsibility of the caller to submit 3440 * that request (after potentially adding more work to it). 3441 * 3442 * Returns 0 if successful, else propagates up the lower layer error. 3443 */ 3444 int 3445 i915_gem_object_sync(struct drm_i915_gem_object *obj, 3446 struct intel_engine_cs *to, 3447 struct drm_i915_gem_request **to_req) 3448 { 3449 const bool readonly = obj->base.pending_write_domain == 0; 3450 struct drm_i915_gem_request *req[I915_NUM_ENGINES]; 3451 int ret, i, n; 3452 3453 if (!obj->active) 3454 return 0; 3455 3456 if (to == NULL) 3457 return i915_gem_object_wait_rendering(obj, readonly); 3458 3459 n = 0; 3460 if (readonly) { 3461 if (obj->last_write_req) 3462 req[n++] = obj->last_write_req; 3463 } else { 3464 for (i = 0; i < I915_NUM_ENGINES; i++) 3465 if (obj->last_read_req[i]) 3466 req[n++] = obj->last_read_req[i]; 3467 } 3468 for (i = 0; i < n; i++) { 3469 ret = __i915_gem_object_sync(obj, to, req[i], to_req); 3470 if (ret) 3471 return ret; 3472 } 3473 3474 return 0; 3475 } 3476 3477 static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj) 3478 { 3479 u32 old_write_domain, old_read_domains; 3480 3481 /* Force a pagefault for domain tracking on next user access */ 3482 i915_gem_release_mmap(obj); 3483 3484 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) 3485 return; 3486 3487 old_read_domains = obj->base.read_domains; 3488 old_write_domain = obj->base.write_domain; 3489 3490 obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT; 3491 obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT; 3492 3493 trace_i915_gem_object_change_domain(obj, 3494 old_read_domains, 3495 old_write_domain); 3496 } 3497 3498 static int __i915_vma_unbind(struct i915_vma *vma, bool wait) 3499 { 3500 struct drm_i915_gem_object *obj = vma->obj; 3501 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 3502 int ret; 3503 3504 if (list_empty(&vma->obj_link)) 3505 return 0; 3506 3507 if (!drm_mm_node_allocated(&vma->node)) { 3508 i915_gem_vma_destroy(vma); 3509 return 0; 3510 } 3511 3512 if (vma->pin_count) 3513 return -EBUSY; 3514 3515 BUG_ON(obj->pages == NULL); 3516 3517 if (wait) { 3518 ret = i915_gem_object_wait_rendering(obj, false); 3519 if (ret) 3520 return ret; 3521 } 3522 3523 if (vma->is_ggtt && vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) { 3524 i915_gem_object_finish_gtt(obj); 3525 3526 /* release the fence reg _after_ flushing */ 3527 ret = i915_gem_object_put_fence(obj); 3528 if (ret) 3529 return ret; 3530 } 3531 3532 trace_i915_vma_unbind(vma); 3533 3534 vma->vm->unbind_vma(vma); 3535 vma->bound = 0; 3536 3537 list_del_init(&vma->vm_link); 3538 if (vma->is_ggtt) { 3539 if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) { 3540 obj->map_and_fenceable = false; 3541 } else if (vma->ggtt_view.pages) { 3542 sg_free_table(vma->ggtt_view.pages); 3543 kfree(vma->ggtt_view.pages); 3544 } 3545 vma->ggtt_view.pages = NULL; 3546 } 3547 3548 drm_mm_remove_node(&vma->node); 3549 i915_gem_vma_destroy(vma); 3550 3551 /* Since the unbound list is global, only move to that list if 3552 * no more VMAs exist. */ 3553 if (list_empty(&obj->vma_list)) 3554 list_move_tail(&obj->global_list, &dev_priv->mm.unbound_list); 3555 3556 /* And finally now the object is completely decoupled from this vma, 3557 * we can drop its hold on the backing storage and allow it to be 3558 * reaped by the shrinker. 
3559 */ 3560 i915_gem_object_unpin_pages(obj); 3561 3562 return 0; 3563 } 3564 3565 int i915_vma_unbind(struct i915_vma *vma) 3566 { 3567 return __i915_vma_unbind(vma, true); 3568 } 3569 3570 int __i915_vma_unbind_no_wait(struct i915_vma *vma) 3571 { 3572 return __i915_vma_unbind(vma, false); 3573 } 3574 3575 int i915_gpu_idle(struct drm_device *dev) 3576 { 3577 struct drm_i915_private *dev_priv = dev->dev_private; 3578 struct intel_engine_cs *engine; 3579 int ret; 3580 3581 /* Flush everything onto the inactive list. */ 3582 for_each_engine(engine, dev_priv) { 3583 if (!i915.enable_execlists) { 3584 struct drm_i915_gem_request *req; 3585 3586 req = i915_gem_request_alloc(engine, NULL); 3587 if (IS_ERR(req)) 3588 return PTR_ERR(req); 3589 3590 ret = i915_switch_context(req); 3591 i915_add_request_no_flush(req); 3592 if (ret) 3593 return ret; 3594 } 3595 3596 ret = intel_engine_idle(engine); 3597 if (ret) 3598 return ret; 3599 } 3600 3601 WARN_ON(i915_verify_lists(dev)); 3602 return 0; 3603 } 3604 3605 static bool i915_gem_valid_gtt_space(struct i915_vma *vma, 3606 unsigned long cache_level) 3607 { 3608 struct drm_mm_node *gtt_space = &vma->node; 3609 struct drm_mm_node *other; 3610 3611 /* 3612 * On some machines we have to be careful when putting differing types 3613 * of snoopable memory together to avoid the prefetcher crossing memory 3614 * domains and dying. During vm initialisation, we decide whether or not 3615 * these constraints apply and set the drm_mm.color_adjust 3616 * appropriately. 3617 */ 3618 if (vma->vm->mm.color_adjust == NULL) 3619 return true; 3620 3621 if (!drm_mm_node_allocated(gtt_space)) 3622 return true; 3623 3624 if (list_empty(&gtt_space->node_list)) 3625 return true; 3626 3627 other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list); 3628 if (other->allocated && !other->hole_follows && other->color != cache_level) 3629 return false; 3630 3631 other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list); 3632 if (other->allocated && !gtt_space->hole_follows && other->color != cache_level) 3633 return false; 3634 3635 return true; 3636 } 3637 3638 /** 3639 * Finds free space in the GTT aperture and binds the object or a view of it 3640 * there. 3641 */ 3642 static struct i915_vma * 3643 i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, 3644 struct i915_address_space *vm, 3645 const struct i915_ggtt_view *ggtt_view, 3646 unsigned alignment, 3647 uint64_t flags) 3648 { 3649 struct drm_device *dev = obj->base.dev; 3650 struct drm_i915_private *dev_priv = to_i915(dev); 3651 struct i915_ggtt *ggtt = &dev_priv->ggtt; 3652 u32 fence_alignment, unfenced_alignment; 3653 u32 search_flag, alloc_flag; 3654 u64 start, end; 3655 u64 size, fence_size; 3656 struct i915_vma *vma; 3657 int ret; 3658 3659 if (i915_is_ggtt(vm)) { 3660 u32 view_size; 3661 3662 if (WARN_ON(!ggtt_view)) 3663 return ERR_PTR(-EINVAL); 3664 3665 view_size = i915_ggtt_view_size(obj, ggtt_view); 3666 3667 fence_size = i915_gem_get_gtt_size(dev, 3668 view_size, 3669 obj->tiling_mode); 3670 fence_alignment = i915_gem_get_gtt_alignment(dev, 3671 view_size, 3672 obj->tiling_mode, 3673 true); 3674 unfenced_alignment = i915_gem_get_gtt_alignment(dev, 3675 view_size, 3676 obj->tiling_mode, 3677 false); 3678 size = flags & PIN_MAPPABLE ?
fence_size : view_size; 3679 } else { 3680 fence_size = i915_gem_get_gtt_size(dev, 3681 obj->base.size, 3682 obj->tiling_mode); 3683 fence_alignment = i915_gem_get_gtt_alignment(dev, 3684 obj->base.size, 3685 obj->tiling_mode, 3686 true); 3687 unfenced_alignment = 3688 i915_gem_get_gtt_alignment(dev, 3689 obj->base.size, 3690 obj->tiling_mode, 3691 false); 3692 size = flags & PIN_MAPPABLE ? fence_size : obj->base.size; 3693 } 3694 3695 start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0; 3696 end = vm->total; 3697 if (flags & PIN_MAPPABLE) 3698 end = min_t(u64, end, ggtt->mappable_end); 3699 if (flags & PIN_ZONE_4G) 3700 end = min_t(u64, end, (1ULL << 32) - PAGE_SIZE); 3701 3702 if (alignment == 0) 3703 alignment = flags & PIN_MAPPABLE ? fence_alignment : 3704 unfenced_alignment; 3705 if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) { 3706 DRM_DEBUG("Invalid object (view type=%u) alignment requested %u\n", 3707 ggtt_view ? ggtt_view->type : 0, 3708 alignment); 3709 return ERR_PTR(-EINVAL); 3710 } 3711 3712 /* If binding the object/GGTT view requires more space than the entire 3713 * aperture has, reject it early before evicting everything in a vain 3714 * attempt to find space. 3715 */ 3716 if (size > end) { 3717 DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: size=%llu > %s aperture=%llu\n", 3718 ggtt_view ? ggtt_view->type : 0, 3719 size, 3720 flags & PIN_MAPPABLE ? "mappable" : "total", 3721 end); 3722 return ERR_PTR(-E2BIG); 3723 } 3724 3725 ret = i915_gem_object_get_pages(obj); 3726 if (ret) 3727 return ERR_PTR(ret); 3728 3729 i915_gem_object_pin_pages(obj); 3730 3731 vma = ggtt_view ? i915_gem_obj_lookup_or_create_ggtt_vma(obj, ggtt_view) : 3732 i915_gem_obj_lookup_or_create_vma(obj, vm); 3733 3734 if (IS_ERR(vma)) 3735 goto err_unpin; 3736 3737 if (flags & PIN_OFFSET_FIXED) { 3738 uint64_t offset = flags & PIN_OFFSET_MASK; 3739 3740 if (offset & (alignment - 1) || offset + size > end) { 3741 ret = -EINVAL; 3742 goto err_free_vma; 3743 } 3744 vma->node.start = offset; 3745 vma->node.size = size; 3746 vma->node.color = obj->cache_level; 3747 ret = drm_mm_reserve_node(&vm->mm, &vma->node); 3748 if (ret) { 3749 ret = i915_gem_evict_for_vma(vma); 3750 if (ret == 0) 3751 ret = drm_mm_reserve_node(&vm->mm, &vma->node); 3752 } 3753 if (ret) 3754 goto err_free_vma; 3755 } else { 3756 if (flags & PIN_HIGH) { 3757 search_flag = DRM_MM_SEARCH_BELOW; 3758 alloc_flag = DRM_MM_CREATE_TOP; 3759 } else { 3760 search_flag = DRM_MM_SEARCH_DEFAULT; 3761 alloc_flag = DRM_MM_CREATE_DEFAULT; 3762 } 3763 3764 search_free: 3765 ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node, 3766 size, alignment, 3767 obj->cache_level, 3768 start, end, 3769 search_flag, 3770 alloc_flag); 3771 if (ret) { 3772 ret = i915_gem_evict_something(dev, vm, size, alignment, 3773 obj->cache_level, 3774 start, end, 3775 flags); 3776 if (ret == 0) 3777 goto search_free; 3778 3779 goto err_free_vma; 3780 } 3781 } 3782 if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) { 3783 ret = -EINVAL; 3784 goto err_remove_node; 3785 } 3786 3787 trace_i915_vma_bind(vma, flags); 3788 ret = i915_vma_bind(vma, obj->cache_level, flags); 3789 if (ret) 3790 goto err_remove_node; 3791 3792 list_move_tail(&obj->global_list, &dev_priv->mm.bound_list); 3793 list_add_tail(&vma->vm_link, &vm->inactive_list); 3794 3795 return vma; 3796 3797 err_remove_node: 3798 drm_mm_remove_node(&vma->node); 3799 err_free_vma: 3800 i915_gem_vma_destroy(vma); 3801 vma = ERR_PTR(ret); 3802 
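/*
 * Error unwind in reverse order of setup: remove the drm_mm node
 * (err_remove_node), destroy the VMA (err_free_vma), and finally drop
 * the page pin taken above (err_unpin).
 */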
err_unpin: 3803 i915_gem_object_unpin_pages(obj); 3804 return vma; 3805 } 3806 3807 bool 3808 i915_gem_clflush_object(struct drm_i915_gem_object *obj, 3809 bool force) 3810 { 3811 /* If we don't have a page list set up, then we're not pinned 3812 * to GPU, and we can ignore the cache flush because it'll happen 3813 * again at bind time. 3814 */ 3815 if (obj->pages == NULL) 3816 return false; 3817 3818 /* 3819 * Stolen memory is always coherent with the GPU as it is explicitly 3820 * marked as wc by the system, or the system is cache-coherent. 3821 */ 3822 if (obj->stolen || obj->phys_handle) 3823 return false; 3824 3825 /* If the GPU is snooping the contents of the CPU cache, 3826 * we do not need to manually clear the CPU cache lines. However, 3827 * the caches are only snooped when the render cache is 3828 * flushed/invalidated. As we always have to emit invalidations 3829 * and flushes when moving into and out of the RENDER domain, correct 3830 * snooping behaviour occurs naturally as the result of our domain 3831 * tracking. 3832 */ 3833 if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) { 3834 obj->cache_dirty = true; 3835 return false; 3836 } 3837 3838 trace_i915_gem_object_clflush(obj); 3839 drm_clflush_sg(obj->pages); 3840 obj->cache_dirty = false; 3841 3842 return true; 3843 } 3844 3845 /** Flushes the GTT write domain for the object if it's dirty. */ 3846 static void 3847 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) 3848 { 3849 uint32_t old_write_domain; 3850 3851 if (obj->base.write_domain != I915_GEM_DOMAIN_GTT) 3852 return; 3853 3854 /* No actual flushing is required for the GTT write domain. Writes 3855 * to it immediately go to main memory as far as we know, so there's 3856 * no chipset flush. It also doesn't land in render cache. 3857 * 3858 * However, we do have to enforce the order so that all writes through 3859 * the GTT land before any writes to the device, such as updates to 3860 * the GATT itself. 3861 */ 3862 wmb(); 3863 3864 old_write_domain = obj->base.write_domain; 3865 obj->base.write_domain = 0; 3866 3867 intel_fb_obj_flush(obj, false, ORIGIN_GTT); 3868 3869 trace_i915_gem_object_change_domain(obj, 3870 obj->base.read_domains, 3871 old_write_domain); 3872 } 3873 3874 /** Flushes the CPU write domain for the object if it's dirty. */ 3875 static void 3876 i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) 3877 { 3878 uint32_t old_write_domain; 3879 3880 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) 3881 return; 3882 3883 if (i915_gem_clflush_object(obj, obj->pin_display)) 3884 i915_gem_chipset_flush(obj->base.dev); 3885 3886 old_write_domain = obj->base.write_domain; 3887 obj->base.write_domain = 0; 3888 3889 intel_fb_obj_flush(obj, false, ORIGIN_CPU); 3890 3891 trace_i915_gem_object_change_domain(obj, 3892 obj->base.read_domains, 3893 old_write_domain); 3894 } 3895 3896 /** 3897 * Moves a single object to the GTT read, and possibly write domain. 3898 * 3899 * This function returns when the move is complete, including waiting on 3900 * flushes to occur. 
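 *
 * Callers in this file typically pin the object first and only then move
 * it into the GTT domain; see i915_gem_fault() above, which calls
 * i915_gem_object_ggtt_pin() before this function.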
3901 */ 3902 int 3903 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) 3904 { 3905 struct drm_device *dev = obj->base.dev; 3906 struct drm_i915_private *dev_priv = to_i915(dev); 3907 struct i915_ggtt *ggtt = &dev_priv->ggtt; 3908 uint32_t old_write_domain, old_read_domains; 3909 struct i915_vma *vma; 3910 int ret; 3911 3912 if (obj->base.write_domain == I915_GEM_DOMAIN_GTT) 3913 return 0; 3914 3915 ret = i915_gem_object_wait_rendering(obj, !write); 3916 if (ret) 3917 return ret; 3918 3919 /* Flush and acquire obj->pages so that we are coherent through 3920 * direct access in memory with previous cached writes through 3921 * shmemfs and that our cache domain tracking remains valid. 3922 * For example, if the obj->filp was moved to swap without us 3923 * being notified and releasing the pages, we would mistakenly 3924 * continue to assume that the obj remained out of the CPU cached 3925 * domain. 3926 */ 3927 ret = i915_gem_object_get_pages(obj); 3928 if (ret) 3929 return ret; 3930 3931 i915_gem_object_flush_cpu_write_domain(obj); 3932 3933 /* Serialise direct access to this object with the barriers for 3934 * coherent writes from the GPU, by effectively invalidating the 3935 * GTT domain upon first access. 3936 */ 3937 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) 3938 mb(); 3939 3940 old_write_domain = obj->base.write_domain; 3941 old_read_domains = obj->base.read_domains; 3942 3943 /* It should now be out of any other write domains, and we can update 3944 * the domain values for our changes. 3945 */ 3946 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0); 3947 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 3948 if (write) { 3949 obj->base.read_domains = I915_GEM_DOMAIN_GTT; 3950 obj->base.write_domain = I915_GEM_DOMAIN_GTT; 3951 obj->dirty = 1; 3952 } 3953 3954 trace_i915_gem_object_change_domain(obj, 3955 old_read_domains, 3956 old_write_domain); 3957 3958 /* And bump the LRU for this access */ 3959 vma = i915_gem_obj_to_ggtt(obj); 3960 if (vma && drm_mm_node_allocated(&vma->node) && !obj->active) 3961 list_move_tail(&vma->vm_link, 3962 &ggtt->base.inactive_list); 3963 3964 return 0; 3965 } 3966 3967 /** 3968 * Changes the cache-level of an object across all VMA. 3969 * 3970 * After this function returns, the object will be in the new cache-level 3971 * across all GTT and the contents of the backing storage will be coherent, 3972 * with respect to the new cache-level. In order to keep the backing storage 3973 * coherent for all users, we only allow a single cache level to be set 3974 * globally on the object and prevent it from being changed whilst the 3975 * hardware is reading from the object. That is if the object is currently 3976 * on the scanout it will be set to uncached (or equivalent display 3977 * cache coherency) and all non-MOCS GPU access will also be uncached so 3978 * that all direct access to the scanout remains coherent. 3979 */ 3980 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, 3981 enum i915_cache_level cache_level) 3982 { 3983 struct drm_device *dev = obj->base.dev; 3984 struct i915_vma *vma, *next; 3985 bool bound = false; 3986 int ret = 0; 3987 3988 if (obj->cache_level == cache_level) 3989 goto out; 3990 3991 /* Inspect the list of currently bound VMA and unbind any that would 3992 * be invalid given the new cache-level. This is principally to 3993 * catch the issue of the CS prefetch crossing page boundaries and 3994 * reading an invalid PTE on older architectures. 
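 * The check below reuses i915_gem_valid_gtt_space(), the same predicate
 * applied at bind time, which consults the colours of neighbouring drm_mm
 * nodes whenever the VM has a color_adjust constraint.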
3995 */ 3996 list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) { 3997 if (!drm_mm_node_allocated(&vma->node)) 3998 continue; 3999 4000 if (vma->pin_count) { 4001 DRM_DEBUG("cannot change the cache level of pinned objects\n"); 4002 return -EBUSY; 4003 } 4004 4005 if (!i915_gem_valid_gtt_space(vma, cache_level)) { 4006 ret = i915_vma_unbind(vma); 4007 if (ret) 4008 return ret; 4009 } else 4010 bound = true; 4011 } 4012 4013 /* We can reuse the existing drm_mm nodes but need to change the 4014 * cache-level on the PTE. We could simply unbind them all and 4015 * rebind with the correct cache-level on next use. However, since 4016 * we already have a valid slot, dma mapping, pages etc., we may as well 4017 * rewrite the PTE in the belief that doing so tramples upon less 4018 * state and so involves less work. 4019 */ 4020 if (bound) { 4021 /* Before we change the PTE, the GPU must not be accessing it. 4022 * If we wait upon the object, we know that all the bound 4023 * VMA are no longer active. 4024 */ 4025 ret = i915_gem_object_wait_rendering(obj, false); 4026 if (ret) 4027 return ret; 4028 4029 if (!HAS_LLC(dev) && cache_level != I915_CACHE_NONE) { 4030 /* Access to snoopable pages through the GTT is 4031 * incoherent and on some machines causes a hard 4032 * lockup. Relinquish the CPU mmapping to force 4033 * userspace to refault in the pages and we can 4034 * then double check if the GTT mapping is still 4035 * valid for that pointer access. 4036 */ 4037 i915_gem_release_mmap(obj); 4038 4039 /* As we no longer need a fence for GTT access, 4040 * we can relinquish it now (and so prevent having 4041 * to steal a fence from someone else on the next 4042 * fence request). Note GPU activity would have 4043 * dropped the fence as all snoopable access is 4044 * supposed to be linear. 4045 */ 4046 ret = i915_gem_object_put_fence(obj); 4047 if (ret) 4048 return ret; 4049 } else { 4050 /* We either have incoherent backing store and 4051 * so no GTT access or the architecture is fully 4052 * coherent. In such cases, existing GTT mmaps 4053 * ignore the cache bit in the PTE and we can 4054 * rewrite it without confusing the GPU or having 4055 * to force userspace to fault back in its mmaps. 4056 */ 4057 } 4058 4059 list_for_each_entry(vma, &obj->vma_list, obj_link) { 4060 if (!drm_mm_node_allocated(&vma->node)) 4061 continue; 4062 4063 ret = i915_vma_bind(vma, cache_level, PIN_UPDATE); 4064 if (ret) 4065 return ret; 4066 } 4067 } 4068 4069 list_for_each_entry(vma, &obj->vma_list, obj_link) 4070 vma->node.color = cache_level; 4071 obj->cache_level = cache_level; 4072 4073 out: 4074 /* Flush the dirty CPU caches to the backing storage so that the 4075 * object is now coherent at its new cache level (with respect 4076 * to the access domain).
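 * The conditional below therefore fires only when the object is
 * cache-dirty, is not already owned by the CPU write domain, and
 * actually requires a manual clflush (see cpu_write_needs_clflush()).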
4077 */ 4078 if (obj->cache_dirty && 4079 obj->base.write_domain != I915_GEM_DOMAIN_CPU && 4080 cpu_write_needs_clflush(obj)) { 4081 if (i915_gem_clflush_object(obj, true)) 4082 i915_gem_chipset_flush(obj->base.dev); 4083 } 4084 4085 return 0; 4086 } 4087 4088 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, 4089 struct drm_file *file) 4090 { 4091 struct drm_i915_gem_caching *args = data; 4092 struct drm_i915_gem_object *obj; 4093 4094 obj = to_intel_bo(drm_gem_object_lookup(file, args->handle)); 4095 if (&obj->base == NULL) 4096 return -ENOENT; 4097 4098 switch (obj->cache_level) { 4099 case I915_CACHE_LLC: 4100 case I915_CACHE_L3_LLC: 4101 args->caching = I915_CACHING_CACHED; 4102 break; 4103 4104 case I915_CACHE_WT: 4105 args->caching = I915_CACHING_DISPLAY; 4106 break; 4107 4108 default: 4109 args->caching = I915_CACHING_NONE; 4110 break; 4111 } 4112 4113 drm_gem_object_unreference_unlocked(&obj->base); 4114 return 0; 4115 } 4116 4117 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, 4118 struct drm_file *file) 4119 { 4120 struct drm_i915_private *dev_priv = dev->dev_private; 4121 struct drm_i915_gem_caching *args = data; 4122 struct drm_i915_gem_object *obj; 4123 enum i915_cache_level level; 4124 int ret; 4125 4126 switch (args->caching) { 4127 case I915_CACHING_NONE: 4128 level = I915_CACHE_NONE; 4129 break; 4130 case I915_CACHING_CACHED: 4131 /* 4132 * Due to a HW issue on BXT A stepping, GPU stores via a 4133 * snooped mapping may leave stale data in a corresponding CPU 4134 * cacheline, whereas normally such cachelines would get 4135 * invalidated. 4136 */ 4137 if (!HAS_LLC(dev) && !HAS_SNOOP(dev)) 4138 return -ENODEV; 4139 4140 level = I915_CACHE_LLC; 4141 break; 4142 case I915_CACHING_DISPLAY: 4143 level = HAS_WT(dev) ? I915_CACHE_WT : I915_CACHE_NONE; 4144 break; 4145 default: 4146 return -EINVAL; 4147 } 4148 4149 intel_runtime_pm_get(dev_priv); 4150 4151 ret = i915_mutex_lock_interruptible(dev); 4152 if (ret) 4153 goto rpm_put; 4154 4155 obj = to_intel_bo(drm_gem_object_lookup(file, args->handle)); 4156 if (&obj->base == NULL) { 4157 ret = -ENOENT; 4158 goto unlock; 4159 } 4160 4161 ret = i915_gem_object_set_cache_level(obj, level); 4162 4163 drm_gem_object_unreference(&obj->base); 4164 unlock: 4165 mutex_unlock(&dev->struct_mutex); 4166 rpm_put: 4167 intel_runtime_pm_put(dev_priv); 4168 4169 return ret; 4170 } 4171 4172 /* 4173 * Prepare buffer for display plane (scanout, cursors, etc). 4174 * Can be called from an uninterruptible phase (modesetting) and allows 4175 * any flushes to be pipelined (for pageflips). 4176 */ 4177 int 4178 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, 4179 u32 alignment, 4180 const struct i915_ggtt_view *view) 4181 { 4182 u32 old_read_domains, old_write_domain; 4183 int ret; 4184 4185 /* Mark the pin_display early so that we account for the 4186 * display coherency whilst setting up the cache domains. 4187 */ 4188 obj->pin_display++; 4189 4190 /* The display engine is not coherent with the LLC cache on gen6. As 4191 * a result, we make sure that the pinning that is about to occur is 4192 * done with uncached PTEs. This is lowest common denominator for all 4193 * chipsets. 4194 * 4195 * However for gen6+, we could do better by using the GFDT bit instead 4196 * of uncaching, which would allow us to flush all the LLC-cached data 4197 * with that bit in the PTE to main memory with just one PIPE_CONTROL. 4198 */ 4199 ret = i915_gem_object_set_cache_level(obj, 4200 HAS_WT(obj->base.dev) ? 
I915_CACHE_WT : I915_CACHE_NONE); 4201 if (ret) 4202 goto err_unpin_display; 4203 4204 /* As the user may map the buffer once pinned in the display plane 4205 * (e.g. libkms for the bootup splash), we have to ensure that we 4206 * always use map_and_fenceable for all scanout buffers. 4207 */ 4208 ret = i915_gem_object_ggtt_pin(obj, view, alignment, 4209 view->type == I915_GGTT_VIEW_NORMAL ? 4210 PIN_MAPPABLE : 0); 4211 if (ret) 4212 goto err_unpin_display; 4213 4214 i915_gem_object_flush_cpu_write_domain(obj); 4215 4216 old_write_domain = obj->base.write_domain; 4217 old_read_domains = obj->base.read_domains; 4218 4219 /* It should now be out of any other write domains, and we can update 4220 * the domain values for our changes. 4221 */ 4222 obj->base.write_domain = 0; 4223 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 4224 4225 trace_i915_gem_object_change_domain(obj, 4226 old_read_domains, 4227 old_write_domain); 4228 4229 return 0; 4230 4231 err_unpin_display: 4232 obj->pin_display--; 4233 return ret; 4234 } 4235 4236 void 4237 i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj, 4238 const struct i915_ggtt_view *view) 4239 { 4240 if (WARN_ON(obj->pin_display == 0)) 4241 return; 4242 4243 i915_gem_object_ggtt_unpin_view(obj, view); 4244 4245 obj->pin_display--; 4246 } 4247 4248 /** 4249 * Moves a single object to the CPU read, and possibly write domain. 4250 * 4251 * This function returns when the move is complete, including waiting on 4252 * flushes to occur. 4253 */ 4254 int 4255 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) 4256 { 4257 uint32_t old_write_domain, old_read_domains; 4258 int ret; 4259 4260 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) 4261 return 0; 4262 4263 ret = i915_gem_object_wait_rendering(obj, !write); 4264 if (ret) 4265 return ret; 4266 4267 i915_gem_object_flush_gtt_write_domain(obj); 4268 4269 old_write_domain = obj->base.write_domain; 4270 old_read_domains = obj->base.read_domains; 4271 4272 /* Flush the CPU cache if it's still invalid. */ 4273 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) { 4274 i915_gem_clflush_object(obj, false); 4275 4276 obj->base.read_domains |= I915_GEM_DOMAIN_CPU; 4277 } 4278 4279 /* It should now be out of any other write domains, and we can update 4280 * the domain values for our changes. 4281 */ 4282 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0); 4283 4284 /* If we're writing through the CPU, then the GPU read domains will 4285 * need to be invalidated at next use. 4286 */ 4287 if (write) { 4288 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 4289 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 4290 } 4291 4292 trace_i915_gem_object_change_domain(obj, 4293 old_read_domains, 4294 old_write_domain); 4295 4296 return 0; 4297 } 4298 4299 /* Throttle our rendering by waiting until the ring has completed our requests 4300 * emitted over 20 msec ago. 4301 * 4302 * Note that if we were to use the current jiffies each time around the loop, 4303 * we wouldn't escape the function with any frames outstanding if the time to 4304 * render a frame was over 20ms. 4305 * 4306 * This should get us reasonable parallelism between CPU and GPU but also 4307 * relatively low latency when blocking on a particular request to finish. 
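 * (DRM_I915_THROTTLE_JIFFIES is the driver's 20 ms window expressed in
 * jiffies; "recent_enough" below is deliberately sampled once, before
 * the request list is walked, rather than re-read on each iteration.)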
4308 */ 4309 static int 4310 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) 4311 { 4312 struct drm_i915_private *dev_priv = dev->dev_private; 4313 struct drm_i915_file_private *file_priv = file->driver_priv; 4314 unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES; 4315 struct drm_i915_gem_request *request, *target = NULL; 4316 int ret; 4317 4318 ret = i915_gem_wait_for_error(&dev_priv->gpu_error); 4319 if (ret) 4320 return ret; 4321 4322 /* ABI: return -EIO if already wedged */ 4323 if (i915_terminally_wedged(&dev_priv->gpu_error)) 4324 return -EIO; 4325 4326 spin_lock(&file_priv->mm.lock); 4327 list_for_each_entry(request, &file_priv->mm.request_list, client_list) { 4328 if (time_after_eq(request->emitted_jiffies, recent_enough)) 4329 break; 4330 4331 /* 4332 * Note that the request might not have been submitted yet. 4333 * In which case emitted_jiffies will be zero. 4334 */ 4335 if (!request->emitted_jiffies) 4336 continue; 4337 4338 target = request; 4339 } 4340 if (target) 4341 i915_gem_request_reference(target); 4342 spin_unlock(&file_priv->mm.lock); 4343 4344 if (target == NULL) 4345 return 0; 4346 4347 ret = __i915_wait_request(target, true, NULL, NULL); 4348 if (ret == 0) 4349 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0); 4350 4351 i915_gem_request_unreference__unlocked(target); 4352 4353 return ret; 4354 } 4355 4356 static bool 4357 i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags) 4358 { 4359 struct drm_i915_gem_object *obj = vma->obj; 4360 4361 if (alignment && 4362 vma->node.start & (alignment - 1)) 4363 return true; 4364 4365 if (flags & PIN_MAPPABLE && !obj->map_and_fenceable) 4366 return true; 4367 4368 if (flags & PIN_OFFSET_BIAS && 4369 vma->node.start < (flags & PIN_OFFSET_MASK)) 4370 return true; 4371 4372 if (flags & PIN_OFFSET_FIXED && 4373 vma->node.start != (flags & PIN_OFFSET_MASK)) 4374 return true; 4375 4376 return false; 4377 } 4378 4379 void __i915_vma_set_map_and_fenceable(struct i915_vma *vma) 4380 { 4381 struct drm_i915_gem_object *obj = vma->obj; 4382 bool mappable, fenceable; 4383 u32 fence_size, fence_alignment; 4384 4385 fence_size = i915_gem_get_gtt_size(obj->base.dev, 4386 obj->base.size, 4387 obj->tiling_mode); 4388 fence_alignment = i915_gem_get_gtt_alignment(obj->base.dev, 4389 obj->base.size, 4390 obj->tiling_mode, 4391 true); 4392 4393 fenceable = (vma->node.size == fence_size && 4394 (vma->node.start & (fence_alignment - 1)) == 0); 4395 4396 mappable = (vma->node.start + fence_size <= 4397 to_i915(obj->base.dev)->ggtt.mappable_end); 4398 4399 obj->map_and_fenceable = mappable && fenceable; 4400 } 4401 4402 static int 4403 i915_gem_object_do_pin(struct drm_i915_gem_object *obj, 4404 struct i915_address_space *vm, 4405 const struct i915_ggtt_view *ggtt_view, 4406 uint32_t alignment, 4407 uint64_t flags) 4408 { 4409 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 4410 struct i915_vma *vma; 4411 unsigned bound; 4412 int ret; 4413 4414 if (WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base)) 4415 return -ENODEV; 4416 4417 if (WARN_ON(flags & (PIN_GLOBAL | PIN_MAPPABLE) && !i915_is_ggtt(vm))) 4418 return -EINVAL; 4419 4420 if (WARN_ON((flags & (PIN_MAPPABLE | PIN_GLOBAL)) == PIN_MAPPABLE)) 4421 return -EINVAL; 4422 4423 if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view)) 4424 return -EINVAL; 4425 4426 vma = ggtt_view ? 
i915_gem_obj_to_ggtt_view(obj, ggtt_view) : 4427 i915_gem_obj_to_vma(obj, vm); 4428 4429 if (vma) { 4430 if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT)) 4431 return -EBUSY; 4432 4433 if (i915_vma_misplaced(vma, alignment, flags)) { 4434 WARN(vma->pin_count, 4435 "bo is already pinned in %s with incorrect alignment:" 4436 " offset=%08x %08x, req.alignment=%x, req.map_and_fenceable=%d," 4437 " obj->map_and_fenceable=%d\n", 4438 ggtt_view ? "ggtt" : "ppgtt", 4439 upper_32_bits(vma->node.start), 4440 lower_32_bits(vma->node.start), 4441 alignment, 4442 !!(flags & PIN_MAPPABLE), 4443 obj->map_and_fenceable); 4444 ret = i915_vma_unbind(vma); 4445 if (ret) 4446 return ret; 4447 4448 vma = NULL; 4449 } 4450 } 4451 4452 bound = vma ? vma->bound : 0; 4453 if (vma == NULL || !drm_mm_node_allocated(&vma->node)) { 4454 vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view, alignment, 4455 flags); 4456 if (IS_ERR(vma)) 4457 return PTR_ERR(vma); 4458 } else { 4459 ret = i915_vma_bind(vma, obj->cache_level, flags); 4460 if (ret) 4461 return ret; 4462 } 4463 4464 if (ggtt_view && ggtt_view->type == I915_GGTT_VIEW_NORMAL && 4465 (bound ^ vma->bound) & GLOBAL_BIND) { 4466 __i915_vma_set_map_and_fenceable(vma); 4467 WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable); 4468 } 4469 4470 vma->pin_count++; 4471 return 0; 4472 } 4473 4474 int 4475 i915_gem_object_pin(struct drm_i915_gem_object *obj, 4476 struct i915_address_space *vm, 4477 uint32_t alignment, 4478 uint64_t flags) 4479 { 4480 return i915_gem_object_do_pin(obj, vm, 4481 i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL, 4482 alignment, flags); 4483 } 4484 4485 int 4486 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, 4487 const struct i915_ggtt_view *view, 4488 uint32_t alignment, 4489 uint64_t flags) 4490 { 4491 struct drm_device *dev = obj->base.dev; 4492 struct drm_i915_private *dev_priv = to_i915(dev); 4493 struct i915_ggtt *ggtt = &dev_priv->ggtt; 4494 4495 BUG_ON(!view); 4496 4497 return i915_gem_object_do_pin(obj, &ggtt->base, view, 4498 alignment, flags | PIN_GLOBAL); 4499 } 4500 4501 void 4502 i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj, 4503 const struct i915_ggtt_view *view) 4504 { 4505 struct i915_vma *vma = i915_gem_obj_to_ggtt_view(obj, view); 4506 4507 WARN_ON(vma->pin_count == 0); 4508 WARN_ON(!i915_gem_obj_ggtt_bound_view(obj, view)); 4509 4510 --vma->pin_count; 4511 } 4512 4513 int 4514 i915_gem_busy_ioctl(struct drm_device *dev, void *data, 4515 struct drm_file *file) 4516 { 4517 struct drm_i915_gem_busy *args = data; 4518 struct drm_i915_gem_object *obj; 4519 int ret; 4520 4521 ret = i915_mutex_lock_interruptible(dev); 4522 if (ret) 4523 return ret; 4524 4525 obj = to_intel_bo(drm_gem_object_lookup(file, args->handle)); 4526 if (&obj->base == NULL) { 4527 ret = -ENOENT; 4528 goto unlock; 4529 } 4530 4531 /* Count all active objects as busy, even if they are currently not used 4532 * by the gpu. Users of this interface expect objects to eventually 4533 * become non-busy without any further actions, therefore emit any 4534 * necessary flushes here. 
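 * The returned value encodes the busy engines as a bitmask: the low 16
 * bits carry the exec_id of the engine holding the last write, if any,
 * and each outstanding read sets bit (16 + exec_id) for its engine.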
4535 */ 4536 ret = i915_gem_object_flush_active(obj); 4537 if (ret) 4538 goto unref; 4539 4540 args->busy = 0; 4541 if (obj->active) { 4542 int i; 4543 4544 for (i = 0; i < I915_NUM_ENGINES; i++) { 4545 struct drm_i915_gem_request *req; 4546 4547 req = obj->last_read_req[i]; 4548 if (req) 4549 args->busy |= 1 << (16 + req->engine->exec_id); 4550 } 4551 if (obj->last_write_req) 4552 args->busy |= obj->last_write_req->engine->exec_id; 4553 } 4554 4555 unref: 4556 drm_gem_object_unreference(&obj->base); 4557 unlock: 4558 mutex_unlock(&dev->struct_mutex); 4559 return ret; 4560 } 4561 4562 int 4563 i915_gem_throttle_ioctl(struct drm_device *dev, void *data, 4564 struct drm_file *file_priv) 4565 { 4566 return i915_gem_ring_throttle(dev, file_priv); 4567 } 4568 4569 int 4570 i915_gem_madvise_ioctl(struct drm_device *dev, void *data, 4571 struct drm_file *file_priv) 4572 { 4573 struct drm_i915_private *dev_priv = dev->dev_private; 4574 struct drm_i915_gem_madvise *args = data; 4575 struct drm_i915_gem_object *obj; 4576 int ret; 4577 4578 switch (args->madv) { 4579 case I915_MADV_DONTNEED: 4580 case I915_MADV_WILLNEED: 4581 break; 4582 default: 4583 return -EINVAL; 4584 } 4585 4586 ret = i915_mutex_lock_interruptible(dev); 4587 if (ret) 4588 return ret; 4589 4590 obj = to_intel_bo(drm_gem_object_lookup(file_priv, args->handle)); 4591 if (&obj->base == NULL) { 4592 ret = -ENOENT; 4593 goto unlock; 4594 } 4595 4596 if (i915_gem_obj_is_pinned(obj)) { 4597 ret = -EINVAL; 4598 goto out; 4599 } 4600 4601 if (obj->pages && 4602 obj->tiling_mode != I915_TILING_NONE && 4603 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) { 4604 if (obj->madv == I915_MADV_WILLNEED) 4605 i915_gem_object_unpin_pages(obj); 4606 if (args->madv == I915_MADV_WILLNEED) 4607 i915_gem_object_pin_pages(obj); 4608 } 4609 4610 if (obj->madv != __I915_MADV_PURGED) 4611 obj->madv = args->madv; 4612 4613 /* if the object is no longer attached, discard its backing storage */ 4614 if (obj->madv == I915_MADV_DONTNEED && obj->pages == NULL) 4615 i915_gem_object_truncate(obj); 4616 4617 args->retained = obj->madv != __I915_MADV_PURGED; 4618 4619 out: 4620 drm_gem_object_unreference(&obj->base); 4621 unlock: 4622 mutex_unlock(&dev->struct_mutex); 4623 return ret; 4624 } 4625 4626 void i915_gem_object_init(struct drm_i915_gem_object *obj, 4627 const struct drm_i915_gem_object_ops *ops) 4628 { 4629 int i; 4630 4631 INIT_LIST_HEAD(&obj->global_list); 4632 for (i = 0; i < I915_NUM_ENGINES; i++) 4633 INIT_LIST_HEAD(&obj->engine_list[i]); 4634 INIT_LIST_HEAD(&obj->obj_exec_link); 4635 INIT_LIST_HEAD(&obj->vma_list); 4636 INIT_LIST_HEAD(&obj->batch_pool_link); 4637 4638 obj->ops = ops; 4639 4640 obj->fence_reg = I915_FENCE_REG_NONE; 4641 obj->madv = I915_MADV_WILLNEED; 4642 4643 i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size); 4644 } 4645 4646 static const struct drm_i915_gem_object_ops i915_gem_object_ops = { 4647 .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE, 4648 .get_pages = i915_gem_object_get_pages_gtt, 4649 .put_pages = i915_gem_object_put_pages_gtt, 4650 }; 4651 4652 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev, 4653 size_t size) 4654 { 4655 struct drm_i915_gem_object *obj; 4656 #if 0 4657 struct address_space *mapping; 4658 gfp_t mask; 4659 #endif 4660 4661 obj = i915_gem_object_alloc(dev); 4662 if (obj == NULL) 4663 return NULL; 4664 4665 if (drm_gem_object_init(dev, &obj->base, size) != 0) { 4666 i915_gem_object_free(obj); 4667 return NULL; 4668 } 4669 4670 #if 0 4671 mask = GFP_HIGHUSER | 
__GFP_RECLAIMABLE; 4672 if (IS_CRESTLINE(dev) || IS_BROADWATER(dev)) { 4673 /* 965gm cannot relocate objects above 4GiB. */ 4674 mask &= ~__GFP_HIGHMEM; 4675 mask |= __GFP_DMA32; 4676 } 4677 4678 mapping = file_inode(obj->base.filp)->i_mapping; 4679 mapping_set_gfp_mask(mapping, mask); 4680 #endif 4681 4682 i915_gem_object_init(obj, &i915_gem_object_ops); 4683 4684 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 4685 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 4686 4687 if (HAS_LLC(dev)) { 4688 /* On some devices, we can have the GPU use the LLC (the CPU 4689 * cache) for about a 10% performance improvement 4690 * compared to uncached. Graphics requests other than 4691 * display scanout are coherent with the CPU in 4692 * accessing this cache. This means in this mode we 4693 * don't need to clflush on the CPU side, and on the 4694 * GPU side we only need to flush internal caches to 4695 * get data visible to the CPU. 4696 * 4697 * However, we maintain the display planes as UC, and so 4698 * need to rebind when first used as such. 4699 */ 4700 obj->cache_level = I915_CACHE_LLC; 4701 } else 4702 obj->cache_level = I915_CACHE_NONE; 4703 4704 trace_i915_gem_object_create(obj); 4705 4706 return obj; 4707 } 4708 4709 static bool discard_backing_storage(struct drm_i915_gem_object *obj) 4710 { 4711 /* If we are the last user of the backing storage (be it shmemfs 4712 * pages or stolen etc), we know that the pages are going to be 4713 * immediately released. In this case, we can then skip copying 4714 * back the contents from the GPU. 4715 */ 4716 4717 if (obj->madv != I915_MADV_WILLNEED) 4718 return false; 4719 4720 if (obj->base.vm_obj == NULL) 4721 return true; 4722 4723 /* At first glance, this looks racy, but then again so would be 4724 * userspace racing mmap against close. However, the first external 4725 * reference to the filp can only be obtained through the 4726 * i915_gem_mmap_ioctl() which safeguards us against the user 4727 * acquiring such a reference whilst we are in the middle of 4728 * freeing the object. 4729 */ 4730 #if 0 4731 return atomic_long_read(&obj->base.filp->f_count) == 1; 4732 #else 4733 return false; 4734 #endif 4735 } 4736 4737 void i915_gem_free_object(struct drm_gem_object *gem_obj) 4738 { 4739 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); 4740 struct drm_device *dev = obj->base.dev; 4741 struct drm_i915_private *dev_priv = dev->dev_private; 4742 struct i915_vma *vma, *next; 4743 4744 intel_runtime_pm_get(dev_priv); 4745 4746 trace_i915_gem_object_destroy(obj); 4747 4748 list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) { 4749 int ret; 4750 4751 vma->pin_count = 0; 4752 ret = i915_vma_unbind(vma); 4753 if (WARN_ON(ret == -ERESTARTSYS)) { 4754 bool was_interruptible; 4755 4756 was_interruptible = dev_priv->mm.interruptible; 4757 dev_priv->mm.interruptible = false; 4758 4759 WARN_ON(i915_vma_unbind(vma)); 4760 4761 dev_priv->mm.interruptible = was_interruptible; 4762 } 4763 } 4764 4765 /* Stolen objects don't hold a ref, but do hold pin count. Fix that up 4766 * before progressing. 
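 * (Stolen buffers pin their backing pages when they are created, so that
 * extra pin must be dropped here before the pages are released below.)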
*/ 4767 if (obj->stolen) 4768 i915_gem_object_unpin_pages(obj); 4769 4770 WARN_ON(obj->frontbuffer_bits); 4771 4772 if (obj->pages && obj->madv == I915_MADV_WILLNEED && 4773 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES && 4774 obj->tiling_mode != I915_TILING_NONE) 4775 i915_gem_object_unpin_pages(obj); 4776 4777 if (WARN_ON(obj->pages_pin_count)) 4778 obj->pages_pin_count = 0; 4779 if (discard_backing_storage(obj)) 4780 obj->madv = I915_MADV_DONTNEED; 4781 i915_gem_object_put_pages(obj); 4782 i915_gem_object_free_mmap_offset(obj); 4783 4784 BUG_ON(obj->pages); 4785 4786 #if 0 4787 if (obj->base.import_attach) 4788 drm_prime_gem_destroy(&obj->base, NULL); 4789 #endif 4790 4791 if (obj->ops->release) 4792 obj->ops->release(obj); 4793 4794 drm_gem_object_release(&obj->base); 4795 i915_gem_info_remove_obj(dev_priv, obj->base.size); 4796 4797 kfree(obj->bit_17); 4798 i915_gem_object_free(obj); 4799 4800 intel_runtime_pm_put(dev_priv); 4801 } 4802 4803 struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj, 4804 struct i915_address_space *vm) 4805 { 4806 struct i915_vma *vma; 4807 list_for_each_entry(vma, &obj->vma_list, obj_link) { 4808 if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL && 4809 vma->vm == vm) 4810 return vma; 4811 } 4812 return NULL; 4813 } 4814 4815 struct i915_vma *i915_gem_obj_to_ggtt_view(struct drm_i915_gem_object *obj, 4816 const struct i915_ggtt_view *view) 4817 { 4818 struct drm_device *dev = obj->base.dev; 4819 struct drm_i915_private *dev_priv = to_i915(dev); 4820 struct i915_ggtt *ggtt = &dev_priv->ggtt; 4821 struct i915_vma *vma; 4822 4823 BUG_ON(!view); 4824 4825 list_for_each_entry(vma, &obj->vma_list, obj_link) 4826 if (vma->vm == &ggtt->base && 4827 i915_ggtt_view_equal(&vma->ggtt_view, view)) 4828 return vma; 4829 return NULL; 4830 } 4831 4832 void i915_gem_vma_destroy(struct i915_vma *vma) 4833 { 4834 WARN_ON(vma->node.allocated); 4835 4836 /* Keep the vma as a placeholder in the execbuffer reservation lists */ 4837 if (!list_empty(&vma->exec_list)) 4838 return; 4839 4840 if (!vma->is_ggtt) 4841 i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm)); 4842 4843 list_del(&vma->obj_link); 4844 4845 kfree(vma); 4846 } 4847 4848 static void 4849 i915_gem_stop_engines(struct drm_device *dev) 4850 { 4851 struct drm_i915_private *dev_priv = dev->dev_private; 4852 struct intel_engine_cs *engine; 4853 4854 for_each_engine(engine, dev_priv) 4855 dev_priv->gt.stop_engine(engine); 4856 } 4857 4858 int 4859 i915_gem_suspend(struct drm_device *dev) 4860 { 4861 struct drm_i915_private *dev_priv = dev->dev_private; 4862 int ret = 0; 4863 4864 mutex_lock(&dev->struct_mutex); 4865 ret = i915_gpu_idle(dev); 4866 if (ret) 4867 goto err; 4868 4869 i915_gem_retire_requests(dev); 4870 4871 i915_gem_stop_engines(dev); 4872 mutex_unlock(&dev->struct_mutex); 4873 4874 cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work); 4875 cancel_delayed_work_sync(&dev_priv->mm.retire_work); 4876 #if 0 4877 flush_delayed_work(&dev_priv->mm.idle_work); 4878 #endif 4879 4880 /* Assert that we successfully flushed all the work and 4881 * reset the GPU back to its idle, low power state.
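 * (mm.busy is the driver's bookkeeping flag for outstanding GPU work;
 * after the idle and retire steps above it is expected to be clear.)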
4882 */ 4883 WARN_ON(dev_priv->mm.busy); 4884 4885 return 0; 4886 4887 err: 4888 mutex_unlock(&dev->struct_mutex); 4889 return ret; 4890 } 4891 4892 int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice) 4893 { 4894 struct intel_engine_cs *engine = req->engine; 4895 struct drm_device *dev = engine->dev; 4896 struct drm_i915_private *dev_priv = dev->dev_private; 4897 u32 *remap_info = dev_priv->l3_parity.remap_info[slice]; 4898 int i, ret; 4899 4900 if (!HAS_L3_DPF(dev) || !remap_info) 4901 return 0; 4902 4903 ret = intel_ring_begin(req, GEN7_L3LOG_SIZE / 4 * 3); 4904 if (ret) 4905 return ret; 4906 4907 /* 4908 * Note: We do not worry about the concurrent register cacheline hang 4909 * here because no other code should access these registers other than 4910 * at initialization time. 4911 */ 4912 for (i = 0; i < GEN7_L3LOG_SIZE / 4; i++) { 4913 intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(1)); 4914 intel_ring_emit_reg(engine, GEN7_L3LOG(slice, i)); 4915 intel_ring_emit(engine, remap_info[i]); 4916 } 4917 4918 intel_ring_advance(engine); 4919 4920 return ret; 4921 } 4922 4923 void i915_gem_init_swizzling(struct drm_device *dev) 4924 { 4925 struct drm_i915_private *dev_priv = dev->dev_private; 4926 4927 if (INTEL_INFO(dev)->gen < 5 || 4928 dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE) 4929 return; 4930 4931 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) | 4932 DISP_TILE_SURFACE_SWIZZLING); 4933 4934 if (IS_GEN5(dev)) 4935 return; 4936 4937 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL); 4938 if (IS_GEN6(dev)) 4939 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB)); 4940 else if (IS_GEN7(dev)) 4941 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB)); 4942 else if (IS_GEN8(dev)) 4943 I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW)); 4944 else 4945 BUG(); 4946 } 4947 4948 static void init_unused_ring(struct drm_device *dev, u32 base) 4949 { 4950 struct drm_i915_private *dev_priv = dev->dev_private; 4951 4952 I915_WRITE(RING_CTL(base), 0); 4953 I915_WRITE(RING_HEAD(base), 0); 4954 I915_WRITE(RING_TAIL(base), 0); 4955 I915_WRITE(RING_START(base), 0); 4956 } 4957 4958 static void init_unused_rings(struct drm_device *dev) 4959 { 4960 if (IS_I830(dev)) { 4961 init_unused_ring(dev, PRB1_BASE); 4962 init_unused_ring(dev, SRB0_BASE); 4963 init_unused_ring(dev, SRB1_BASE); 4964 init_unused_ring(dev, SRB2_BASE); 4965 init_unused_ring(dev, SRB3_BASE); 4966 } else if (IS_GEN2(dev)) { 4967 init_unused_ring(dev, SRB0_BASE); 4968 init_unused_ring(dev, SRB1_BASE); 4969 } else if (IS_GEN3(dev)) { 4970 init_unused_ring(dev, PRB1_BASE); 4971 init_unused_ring(dev, PRB2_BASE); 4972 } 4973 } 4974 4975 int i915_gem_init_engines(struct drm_device *dev) 4976 { 4977 struct drm_i915_private *dev_priv = dev->dev_private; 4978 int ret; 4979 4980 ret = intel_init_render_ring_buffer(dev); 4981 if (ret) 4982 return ret; 4983 4984 if (HAS_BSD(dev)) { 4985 ret = intel_init_bsd_ring_buffer(dev); 4986 if (ret) 4987 goto cleanup_render_ring; 4988 } 4989 4990 if (HAS_BLT(dev)) { 4991 ret = intel_init_blt_ring_buffer(dev); 4992 if (ret) 4993 goto cleanup_bsd_ring; 4994 } 4995 4996 if (HAS_VEBOX(dev)) { 4997 ret = intel_init_vebox_ring_buffer(dev); 4998 if (ret) 4999 goto cleanup_blt_ring; 5000 } 5001 5002 if (HAS_BSD2(dev)) { 5003 ret = intel_init_bsd2_ring_buffer(dev); 5004 if (ret) 5005 goto cleanup_vebox_ring; 5006 } 5007 5008 return 0; 5009 5010 cleanup_vebox_ring: 5011 intel_cleanup_engine(&dev_priv->engine[VECS]); 5012 cleanup_blt_ring: 5013 
intel_cleanup_engine(&dev_priv->engine[BCS]); 5014 cleanup_bsd_ring: 5015 intel_cleanup_engine(&dev_priv->engine[VCS]); 5016 cleanup_render_ring: 5017 intel_cleanup_engine(&dev_priv->engine[RCS]); 5018 5019 return ret; 5020 } 5021 5022 int 5023 i915_gem_init_hw(struct drm_device *dev) 5024 { 5025 struct drm_i915_private *dev_priv = dev->dev_private; 5026 struct intel_engine_cs *engine; 5027 int ret, j; 5028 5029 /* Double layer security blanket, see i915_gem_init() */ 5030 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 5031 5032 if (HAS_EDRAM(dev) && INTEL_GEN(dev_priv) < 9) 5033 I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf)); 5034 5035 if (IS_HASWELL(dev)) 5036 I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev) ? 5037 LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED); 5038 5039 if (HAS_PCH_NOP(dev)) { 5040 if (IS_IVYBRIDGE(dev)) { 5041 u32 temp = I915_READ(GEN7_MSG_CTL); 5042 temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK); 5043 I915_WRITE(GEN7_MSG_CTL, temp); 5044 } else if (INTEL_INFO(dev)->gen >= 7) { 5045 u32 temp = I915_READ(HSW_NDE_RSTWRN_OPT); 5046 temp &= ~RESET_PCH_HANDSHAKE_ENABLE; 5047 I915_WRITE(HSW_NDE_RSTWRN_OPT, temp); 5048 } 5049 } 5050 5051 i915_gem_init_swizzling(dev); 5052 5053 /* 5054 * At least 830 can leave some of the unused rings 5055 * "active" (i.e. head != tail) after resume which 5056 * will prevent C3 entry. Make sure all unused rings 5057 * are totally idle. 5058 */ 5059 init_unused_rings(dev); 5060 5061 BUG_ON(!dev_priv->kernel_context); 5062 5063 ret = i915_ppgtt_init_hw(dev); 5064 if (ret) { 5065 DRM_ERROR("PPGTT enable HW failed %d\n", ret); 5066 goto out; 5067 } 5068 5069 /* Need to do basic initialisation of all rings first: */ 5070 for_each_engine(engine, dev_priv) { 5071 ret = engine->init_hw(engine); 5072 if (ret) 5073 goto out; 5074 } 5075 5076 intel_mocs_init_l3cc_table(dev); 5077 5078 /* We can't enable contexts until all firmware is loaded */ 5079 if (HAS_GUC_UCODE(dev)) { 5080 ret = intel_guc_ucode_load(dev); 5081 if (ret) { 5082 DRM_ERROR("Failed to initialize GuC, error %d\n", ret); 5083 ret = -EIO; 5084 goto out; 5085 } 5086 } 5087 5088 /* 5089 * Increment the next seqno by 0x100 so we have a visible break 5090 * on re-initialisation 5091 */ 5092 ret = i915_gem_set_seqno(dev, dev_priv->next_seqno+0x100); 5093 if (ret) 5094 goto out; 5095 5096 /* Now it is safe to go back round and do everything else: */ 5097 for_each_engine(engine, dev_priv) { 5098 struct drm_i915_gem_request *req; 5099 5100 req = i915_gem_request_alloc(engine, NULL); 5101 if (IS_ERR(req)) { 5102 ret = PTR_ERR(req); 5103 break; 5104 } 5105 5106 if (engine->id == RCS) { 5107 for (j = 0; j < NUM_L3_SLICES(dev); j++) { 5108 ret = i915_gem_l3_remap(req, j); 5109 if (ret) 5110 goto err_request; 5111 } 5112 } 5113 5114 ret = i915_ppgtt_init_ring(req); 5115 if (ret) 5116 goto err_request; 5117 5118 ret = i915_gem_context_enable(req); 5119 if (ret) 5120 goto err_request; 5121 5122 err_request: 5123 i915_add_request_no_flush(req); 5124 if (ret) { 5125 DRM_ERROR("Failed to enable %s, error=%d\n", 5126 engine->name, ret); 5127 i915_gem_cleanup_engines(dev); 5128 break; 5129 } 5130 } 5131 5132 out: 5133 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5134 return ret; 5135 } 5136 5137 int i915_gem_init(struct drm_device *dev) 5138 { 5139 struct drm_i915_private *dev_priv = dev->dev_private; 5140 int ret; 5141 5142 i915.enable_execlists = intel_sanitize_enable_execlists(dev, 5143 i915.enable_execlists); 5144 5145 mutex_lock(&dev->struct_mutex); 5146 5147 if
(!i915.enable_execlists) { 5148 dev_priv->gt.execbuf_submit = i915_gem_ringbuffer_submission; 5149 dev_priv->gt.init_engines = i915_gem_init_engines; 5150 dev_priv->gt.cleanup_engine = intel_cleanup_engine; 5151 dev_priv->gt.stop_engine = intel_stop_engine; 5152 } else { 5153 dev_priv->gt.execbuf_submit = intel_execlists_submission; 5154 dev_priv->gt.init_engines = intel_logical_rings_init; 5155 dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup; 5156 dev_priv->gt.stop_engine = intel_logical_ring_stop; 5157 } 5158 5159 /* This is just a security blanket to placate dragons. 5160 * On some systems, we very sporadically observe that the first TLBs 5161 * used by the CS may be stale, despite us poking the TLB reset. If 5162 * we hold the forcewake during initialisation these problems 5163 * just magically go away. 5164 */ 5165 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 5166 5167 ret = i915_gem_init_userptr(dev); 5168 if (ret) 5169 goto out_unlock; 5170 5171 i915_gem_init_ggtt(dev); 5172 5173 ret = i915_gem_context_init(dev); 5174 if (ret) 5175 goto out_unlock; 5176 5177 ret = dev_priv->gt.init_engines(dev); 5178 if (ret) 5179 goto out_unlock; 5180 5181 ret = i915_gem_init_hw(dev); 5182 if (ret == -EIO) { 5183 /* Allow ring initialisation to fail by marking the GPU as 5184 * wedged. But we only want to do this where the GPU is angry, 5185 * for all other failures, such as an allocation failure, bail. 5186 */ 5187 DRM_ERROR("Failed to initialize GPU, declaring it wedged\n"); 5188 atomic_or(I915_WEDGED, &dev_priv->gpu_error.reset_counter); 5189 ret = 0; 5190 } 5191 5192 out_unlock: 5193 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5194 mutex_unlock(&dev->struct_mutex); 5195 5196 return ret; 5197 } 5198 5199 void 5200 i915_gem_cleanup_engines(struct drm_device *dev) 5201 { 5202 struct drm_i915_private *dev_priv = dev->dev_private; 5203 struct intel_engine_cs *engine; 5204 5205 for_each_engine(engine, dev_priv) 5206 dev_priv->gt.cleanup_engine(engine); 5207 5208 if (i915.enable_execlists) 5209 /* 5210 * Neither the BIOS, ourselves, nor any other kernel 5211 * expects the system to be in execlists mode on startup, 5212 * so we need to reset the GPU back to legacy mode.
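 * A full GPU reset is the simplest way to force the command streamer
 * back into ring-buffer mode, hence the unconditional reset below.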
5213 */ 5214 intel_gpu_reset(dev, ALL_ENGINES); 5215 } 5216 5217 static void 5218 init_engine_lists(struct intel_engine_cs *engine) 5219 { 5220 INIT_LIST_HEAD(&engine->active_list); 5221 INIT_LIST_HEAD(&engine->request_list); 5222 } 5223 5224 void 5225 i915_gem_load_init_fences(struct drm_i915_private *dev_priv) 5226 { 5227 struct drm_device *dev = dev_priv->dev; 5228 5229 if (INTEL_INFO(dev_priv)->gen >= 7 && !IS_VALLEYVIEW(dev_priv) && 5230 !IS_CHERRYVIEW(dev_priv)) 5231 dev_priv->num_fence_regs = 32; 5232 else if (INTEL_INFO(dev_priv)->gen >= 4 || IS_I945G(dev_priv) || 5233 IS_I945GM(dev_priv) || IS_G33(dev_priv)) 5234 dev_priv->num_fence_regs = 16; 5235 else 5236 dev_priv->num_fence_regs = 8; 5237 5238 if (intel_vgpu_active(dev)) 5239 dev_priv->num_fence_regs = 5240 I915_READ(vgtif_reg(avail_rs.fence_num)); 5241 5242 /* Initialize fence registers to zero */ 5243 i915_gem_restore_fences(dev); 5244 5245 i915_gem_detect_bit_6_swizzle(dev); 5246 } 5247 5248 void 5249 i915_gem_load_init(struct drm_device *dev) 5250 { 5251 struct drm_i915_private *dev_priv = dev->dev_private; 5252 int i; 5253 5254 INIT_LIST_HEAD(&dev_priv->vm_list); 5255 INIT_LIST_HEAD(&dev_priv->context_list); 5256 INIT_LIST_HEAD(&dev_priv->mm.unbound_list); 5257 INIT_LIST_HEAD(&dev_priv->mm.bound_list); 5258 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 5259 for (i = 0; i < I915_NUM_ENGINES; i++) 5260 init_engine_lists(&dev_priv->engine[i]); 5261 for (i = 0; i < I915_MAX_NUM_FENCES; i++) 5262 INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list); 5263 INIT_DELAYED_WORK(&dev_priv->mm.retire_work, 5264 i915_gem_retire_work_handler); 5265 INIT_DELAYED_WORK(&dev_priv->mm.idle_work, 5266 i915_gem_idle_work_handler); 5267 init_waitqueue_head(&dev_priv->gpu_error.reset_queue); 5268 5269 dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL; 5270 5271 /* 5272 * Set initial sequence number for requests. 5273 * Using this number allows the wraparound to happen early, 5274 * catching any obvious problems. 5275 */ 5276 dev_priv->next_seqno = ((u32)~0 - 0x1100); 5277 dev_priv->last_seqno = ((u32)~0 - 0x1101); 5278 5279 5280 5281 init_waitqueue_head(&dev_priv->pending_flip_queue); 5282 5283 dev_priv->mm.interruptible = true; 5284 5285 lockinit(&dev_priv->fb_tracking.lock, "drmftl", 0, LK_CANRECURSE); 5286 } 5287 5288 void i915_gem_load_cleanup(struct drm_device *dev) 5289 { 5290 #if 0 5291 struct drm_i915_private *dev_priv = to_i915(dev); 5292 5293 kmem_cache_destroy(dev_priv->requests); 5294 kmem_cache_destroy(dev_priv->vmas); 5295 kmem_cache_destroy(dev_priv->objects); 5296 #endif 5297 } 5298 5299 void i915_gem_release(struct drm_device *dev, struct drm_file *file) 5300 { 5301 struct drm_i915_file_private *file_priv = file->driver_priv; 5302 5303 /* Clean up our request list when the client is going away, so that 5304 * later retire_requests won't dereference our soon-to-be-gone 5305 * file_priv.
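 * (The requests themselves stay on the engine lists and complete as
 * normal; only the per-file client_list link and the file_priv back
 * pointer are severed here.)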
5306 */ 5307 spin_lock(&file_priv->mm.lock); 5308 while (!list_empty(&file_priv->mm.request_list)) { 5309 struct drm_i915_gem_request *request; 5310 5311 request = list_first_entry(&file_priv->mm.request_list, 5312 struct drm_i915_gem_request, 5313 client_list); 5314 list_del(&request->client_list); 5315 request->file_priv = NULL; 5316 } 5317 spin_unlock(&file_priv->mm.lock); 5318 5319 if (!list_empty(&file_priv->rps.link)) { 5320 lockmgr(&to_i915(dev)->rps.client_lock, LK_EXCLUSIVE); 5321 list_del(&file_priv->rps.link); 5322 lockmgr(&to_i915(dev)->rps.client_lock, LK_RELEASE); 5323 } 5324 } 5325 5326 int 5327 i915_gem_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot, 5328 vm_ooffset_t foff, struct ucred *cred, u_short *color) 5329 { 5330 *color = 0; /* XXXKIB */ 5331 return (0); 5332 } 5333 5334 void 5335 i915_gem_pager_dtor(void *handle) 5336 { 5337 struct drm_gem_object *obj; 5338 struct drm_device *dev; 5339 5340 obj = handle; 5341 dev = obj->dev; 5342 5343 mutex_lock(&dev->struct_mutex); 5344 drm_gem_free_mmap_offset(obj); 5345 i915_gem_release_mmap(to_intel_bo(obj)); 5346 drm_gem_object_unreference(obj); 5347 mutex_unlock(&dev->struct_mutex); 5348 } 5349 5350 int i915_gem_open(struct drm_device *dev, struct drm_file *file) 5351 { 5352 struct drm_i915_file_private *file_priv; 5353 int ret; 5354 5355 DRM_DEBUG_DRIVER("\n"); 5356 5357 file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL); 5358 if (!file_priv) 5359 return -ENOMEM; 5360 5361 file->driver_priv = file_priv; 5362 file_priv->dev_priv = dev->dev_private; 5363 file_priv->file = file; 5364 INIT_LIST_HEAD(&file_priv->rps.link); 5365 5366 spin_init(&file_priv->mm.lock, "i915_priv"); 5367 INIT_LIST_HEAD(&file_priv->mm.request_list); 5368 5369 file_priv->bsd_ring = -1; 5370 5371 ret = i915_gem_context_open(dev, file); 5372 if (ret) 5373 kfree(file_priv); 5374 5375 return ret; 5376 } 5377 5378 /** 5379 * i915_gem_track_fb - update frontbuffer tracking 5380 * @old: current GEM buffer for the frontbuffer slots 5381 * @new: new GEM buffer for the frontbuffer slots 5382 * @frontbuffer_bits: bitmask of frontbuffer slots 5383 * 5384 * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them 5385 * from @old and setting them in @new. Both @old and @new can be NULL. 5386 */ 5387 void i915_gem_track_fb(struct drm_i915_gem_object *old, 5388 struct drm_i915_gem_object *new, 5389 unsigned frontbuffer_bits) 5390 { 5391 if (old) { 5392 WARN_ON(!mutex_is_locked(&old->base.dev->struct_mutex)); 5393 WARN_ON(!(old->frontbuffer_bits & frontbuffer_bits)); 5394 old->frontbuffer_bits &= ~frontbuffer_bits; 5395 } 5396 5397 if (new) { 5398 WARN_ON(!mutex_is_locked(&new->base.dev->struct_mutex)); 5399 WARN_ON(new->frontbuffer_bits & frontbuffer_bits); 5400 new->frontbuffer_bits |= frontbuffer_bits; 5401 } 5402 } 5403 5404 /* All the new VM stuff */ 5405 u64 i915_gem_obj_offset(struct drm_i915_gem_object *o, 5406 struct i915_address_space *vm) 5407 { 5408 struct drm_i915_private *dev_priv = o->base.dev->dev_private; 5409 struct i915_vma *vma; 5410 5411 WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base); 5412 5413 list_for_each_entry(vma, &o->vma_list, obj_link) { 5414 if (vma->is_ggtt && 5415 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) 5416 continue; 5417 if (vma->vm == vm) 5418 return vma->node.start; 5419 } 5420 5421 WARN(1, "%s vma for this object not found.\n", 5422 i915_is_ggtt(vm) ? 
"global" : "ppgtt"); 5423 return -1; 5424 } 5425 5426 u64 i915_gem_obj_ggtt_offset_view(struct drm_i915_gem_object *o, 5427 const struct i915_ggtt_view *view) 5428 { 5429 struct drm_i915_private *dev_priv = to_i915(o->base.dev); 5430 struct i915_ggtt *ggtt = &dev_priv->ggtt; 5431 struct i915_vma *vma; 5432 5433 list_for_each_entry(vma, &o->vma_list, obj_link) 5434 if (vma->vm == &ggtt->base && 5435 i915_ggtt_view_equal(&vma->ggtt_view, view)) 5436 return vma->node.start; 5437 5438 WARN(1, "global vma for this object not found. (view=%u)\n", view->type); 5439 return -1; 5440 } 5441 5442 bool i915_gem_obj_bound(struct drm_i915_gem_object *o, 5443 struct i915_address_space *vm) 5444 { 5445 struct i915_vma *vma; 5446 5447 list_for_each_entry(vma, &o->vma_list, obj_link) { 5448 if (vma->is_ggtt && 5449 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) 5450 continue; 5451 if (vma->vm == vm && drm_mm_node_allocated(&vma->node)) 5452 return true; 5453 } 5454 5455 return false; 5456 } 5457 5458 bool i915_gem_obj_ggtt_bound_view(struct drm_i915_gem_object *o, 5459 const struct i915_ggtt_view *view) 5460 { 5461 struct drm_i915_private *dev_priv = to_i915(o->base.dev); 5462 struct i915_ggtt *ggtt = &dev_priv->ggtt; 5463 struct i915_vma *vma; 5464 5465 list_for_each_entry(vma, &o->vma_list, obj_link) 5466 if (vma->vm == &ggtt->base && 5467 i915_ggtt_view_equal(&vma->ggtt_view, view) && 5468 drm_mm_node_allocated(&vma->node)) 5469 return true; 5470 5471 return false; 5472 } 5473 5474 bool i915_gem_obj_bound_any(struct drm_i915_gem_object *o) 5475 { 5476 struct i915_vma *vma; 5477 5478 list_for_each_entry(vma, &o->vma_list, obj_link) 5479 if (drm_mm_node_allocated(&vma->node)) 5480 return true; 5481 5482 return false; 5483 } 5484 5485 unsigned long i915_gem_obj_size(struct drm_i915_gem_object *o, 5486 struct i915_address_space *vm) 5487 { 5488 struct drm_i915_private *dev_priv = o->base.dev->dev_private; 5489 struct i915_vma *vma; 5490 5491 WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base); 5492 5493 BUG_ON(list_empty(&o->vma_list)); 5494 5495 list_for_each_entry(vma, &o->vma_list, obj_link) { 5496 if (vma->is_ggtt && 5497 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) 5498 continue; 5499 if (vma->vm == vm) 5500 return vma->node.size; 5501 } 5502 return 0; 5503 } 5504 5505 bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj) 5506 { 5507 struct i915_vma *vma; 5508 list_for_each_entry(vma, &obj->vma_list, obj_link) 5509 if (vma->pin_count > 0) 5510 return true; 5511 5512 return false; 5513 } 5514 5515 /* Like i915_gem_object_get_page(), but mark the returned page dirty */ 5516 struct page * 5517 i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, int n) 5518 { 5519 struct page *page; 5520 5521 /* Only default objects have per-page dirty tracking */ 5522 if (WARN_ON((obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE) == 0)) 5523 return NULL; 5524 5525 page = i915_gem_object_get_page(obj, n); 5526 set_page_dirty(page); 5527 return page; 5528 } 5529 5530 /* Allocate a new GEM object and fill it with the supplied data */ 5531 struct drm_i915_gem_object * 5532 i915_gem_object_create_from_data(struct drm_device *dev, 5533 const void *data, size_t size) 5534 { 5535 struct drm_i915_gem_object *obj; 5536 struct sg_table *sg; 5537 size_t bytes; 5538 int ret; 5539 5540 obj = i915_gem_alloc_object(dev, round_up(size, PAGE_SIZE)); 5541 if (IS_ERR_OR_NULL(obj)) 5542 return obj; 5543 5544 ret = i915_gem_object_set_to_cpu_domain(obj, true); 5545 if (ret) 5546 goto fail; 5547 5548 ret = 
i915_gem_object_get_pages(obj); 5549 if (ret) 5550 goto fail; 5551 5552 i915_gem_object_pin_pages(obj); 5553 sg = obj->pages; 5554 bytes = sg_copy_from_buffer(sg->sgl, sg->nents, data, size); 5555 obj->dirty = 1; /* Backing store is now out of date */ 5556 i915_gem_object_unpin_pages(obj); 5557 5558 if (WARN_ON(bytes != size)) { 5559 DRM_ERROR("Incomplete copy, wrote %zu of %zu", bytes, size); 5560 ret = -EFAULT; 5561 goto fail; 5562 } 5563 5564 return obj; 5565 5566 fail: 5567 drm_gem_object_unreference(&obj->base); 5568 return ERR_PTR(ret); 5569 } 5570
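
/*
 * Illustrative usage sketch only, not part of the driver: a hypothetical
 * caller of i915_gem_object_create_from_data(), e.g. for uploading a
 * firmware blob ("fw" with data/size members is an assumed placeholder):
 *
 *	struct drm_i915_gem_object *obj;
 *
 *	obj = i915_gem_object_create_from_data(dev, fw->data, fw->size);
 *	if (IS_ERR_OR_NULL(obj))
 *		return obj ? PTR_ERR(obj) : -ENOMEM;
 *
 *	... use obj, then drop the reference under struct_mutex ...
 *	drm_gem_object_unreference(&obj->base);
 */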