/*
 * Copyright © 2008-2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */

#include <drm/drmP.h>
#include <drm/drm_vma_manager.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "i915_vgpu.h"
#include "i915_trace.h"
#include "intel_drv.h"
#include "intel_mocs.h"
#include <linux/shmem_fs.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/pci.h>

static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
static void
i915_gem_object_retire__write(struct drm_i915_gem_object *obj);
static void
i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring);

static bool cpu_cache_is_coherent(struct drm_device *dev,
				  enum i915_cache_level level)
{
	return HAS_LLC(dev) || level != I915_CACHE_NONE;
}

static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
	if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
		return true;

	return obj->pin_display;
}

/* some bookkeeping */
static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
				  size_t size)
{
	spin_lock(&dev_priv->mm.object_stat_lock);
	dev_priv->mm.object_count++;
	dev_priv->mm.object_memory += size;
	spin_unlock(&dev_priv->mm.object_stat_lock);
}

static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
				     size_t size)
{
	spin_lock(&dev_priv->mm.object_stat_lock);
	dev_priv->mm.object_count--;
	dev_priv->mm.object_memory -= size;
	spin_unlock(&dev_priv->mm.object_stat_lock);
}

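/* Block new GEM work while a GPU reset is pending.  If a reset is in
 * progress we wait (bounded, see below) for it to complete so that
 * callers never race the reset handler while touching GEM state.
 */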
static int
i915_gem_wait_for_error(struct i915_gpu_error *error)
{
	int ret;

	if (!i915_reset_in_progress(error))
		return 0;

	/*
	 * Only wait 10 seconds for the gpu reset to complete to avoid hanging
	 * userspace. If it takes that long something really bad is going on and
	 * we should simply try to bail out and fail as gracefully as possible.
	 */
	ret = wait_event_interruptible_timeout(error->reset_queue,
					       !i915_reset_in_progress(error),
					       10*HZ);
	if (ret == 0) {
		DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
		return -EIO;
	} else if (ret < 0) {
		return ret;
	} else {
		return 0;
	}
}

int i915_mutex_lock_interruptible(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret;

	ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
	if (ret)
		return ret;

	ret = mutex_lock_interruptible(&dev->struct_mutex);
	if (ret)
		return ret;

	WARN_ON(i915_verify_lists(dev));
	return 0;
}

int
i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
			    struct drm_file *file)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct i915_ggtt *ggtt = &dev_priv->ggtt;
	struct drm_i915_gem_get_aperture *args = data;
	struct i915_vma *vma;
	size_t pinned;

	pinned = 0;
	mutex_lock(&dev->struct_mutex);
	list_for_each_entry(vma, &ggtt->base.active_list, vm_link)
		if (vma->pin_count)
			pinned += vma->node.size;
	list_for_each_entry(vma, &ggtt->base.inactive_list, vm_link)
		if (vma->pin_count)
			pinned += vma->node.size;
	mutex_unlock(&dev->struct_mutex);

	args->aper_size = ggtt->base.total;
	args->aper_available_size = args->aper_size - pinned;

	return 0;
}

#if 0
static int
i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
{
	struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
	char *vaddr = obj->phys_handle->vaddr;
	struct sg_table *st;
	struct scatterlist *sg;
	int i;

	if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
		return -EINVAL;

	for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
		struct page *page;
		char *src;

		page = shmem_read_mapping_page(mapping, i);
		if (IS_ERR(page))
			return PTR_ERR(page);

		src = kmap_atomic(page);
		memcpy(vaddr, src, PAGE_SIZE);
		drm_clflush_virt_range(vaddr, PAGE_SIZE);
		kunmap_atomic(src);

		put_page(page);
		vaddr += PAGE_SIZE;
	}

	i915_gem_chipset_flush(obj->base.dev);

	st = kmalloc(sizeof(*st), GFP_KERNEL);
	if (st == NULL)
		return -ENOMEM;

	if (sg_alloc_table(st, 1, GFP_KERNEL)) {
		kfree(st);
		return -ENOMEM;
	}

	sg = st->sgl;
	sg->offset = 0;
	sg->length = obj->base.size;

	sg_dma_address(sg) = obj->phys_handle->busaddr;
	sg_dma_len(sg) = obj->base.size;

	obj->pages = st;
	return 0;
}

static void
i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj)
{
	int ret;

	BUG_ON(obj->madv == __I915_MADV_PURGED);

	ret = i915_gem_object_set_to_cpu_domain(obj, true);
	if (WARN_ON(ret)) {
		/* In the event of a disaster, abandon all caches and
		 * hope for the best.
		 */
		obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
	}

	if (obj->madv == I915_MADV_DONTNEED)
		obj->dirty = 0;

	if (obj->dirty) {
		struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
		char *vaddr = obj->phys_handle->vaddr;
		int i;

		for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
			struct page *page;
			char *dst;

			page = shmem_read_mapping_page(mapping, i);
			if (IS_ERR(page))
				continue;

			dst = kmap_atomic(page);
			drm_clflush_virt_range(vaddr, PAGE_SIZE);
			memcpy(dst, vaddr, PAGE_SIZE);
			kunmap_atomic(dst);

			set_page_dirty(page);
			if (obj->madv == I915_MADV_WILLNEED)
				mark_page_accessed(page);
			put_page(page);
			vaddr += PAGE_SIZE;
		}
		obj->dirty = 0;
	}

	sg_free_table(obj->pages);
	kfree(obj->pages);
}

static void
i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
{
	drm_pci_free(obj->base.dev, obj->phys_handle);
}

static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
	.get_pages = i915_gem_object_get_pages_phys,
	.put_pages = i915_gem_object_put_pages_phys,
	.release = i915_gem_object_release_phys,
};
#endif

static int
drop_pages(struct drm_i915_gem_object *obj)
{
	struct i915_vma *vma, *next;
	int ret;

	drm_gem_object_reference(&obj->base);
	list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link)
		if (i915_vma_unbind(vma))
			break;

	ret = i915_gem_object_put_pages(obj);
	drm_gem_object_unreference(&obj->base);

	return ret;
}

int
i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
			    int align)
{
	drm_dma_handle_t *phys;
	int ret;

	if (obj->phys_handle) {
		if ((unsigned long)obj->phys_handle->vaddr & (align - 1))
			return -EBUSY;

		return 0;
	}

	if (obj->madv != I915_MADV_WILLNEED)
		return -EFAULT;

#if 0
	if (obj->base.filp == NULL)
		return -EINVAL;
#endif

	ret = drop_pages(obj);
	if (ret)
		return ret;

	/* create a new object */
	phys = drm_pci_alloc(obj->base.dev, obj->base.size, align);
	if (!phys)
		return -ENOMEM;

	obj->phys_handle = phys;
#if 0
	obj->ops = &i915_gem_phys_ops;
#endif

	return i915_gem_object_get_pages(obj);
}

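/* pwrite into an object that has a physical handle attached (see
 * i915_gem_object_attach_phys() above), e.g. hardware cursors on old
 * chipsets: the data is copied straight into the contiguous DMA
 * allocation and clflushed, bypassing the shmem backing store.
 */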
static int
i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pwrite *args,
		     struct drm_file *file_priv)
{
	struct drm_device *dev = obj->base.dev;
	void *vaddr = (char *)obj->phys_handle->vaddr + args->offset;
	char __user *user_data = u64_to_user_ptr(args->data_ptr);
	int ret = 0;

	/* We manually control the domain here and pretend that it
	 * remains coherent i.e. in the GTT domain, like shmem_pwrite.
	 */
	ret = i915_gem_object_wait_rendering(obj, false);
	if (ret)
		return ret;

	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
	if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) {
		unsigned long unwritten;

		/* The physical object once assigned is fixed for the lifetime
		 * of the obj, so we can safely drop the lock and continue
		 * to access vaddr.
		 */
		mutex_unlock(&dev->struct_mutex);
		unwritten = copy_from_user(vaddr, user_data, args->size);
		mutex_lock(&dev->struct_mutex);
		if (unwritten) {
			ret = -EFAULT;
			goto out;
		}
	}

	drm_clflush_virt_range(vaddr, args->size);
	i915_gem_chipset_flush(dev);

out:
	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
	return ret;
}

void *i915_gem_object_alloc(struct drm_device *dev)
{
	return kmalloc(sizeof(struct drm_i915_gem_object),
		       M_DRM, M_WAITOK | M_ZERO);
}

void i915_gem_object_free(struct drm_i915_gem_object *obj)
{
	kfree(obj);
}

static int
i915_gem_create(struct drm_file *file,
		struct drm_device *dev,
		uint64_t size,
		uint32_t *handle_p)
{
	struct drm_i915_gem_object *obj;
	int ret;
	u32 handle;

	size = roundup(size, PAGE_SIZE);
	if (size == 0)
		return -EINVAL;

	/* Allocate the new object */
	obj = i915_gem_alloc_object(dev, size);
	if (obj == NULL)
		return -ENOMEM;

	ret = drm_gem_handle_create(file, &obj->base, &handle);
	/* drop reference from allocate - handle holds it now */
	drm_gem_object_unreference_unlocked(&obj->base);
	if (ret)
		return ret;

	*handle_p = handle;
	return 0;
}

int
i915_gem_dumb_create(struct drm_file *file,
		     struct drm_device *dev,
		     struct drm_mode_create_dumb *args)
{
	/* have to work out size/pitch and return them */
	args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64);
	args->size = args->pitch * args->height;
	return i915_gem_create(file, dev,
			       args->size, &args->handle);
}

/**
 * Creates a new mm object and returns a handle to it.
 */
int
i915_gem_create_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_gem_create *args = data;

	return i915_gem_create(file, dev,
			       args->size, &args->handle);
}

static inline int
__copy_to_user_swizzled(char __user *cpu_vaddr,
			const char *gpu_vaddr, int gpu_offset,
			int length)
{
	int ret, cpu_offset = 0;

	while (length > 0) {
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
		int this_length = min(cacheline_end - gpu_offset, length);
		int swizzled_gpu_offset = gpu_offset ^ 64;

		ret = __copy_to_user(cpu_vaddr + cpu_offset,
				     gpu_vaddr + swizzled_gpu_offset,
				     this_length);
		if (ret)
			return ret + length;

		cpu_offset += this_length;
		gpu_offset += this_length;
		length -= this_length;
	}

	return 0;
}

static inline int
__copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
			  const char __user *cpu_vaddr,
			  int length)
{
	int ret, cpu_offset = 0;

	while (length > 0) {
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
		int this_length = min(cacheline_end - gpu_offset, length);
		int swizzled_gpu_offset = gpu_offset ^ 64;

		ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
				       cpu_vaddr + cpu_offset,
				       this_length);
		if (ret)
			return ret + length;

		cpu_offset += this_length;
		gpu_offset += this_length;
		length -= this_length;
	}

	return 0;
}

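/* Note on the swizzled copy helpers above: on machines with bit-17-based
 * swizzling the hardware swaps adjacent 64-byte cachelines within each
 * 128-byte span depending on bit 17 of the page's physical address, which
 * is why the helpers walk cacheline-sized chunks and flip bit 6 of the
 * offset (gpu_offset ^ 64) before copying.
 */
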
/*
 * Pins the specified object's pages and synchronizes the object with
 * GPU accesses. Sets needs_clflush to non-zero if the caller should
 * flush the object from the CPU cache.
 */
int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
				    int *needs_clflush)
{
	int ret;

	*needs_clflush = 0;

#if 0
	if (WARN_ON((obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE) == 0))
		return -EINVAL;
#endif

	if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) {
		/* If we're not in the cpu read domain, set ourself into the gtt
		 * read domain and manually flush cachelines (if required). This
		 * optimizes for the case when the gpu will dirty the data
		 * anyway again before the next pread happens. */
		*needs_clflush = !cpu_cache_is_coherent(obj->base.dev,
							obj->cache_level);
		ret = i915_gem_object_wait_rendering(obj, true);
		if (ret)
			return ret;
	}

	ret = i915_gem_object_get_pages(obj);
	if (ret)
		return ret;

	i915_gem_object_pin_pages(obj);

	return ret;
}

/* Per-page copy function for the shmem pread fastpath.
 * Flushes invalid cachelines before reading the target if
 * needs_clflush is set. */
static int
shmem_pread_fast(struct vm_page *page, int shmem_page_offset, int page_length,
		 char __user *user_data,
		 bool page_do_bit17_swizzling, bool needs_clflush)
{
	char *vaddr;
	int ret;

	if (unlikely(page_do_bit17_swizzling))
		return -EINVAL;

	vaddr = kmap_atomic(page);
	if (needs_clflush)
		drm_clflush_virt_range(vaddr + shmem_page_offset,
				       page_length);
	ret = __copy_to_user_inatomic(user_data,
				      vaddr + shmem_page_offset,
				      page_length);
	kunmap_atomic(vaddr);

	return ret ? -EFAULT : 0;
}

static void
shmem_clflush_swizzled_range(char *addr, unsigned long length,
			     bool swizzled)
{
	if (unlikely(swizzled)) {
		unsigned long start = (unsigned long) addr;
		unsigned long end = (unsigned long) addr + length;

		/* For swizzling simply ensure that we always flush both
		 * channels. Lame, but simple and it works. Swizzled
		 * pwrite/pread is far from a hotpath - current userspace
		 * doesn't use it at all. */
		start = round_down(start, 128);
		end = round_up(end, 128);

		drm_clflush_virt_range((void *)start, end - start);
	} else {
		drm_clflush_virt_range(addr, length);
	}
}

/* Only difference to the fast-path function is that this can handle bit17
 * and uses non-atomic copy and kmap functions. */
static int
shmem_pread_slow(struct vm_page *page, int shmem_page_offset, int page_length,
		 char __user *user_data,
		 bool page_do_bit17_swizzling, bool needs_clflush)
{
	char *vaddr;
	int ret;

	vaddr = kmap(page);
	if (needs_clflush)
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
					     page_length,
					     page_do_bit17_swizzling);

	if (page_do_bit17_swizzling)
		ret = __copy_to_user_swizzled(user_data,
					      vaddr, shmem_page_offset,
					      page_length);
	else
		ret = __copy_to_user(user_data,
				     vaddr + shmem_page_offset,
				     page_length);
	kunmap(page);

	return ret ? -EFAULT : 0;
}

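/* Copy an object's backing pages out to userspace.  Each page is first
 * tried with the atomic fastpath (shmem_pread_fast); if that faults or
 * the page needs bit-17 swizzling, struct_mutex is dropped, the user
 * buffer is prefaulted once, and the sleeping slowpath is used instead.
 */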
static int
i915_gem_shmem_pread(struct drm_device *dev,
		     struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pread *args,
		     struct drm_file *file)
{
	char __user *user_data;
	ssize_t remain;
	loff_t offset;
	int shmem_page_offset, page_length, ret = 0;
	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
	int prefaulted = 0;
	int needs_clflush = 0;
	struct sg_page_iter sg_iter;

	user_data = u64_to_user_ptr(args->data_ptr);
	remain = args->size;

	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);

	ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
	if (ret)
		return ret;

	offset = args->offset;

	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
			 offset >> PAGE_SHIFT) {
		struct vm_page *page = sg_page_iter_page(&sg_iter);

		if (remain <= 0)
			break;

		/* Operation in this page
		 *
		 * shmem_page_offset = offset within page in shmem file
		 * page_length = bytes to copy for this page
		 */
		shmem_page_offset = offset_in_page(offset);
		page_length = remain;
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - shmem_page_offset;

		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
			(page_to_phys(page) & (1 << 17)) != 0;

		ret = shmem_pread_fast(page, shmem_page_offset, page_length,
				       user_data, page_do_bit17_swizzling,
				       needs_clflush);
		if (ret == 0)
			goto next_page;

		mutex_unlock(&dev->struct_mutex);

		if (likely(!i915.prefault_disable) && !prefaulted) {
			ret = fault_in_multipages_writeable(user_data, remain);
			/* Userspace is tricking us, but we've already clobbered
			 * its pages with the prefault and promised to write the
			 * data up to the first fault. Hence ignore any errors
			 * and just continue. */
			(void)ret;
			prefaulted = 1;
		}

		ret = shmem_pread_slow(page, shmem_page_offset, page_length,
				       user_data, page_do_bit17_swizzling,
				       needs_clflush);

		mutex_lock(&dev->struct_mutex);

		if (ret)
			goto out;

next_page:
		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

out:
	i915_gem_object_unpin_pages(obj);

	return ret;
}

/**
 * Reads data from the object referenced by handle.
 *
 * On error, the contents of *data are undefined.
 */
int
i915_gem_pread_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file)
{
	struct drm_i915_gem_pread *args = data;
	struct drm_i915_gem_object *obj;
	int ret = 0;

	if (args->size == 0)
		return 0;

#if 0
	if (!access_ok(VERIFY_WRITE,
		       u64_to_user_ptr(args->data_ptr),
		       args->size))
		return -EFAULT;
#endif

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Bounds check source. */
	if (args->offset > obj->base.size ||
	    args->size > obj->base.size - args->offset) {
		ret = -EINVAL;
		goto out;
	}

	/* prime objects have no backing filp to GEM pread/pwrite
	 * pages from.
	 */

	trace_i915_gem_object_pread(obj, args->offset, args->size);

	ret = i915_gem_shmem_pread(dev, obj, args, file);

out:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/* This is the fast write path which cannot handle
 * page faults in the source data
 */

static inline int
fast_user_write(struct io_mapping *mapping,
		loff_t page_base, int page_offset,
		char __user *user_data,
		int length)
{
	void __iomem *vaddr_atomic;
	void *vaddr;
	unsigned long unwritten;

	vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
	/* We can use the cpu mem copy function because this is X86. */
	vaddr = (char __force*)vaddr_atomic + page_offset;
	unwritten = __copy_from_user_inatomic_nocache(vaddr,
						      user_data, length);
	io_mapping_unmap_atomic(vaddr_atomic);
	return unwritten;
}

/**
 * This is the fast pwrite path, where we copy the data directly from the
 * user into the GTT, uncached.
 */
static int
i915_gem_gtt_pwrite_fast(struct drm_device *dev,
			 struct drm_i915_gem_object *obj,
			 struct drm_i915_gem_pwrite *args,
			 struct drm_file *file)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct i915_ggtt *ggtt = &dev_priv->ggtt;
	ssize_t remain;
	loff_t offset, page_base;
	char __user *user_data;
	int page_offset, page_length, ret;

	ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK);
	if (ret)
		goto out;

	ret = i915_gem_object_set_to_gtt_domain(obj, true);
	if (ret)
		goto out_unpin;

	ret = i915_gem_object_put_fence(obj);
	if (ret)
		goto out_unpin;

	user_data = u64_to_user_ptr(args->data_ptr);
	remain = args->size;

	offset = i915_gem_obj_ggtt_offset(obj) + args->offset;

	intel_fb_obj_invalidate(obj, ORIGIN_GTT);

	while (remain > 0) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		page_base = offset & ~PAGE_MASK;
		page_offset = offset_in_page(offset);
		page_length = remain;
		if ((page_offset + remain) > PAGE_SIZE)
			page_length = PAGE_SIZE - page_offset;

		/* If we get a fault while copying data, then (presumably) our
		 * source page isn't available. Return the error and we'll
		 * retry in the slow path.
		 */
		if (fast_user_write(ggtt->mappable, page_base,
				    page_offset, user_data, page_length)) {
			ret = -EFAULT;
			goto out_flush;
		}

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

out_flush:
	intel_fb_obj_flush(obj, false, ORIGIN_GTT);
out_unpin:
	i915_gem_object_ggtt_unpin(obj);
out:
	return ret;
}

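/* The GTT pwrite path above copies through an atomic write-combining
 * mapping of the aperture (fast_user_write), so it must not take faults;
 * if the user buffer is not resident the copy fails with -EFAULT and
 * i915_gem_pwrite_ioctl() falls back to the shmem path below.
 */
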
/* Per-page copy function for the shmem pwrite fastpath.
 * Flushes invalid cachelines before writing to the target if
 * needs_clflush_before is set and flushes out any written cachelines after
 * writing if needs_clflush is set. */
static int
shmem_pwrite_fast(struct vm_page *page, int shmem_page_offset, int page_length,
		  char __user *user_data,
		  bool page_do_bit17_swizzling,
		  bool needs_clflush_before,
		  bool needs_clflush_after)
{
	char *vaddr;
	int ret;

	if (unlikely(page_do_bit17_swizzling))
		return -EINVAL;

	vaddr = kmap_atomic(page);
	if (needs_clflush_before)
		drm_clflush_virt_range(vaddr + shmem_page_offset,
				       page_length);
	ret = __copy_from_user_inatomic(vaddr + shmem_page_offset,
					user_data, page_length);
	if (needs_clflush_after)
		drm_clflush_virt_range(vaddr + shmem_page_offset,
				       page_length);
	kunmap_atomic(vaddr);

	return ret ? -EFAULT : 0;
}

/* Only difference to the fast-path function is that this can handle bit17
 * and uses non-atomic copy and kmap functions. */
static int
shmem_pwrite_slow(struct vm_page *page, int shmem_page_offset, int page_length,
		  char __user *user_data,
		  bool page_do_bit17_swizzling,
		  bool needs_clflush_before,
		  bool needs_clflush_after)
{
	char *vaddr;
	int ret;

	vaddr = kmap(page);
	if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
					     page_length,
					     page_do_bit17_swizzling);
	if (page_do_bit17_swizzling)
		ret = __copy_from_user_swizzled(vaddr, shmem_page_offset,
						user_data,
						page_length);
	else
		ret = __copy_from_user(vaddr + shmem_page_offset,
				       user_data,
				       page_length);
	if (needs_clflush_after)
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
					     page_length,
					     page_do_bit17_swizzling);
	kunmap(page);

	return ret ? -EFAULT : 0;
}

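/* Write userspace data into an object through its shmem backing pages,
 * clflushing around partial cacheline writes where required (see the
 * needs_clflush_before/after logic below).
 */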
static int
i915_gem_shmem_pwrite(struct drm_device *dev,
		      struct drm_i915_gem_object *obj,
		      struct drm_i915_gem_pwrite *args,
		      struct drm_file *file)
{
	ssize_t remain;
	loff_t offset;
	char __user *user_data;
	int shmem_page_offset, page_length, ret = 0;
	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
	int hit_slowpath = 0;
	int needs_clflush_after = 0;
	int needs_clflush_before = 0;
	struct sg_page_iter sg_iter;

	user_data = u64_to_user_ptr(args->data_ptr);
	remain = args->size;

	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);

	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
		/* If we're not in the cpu write domain, set ourself into the gtt
		 * write domain and manually flush cachelines (if required). This
		 * optimizes for the case when the gpu will use the data
		 * right away and we therefore have to clflush anyway. */
		needs_clflush_after = cpu_write_needs_clflush(obj);
		ret = i915_gem_object_wait_rendering(obj, false);
		if (ret)
			return ret;
	}
	/* Same trick applies to invalidate partially written cachelines read
	 * before writing. */
	if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0)
		needs_clflush_before =
			!cpu_cache_is_coherent(dev, obj->cache_level);

	ret = i915_gem_object_get_pages(obj);
	if (ret)
		return ret;

	intel_fb_obj_invalidate(obj, ORIGIN_CPU);

	i915_gem_object_pin_pages(obj);

	offset = args->offset;
	obj->dirty = 1;

	VM_OBJECT_LOCK(obj->base.vm_obj);
	vm_object_pip_add(obj->base.vm_obj, 1);

	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
			 offset >> PAGE_SHIFT) {
		struct vm_page *page = sg_page_iter_page(&sg_iter);
		int partial_cacheline_write;

		if (remain <= 0)
			break;

		/* Operation in this page
		 *
		 * shmem_page_offset = offset within page in shmem file
		 * page_length = bytes to copy for this page
		 */
		shmem_page_offset = offset_in_page(offset);

		page_length = remain;
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - shmem_page_offset;

		/* If we don't overwrite a cacheline completely we need to be
		 * careful to have up-to-date data by first clflushing. Don't
		 * overcomplicate things and flush the entire page. */
		partial_cacheline_write = needs_clflush_before &&
			((shmem_page_offset | page_length)
				& (cpu_clflush_line_size - 1));

		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
			(page_to_phys(page) & (1 << 17)) != 0;

		ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
					user_data, page_do_bit17_swizzling,
					partial_cacheline_write,
					needs_clflush_after);
		if (ret == 0)
			goto next_page;

		hit_slowpath = 1;
		mutex_unlock(&dev->struct_mutex);
		ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
					user_data, page_do_bit17_swizzling,
					partial_cacheline_write,
					needs_clflush_after);

		mutex_lock(&dev->struct_mutex);

		if (ret)
			goto out;

next_page:
		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}
	vm_object_pip_wakeup(obj->base.vm_obj);
	VM_OBJECT_UNLOCK(obj->base.vm_obj);

out:
	i915_gem_object_unpin_pages(obj);

	if (hit_slowpath) {
		/*
		 * Fixup: Flush cpu caches in case we didn't flush the dirty
		 * cachelines in-line while writing and the object moved
		 * out of the cpu write domain while we've dropped the lock.
		 */
		if (!needs_clflush_after &&
		    obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
			if (i915_gem_clflush_object(obj, obj->pin_display))
				needs_clflush_after = true;
		}
	}

	if (needs_clflush_after)
		i915_gem_chipset_flush(dev);
	else
		obj->cache_dirty = true;

	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
	return ret;
}

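/* Note: the VM_OBJECT_LOCK/vm_object_pip_add bracketing in
 * i915_gem_shmem_pwrite() above is DragonFly-specific; holding a
 * paging-in-progress reference appears to be what keeps the backing VM
 * object from being torn down while struct_mutex is dropped for the
 * slowpath copies.
 */
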
/**
 * Writes data to the object referenced by handle.
 *
 * On error, the contents of the buffer that were to be modified are undefined.
 */
int
i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_pwrite *args = data;
	struct drm_i915_gem_object *obj;
	int ret;

	if (args->size == 0)
		return 0;

#if 0
	if (!access_ok(VERIFY_READ,
		       u64_to_user_ptr(args->data_ptr),
		       args->size))
		return -EFAULT;
#endif

	if (likely(!i915.prefault_disable)) {
		ret = fault_in_multipages_readable(u64_to_user_ptr(args->data_ptr),
						   args->size);
		if (ret)
			return -EFAULT;
	}

	intel_runtime_pm_get(dev_priv);

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		goto put_rpm;

	obj = to_intel_bo(drm_gem_object_lookup(file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Bounds check destination. */
	if (args->offset > obj->base.size ||
	    args->size > obj->base.size - args->offset) {
		ret = -EINVAL;
		goto out;
	}

	/* prime objects have no backing filp to GEM pread/pwrite
	 * pages from.
	 */

	trace_i915_gem_object_pwrite(obj, args->offset, args->size);

	ret = -EFAULT;
	/* We can only do the GTT pwrite on untiled buffers, as otherwise
	 * it would end up going through the fenced access, and we'll get
	 * different detiling behavior between reading and writing.
	 * pread/pwrite currently are reading and writing from the CPU
	 * perspective, requiring manual detiling by the client.
	 */
	if (obj->tiling_mode == I915_TILING_NONE &&
	    obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
	    cpu_write_needs_clflush(obj)) {
		ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
		/* Note that the gtt paths might fail with non-page-backed user
		 * pointers (e.g. gtt mappings when moving data between
		 * textures). Fallback to the shmem path in that case. */
	}

	if (ret == -EFAULT || ret == -ENOSPC) {
		if (obj->phys_handle)
			ret = i915_gem_phys_pwrite(obj, args, file);
		else
			ret = i915_gem_shmem_pwrite(dev, obj, args, file);
	}

out:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
put_rpm:
	intel_runtime_pm_put(dev_priv);

	return ret;
}

static int
i915_gem_check_wedge(unsigned reset_counter, bool interruptible)
{
	if (__i915_terminally_wedged(reset_counter))
		return -EIO;

	if (__i915_reset_in_progress(reset_counter)) {
		/* Non-interruptible callers can't handle -EAGAIN, hence return
		 * -EIO unconditionally for these. */
		if (!interruptible)
			return -EIO;

		return -EAGAIN;
	}

	return 0;
}

static void fake_irq(unsigned long data)
{
	wakeup_one((void *)data);
}

static bool missed_irq(struct drm_i915_private *dev_priv,
		       struct intel_engine_cs *engine)
{
	return test_bit(engine->id, &dev_priv->gpu_error.missed_irq_rings);
}

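/* Optimistic busy-wait before sleeping on a request; currently compiled
 * out in this port (note the #if 0 here and around the call site in
 * __i915_wait_request() below).
 */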
#if 0
static int __i915_spin_request(struct drm_i915_gem_request *req, int state)
{
	unsigned long timeout;
	unsigned cpu;

	/* When waiting for high frequency requests, e.g. during synchronous
	 * rendering split between the CPU and GPU, the finite amount of time
	 * required to set up the irq and wait upon it limits the response
	 * rate. By busywaiting on the request completion for a short while we
	 * can service the high frequency waits as quick as possible. However,
	 * if it is a slow request, we want to sleep as quickly as possible.
	 * The tradeoff between waiting and sleeping is roughly the time it
	 * takes to sleep on a request, on the order of a microsecond.
	 */

	if (req->engine->irq_refcount)
		return -EBUSY;

	/* Only spin if we know the GPU is processing this request */
	if (!i915_gem_request_started(req, true))
		return -EAGAIN;

	timeout = local_clock_us(&cpu) + 5;
	while (!need_resched()) {
		if (i915_gem_request_completed(req, true))
			return 0;

		if (signal_pending_state(state, current))
			break;

		if (busywait_stop(timeout, cpu))
			break;

		cpu_relax_lowlatency();
	}

	if (i915_gem_request_completed(req, false))
		return 0;

	return -EAGAIN;
}
#endif

/**
 * __i915_wait_request - wait until execution of request has finished
 * @req: request to wait upon
 * @interruptible: do an interruptible wait (normally yes)
 * @timeout: in - how long to wait (NULL forever); out - how much time remaining
 * @rps: client to charge for any GPU frequency boost (may be NULL)
 *
 * Note: It is of utmost importance that the passed in seqno and reset_counter
 * values have been read by the caller in an smp safe manner. Where read-side
 * locks are involved, it is sufficient to read the reset_counter before
 * unlocking the lock that protects the seqno. For lockless tricks, the
 * reset_counter _must_ be read before, and an appropriate smp_rmb must be
 * inserted.
 *
 * Returns 0 if the request was found within the allotted time. Else returns the
 * errno with remaining time filled in timeout argument.
 */
int __i915_wait_request(struct drm_i915_gem_request *req,
			bool interruptible,
			s64 *timeout,
			struct intel_rps_client *rps)
{
	struct intel_engine_cs *engine = i915_gem_request_get_engine(req);
	struct drm_device *dev = engine->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	const bool irq_test_in_progress =
		ACCESS_ONCE(dev_priv->gpu_error.test_irq_rings) & intel_engine_flag(engine);
	unsigned long timeout_expire;
	s64 before = 0; /* Only to silence a compiler warning. */
	int ret, sl_timeout = 1;

	WARN(!intel_irqs_enabled(dev_priv), "IRQs disabled");

	if (list_empty(&req->list))
		return 0;

	if (i915_gem_request_completed(req, true))
		return 0;

	timeout_expire = 0;
	if (timeout) {
		if (WARN_ON(*timeout < 0))
			return -EINVAL;

		if (*timeout == 0)
			return -ETIME;

		timeout_expire = jiffies + nsecs_to_jiffies_timeout(*timeout);

		/*
		 * Record current time in case interrupted by signal, or wedged.
		 */
		before = ktime_get_raw_ns();
	}

	if (INTEL_INFO(dev_priv)->gen >= 6)
		gen6_rps_boost(dev_priv, rps, req->emitted_jiffies);

	trace_i915_gem_request_wait_begin(req);

	/* Optimistic spin for the next jiffie before touching IRQs */
#if 0
	ret = __i915_spin_request(req);
	if (ret == 0)
		goto out;
#endif

	if (!irq_test_in_progress && WARN_ON(!engine->irq_get(engine))) {
		ret = -ENODEV;
		goto out;
	}

	lockmgr(&engine->irq_queue.lock, LK_EXCLUSIVE);
	for (;;) {
		struct timer_list timer;

		/* We need to check whether any gpu reset happened in between
		 * the request being submitted and now. If a reset has occurred,
		 * the request is effectively complete (we either are in the
		 * process of or have discarded the rendering and completely
		 * reset the GPU. The results of the request are lost and we
		 * are free to continue on with the original operation.
		 */
		if (req->reset_counter != i915_reset_counter(&dev_priv->gpu_error)) {
			ret = 0;
			break;
		}

		if (i915_gem_request_completed(req, false)) {
			ret = 0;
			break;
		}

		if (interruptible && signal_pending(curthread->td_lwp)) {
			ret = -ERESTARTSYS;
			break;
		}

		if (timeout && time_after_eq(jiffies, timeout_expire)) {
			ret = -ETIME;
			break;
		}

		timer.function = NULL;
		if (timeout || missed_irq(dev_priv, engine)) {
			unsigned long expire;

			setup_timer_on_stack(&timer, fake_irq, (unsigned long)&engine->irq_queue);
			expire = missed_irq(dev_priv, engine) ? jiffies + 1 : timeout_expire;
			sl_timeout = expire - jiffies;
			if (sl_timeout < 1)
				sl_timeout = 1;
			mod_timer(&timer, expire);
		}

#if 0
		io_schedule();
#endif

		if (timer.function) {
			del_singleshot_timer_sync(&timer);
			destroy_timer_on_stack(&timer);
		}

		lksleep(&engine->irq_queue, &engine->irq_queue.lock,
			interruptible ? PCATCH : 0, "lwe", sl_timeout);
	}
	lockmgr(&engine->irq_queue.lock, LK_RELEASE);
	if (!irq_test_in_progress)
		engine->irq_put(engine);

out:
	trace_i915_gem_request_wait_end(req);

	if (timeout) {
		s64 tres = *timeout - (ktime_get_raw_ns() - before);

		*timeout = tres < 0 ? 0 : tres;

		/*
		 * Apparently ktime isn't accurate enough and occasionally has a
		 * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
		 * things up to make the test happy. We allow up to 1 jiffy.
		 *
		 * This is a regression from the timespec->ktime conversion.
		 */
		if (ret == -ETIME && *timeout < jiffies_to_usecs(1)*1000)
			*timeout = 0;
	}

	return ret;
}

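/**
 * i915_gem_request_add_to_client - track a request on the client's list
 * @req: request to track
 * @file: drm file that submitted the request
 *
 * Associates a request with the submitting file so the client's
 * outstanding requests can be walked later (e.g. for throttling and for
 * removing them again when the file is closed).
 */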
int i915_gem_request_add_to_client(struct drm_i915_gem_request *req,
				   struct drm_file *file)
{
	struct drm_i915_file_private *file_priv;

	WARN_ON(!req || !file || req->file_priv);

	if (!req || !file)
		return -EINVAL;

	if (req->file_priv)
		return -EINVAL;

	file_priv = file->driver_priv;

	spin_lock(&file_priv->mm.lock);
	req->file_priv = file_priv;
	list_add_tail(&req->client_list, &file_priv->mm.request_list);
	spin_unlock(&file_priv->mm.lock);

	req->pid = curproc->p_pid;

	return 0;
}

static inline void
i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
{
	struct drm_i915_file_private *file_priv = request->file_priv;

	if (!file_priv)
		return;

	spin_lock(&file_priv->mm.lock);
	list_del(&request->client_list);
	request->file_priv = NULL;
	spin_unlock(&file_priv->mm.lock);

#if 0
	put_pid(request->pid);
	request->pid = NULL;
#endif
}

static void i915_gem_request_retire(struct drm_i915_gem_request *request)
{
	trace_i915_gem_request_retire(request);

	/* We know the GPU must have read the request to have
	 * sent us the seqno + interrupt, so use the position
	 * of tail of the request to update the last known position
	 * of the GPU head.
	 *
	 * Note this requires that we are always called in request
	 * completion order.
	 */
	request->ringbuf->last_retired_head = request->postfix;

	list_del_init(&request->list);
	i915_gem_request_remove_from_client(request);

	i915_gem_request_unreference(request);
}

static void
__i915_gem_request_retire__upto(struct drm_i915_gem_request *req)
{
	struct intel_engine_cs *engine = req->engine;
	struct drm_i915_gem_request *tmp;

	lockdep_assert_held(&engine->dev->struct_mutex);

	if (list_empty(&req->list))
		return;

	do {
		tmp = list_first_entry(&engine->request_list,
				       typeof(*tmp), list);

		i915_gem_request_retire(tmp);
	} while (tmp != req);

	WARN_ON(i915_verify_lists(engine->dev));
}

/**
 * Waits for a request to be signaled, and cleans up the
 * request and object lists appropriately for that event.
 */
int
i915_wait_request(struct drm_i915_gem_request *req)
{
	struct drm_i915_private *dev_priv = req->i915;
	bool interruptible;
	int ret;

	interruptible = dev_priv->mm.interruptible;

	BUG_ON(!mutex_is_locked(&dev_priv->dev->struct_mutex));

	ret = __i915_wait_request(req, interruptible, NULL, NULL);
	if (ret)
		return ret;

	/* If the GPU hung, we want to keep the requests to find the guilty. */
	if (req->reset_counter == i915_reset_counter(&dev_priv->gpu_error))
		__i915_gem_request_retire__upto(req);

	return 0;
}

/**
 * Ensures that all rendering to the object has completed and the object is
 * safe to unbind from the GTT or access from the CPU.
 */
int
i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
			       bool readonly)
{
	int ret, i;

	if (!obj->active)
		return 0;

	if (readonly) {
		if (obj->last_write_req != NULL) {
			ret = i915_wait_request(obj->last_write_req);
			if (ret)
				return ret;

			i = obj->last_write_req->engine->id;
			if (obj->last_read_req[i] == obj->last_write_req)
				i915_gem_object_retire__read(obj, i);
			else
				i915_gem_object_retire__write(obj);
		}
	} else {
		for (i = 0; i < I915_NUM_ENGINES; i++) {
			if (obj->last_read_req[i] == NULL)
				continue;

			ret = i915_wait_request(obj->last_read_req[i]);
			if (ret)
				return ret;

			i915_gem_object_retire__read(obj, i);
		}
		GEM_BUG_ON(obj->active);
	}

	return 0;
}

static void
i915_gem_object_retire_request(struct drm_i915_gem_object *obj,
			       struct drm_i915_gem_request *req)
{
	int ring = req->engine->id;

	if (obj->last_read_req[ring] == req)
		i915_gem_object_retire__read(obj, ring);
	else if (obj->last_write_req == req)
		i915_gem_object_retire__write(obj);

	if (req->reset_counter == i915_reset_counter(&req->i915->gpu_error))
		__i915_gem_request_retire__upto(req);
}

/* A nonblocking variant of the above wait. This is a highly dangerous routine
 * as the object state may change during this call.
 */
static __must_check int
i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
					    struct intel_rps_client *rps,
					    bool readonly)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_request *requests[I915_NUM_ENGINES];
	int ret, i, n = 0;

	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
	BUG_ON(!dev_priv->mm.interruptible);

	if (!obj->active)
		return 0;

	if (readonly) {
		struct drm_i915_gem_request *req;

		req = obj->last_write_req;
		if (req == NULL)
			return 0;

		requests[n++] = i915_gem_request_reference(req);
	} else {
		for (i = 0; i < I915_NUM_ENGINES; i++) {
			struct drm_i915_gem_request *req;

			req = obj->last_read_req[i];
			if (req == NULL)
				continue;

			requests[n++] = i915_gem_request_reference(req);
		}
	}

	mutex_unlock(&dev->struct_mutex);
	ret = 0;
	for (i = 0; ret == 0 && i < n; i++)
		ret = __i915_wait_request(requests[i], true, NULL, rps);
	mutex_lock(&dev->struct_mutex);

	for (i = 0; i < n; i++) {
		if (ret == 0)
			i915_gem_object_retire_request(obj, requests[i]);
		i915_gem_request_unreference(requests[i]);
	}

	return ret;
}

static struct intel_rps_client *to_rps_client(struct drm_file *file)
{
	struct drm_i915_file_private *fpriv = file->driver_priv;
	return &fpriv->rps;
}

/**
 * Called when user space prepares to use an object with the CPU, either
 * through the mmap ioctl's mapping or a GTT mapping.
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file)
{
	struct drm_i915_gem_set_domain *args = data;
	struct drm_i915_gem_object *obj;
	uint32_t read_domains = args->read_domains;
	uint32_t write_domain = args->write_domain;
	int ret;

	/* Only handle setting domains to types used by the CPU. */
	if (write_domain & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	if (read_domains & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	/* Having something in the write domain implies it's in the read
	 * domain, and only that read domain. Enforce that in the request.
	 */
	if (write_domain != 0 && read_domains != write_domain)
		return -EINVAL;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Try to flush the object off the GPU without holding the lock.
	 * We will repeat the flush holding the lock in the normal manner
	 * to catch cases where we are gazumped.
	 */
	ret = i915_gem_object_wait_rendering__nonblocking(obj,
							  to_rps_client(file),
							  !write_domain);
	if (ret)
		goto unref;

	if (read_domains & I915_GEM_DOMAIN_GTT)
		ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
	else
		ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);

	if (write_domain != 0)
		intel_fb_obj_invalidate(obj,
					write_domain == I915_GEM_DOMAIN_GTT ?
					ORIGIN_GTT : ORIGIN_CPU);

unref:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/**
 * Called when user space has done writes to this buffer
 */
int
i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file)
{
	struct drm_i915_gem_sw_finish *args = data;
	struct drm_i915_gem_object *obj;
	int ret = 0;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Pinned buffers may be scanout, so flush the cache */
	if (obj->pin_display)
		i915_gem_object_flush_cpu_write_domain(obj);

	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/**
 * Maps the contents of an object, returning the address it is mapped
 * into.
 *
 * While the mapping holds a reference on the contents of the object, it doesn't
 * imply a ref on the object itself.
 *
 * IMPORTANT:
 *
 * DRM driver writers who look at this function as an example for how to do GEM
 * mmap support, please don't implement mmap support like here. The modern way
 * to implement DRM mmap support is with an mmap offset ioctl (like
 * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly.
 * That way debug tooling like valgrind will understand what's going on; hiding
 * the mmap call in a driver private ioctl will break that. The i915 driver only
 * does cpu mmaps this way because we didn't know better.
 */
int
i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_gem_mmap *args = data;
	struct drm_gem_object *obj;
	unsigned long addr;

	struct proc *p = curproc;
	vm_map_t map = &p->p_vmspace->vm_map;
	vm_size_t size;
	int error = 0, rv;

	if (args->flags & ~(I915_MMAP_WC))
		return -EINVAL;

	obj = drm_gem_object_lookup(file, args->handle);
	if (obj == NULL)
		return -ENOENT;

	if (args->size == 0)
		goto out;

	size = round_page(args->size);
	if (map->size + size > p->p_rlimit[RLIMIT_VMEM].rlim_cur) {
		error = -ENOMEM;
		goto out;
	}

	/* prime objects have no backing filp to GEM mmap
	 * pages from.
	 */

	/*
	 * Call hint to ensure that NULL is not returned as a valid address
	 * and to reduce vm_map traversals. XXX causes instability, use a
	 * fixed low address as the start point instead to avoid the NULL
	 * return issue.
	 */
	addr = PAGE_SIZE;

	/*
	 * Use 256KB alignment. It is unclear why this matters for a
	 * virtual address but it appears to fix a number of application/X
	 * crashes and kms console switching is much faster.
	 */
	vm_object_hold(obj->vm_obj);
	vm_object_reference_locked(obj->vm_obj);
	vm_object_drop(obj->vm_obj);

	rv = vm_map_find(map, obj->vm_obj, NULL,
			 args->offset, &addr, args->size,
			 256 * 1024, /* align */
			 TRUE, /* fitit */
			 VM_MAPTYPE_NORMAL, VM_SUBSYS_DRM_GEM,
			 VM_PROT_READ | VM_PROT_WRITE, /* prot */
			 VM_PROT_READ | VM_PROT_WRITE, /* max */
			 MAP_SHARED /* cow */);
	if (rv != KERN_SUCCESS) {
		vm_object_deallocate(obj->vm_obj);
		error = -vm_mmap_to_errno(rv);
	} else {
		args->addr_ptr = (uint64_t)addr;
	}
out:
	drm_gem_object_unreference(obj);
	return (error);
}

/**
 * i915_gem_fault - fault a page into the GTT
 *
 * vm_obj is locked on entry and expected to be locked on return.
 *
 * The vm_pager has placemarked the object with an anonymous memory page
 * which we must replace atomically to avoid races against concurrent faults
 * on the same page. XXX we currently are unable to do this atomically.
 *
 * If we are to return an error we should not touch the anonymous page,
 * the caller will deallocate it.
 *
 * XXX Most GEM calls appear to be interruptible, but we can't hard loop
 * in that case. Release all resources and wait 1 tick before retrying.
 * This is a huge problem which needs to be fixed by getting rid of most
 * of the interruptibility. The linux code does not retry but does appear
 * to have some sort of mechanism (VM_FAULT_NOPAGE ?) for the higher level
 * to be able to retry.
 *
 * --
 * @vma: VMA in question
 * @vmf: fault info
 *
 * The fault handler is set up by drm_gem_mmap() when an object is GTT mapped
 * from userspace. The fault handler takes care of binding the object to
 * the GTT (if needed), allocating and programming a fence register (again,
 * only if needed based on whether the old reg is still valid or the object
 * is tiled) and inserting a new PTE into the faulting process.
 *
 * Note that the faulting process may involve evicting existing objects
 * from the GTT and/or fence registers to make room. So performance may
 * suffer if the GTT working set is large or there are few fence registers
 * left.
 *
 * vm_obj is locked on entry and expected to be locked on return. The VM
 * pager has placed an anonymous memory page at (obj,offset) which we have
 * to replace.
 */
int i915_gem_fault(vm_object_t vm_obj, vm_ooffset_t offset, int prot, vm_page_t *mres)
{
	struct drm_i915_gem_object *obj = to_intel_bo(vm_obj->handle);
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct i915_ggtt *ggtt = &dev_priv->ggtt;
	struct i915_ggtt_view view = i915_ggtt_view_normal;
	unsigned long page_offset;
	vm_page_t m;
	int ret = 0;
	bool write = !!(prot & VM_PROT_WRITE);

	intel_runtime_pm_get(dev_priv);

	/* We don't use vmf->pgoff since that has the fake offset */
	page_offset = (unsigned long)offset;

	/*
	 * vm_fault() has supplied us with a busied page placeholding
	 * the operation. This presents a lock order reversal issue
	 * against i915_gem_release_mmap() for our device mutex.
	 *
	 * Deal with the problem by getting rid of the placeholder now,
	 * and then dealing with the potential for a new placeholder when
	 * we try to insert later.
	 */
	if (*mres != NULL) {
		m = *mres;
		*mres = NULL;
		if ((m->busy_count & PBUSY_LOCKED) == 0)
			kprintf("i915_gem_fault: Page was not busy\n");
		else
			vm_page_remove(m);
		vm_page_free(m);
	}

	m = NULL;

retry:
	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		goto out;

	trace_i915_gem_object_fault(obj, page_offset, true, write);

	/* Try to flush the object off the GPU first without holding the lock.
	 * Upon reacquiring the lock, we will perform our sanity checks and then
	 * repeat the flush holding the lock in the normal manner to catch cases
	 * where we are gazumped.
	 */
	ret = i915_gem_object_wait_rendering__nonblocking(obj, NULL, !write);
	if (ret)
		goto unlock;

	/* Access to snoopable pages through the GTT is incoherent. */
	if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) {
		ret = -EFAULT;
		goto unlock;
	}

	/* Use a partial view if the object is bigger than the aperture. */
	if (obj->base.size >= ggtt->mappable_end &&
	    obj->tiling_mode == I915_TILING_NONE) {
#if 0
		static const unsigned int chunk_size = 256; // 1 MiB

		memset(&view, 0, sizeof(view));
		view.type = I915_GGTT_VIEW_PARTIAL;
		view.params.partial.offset = rounddown(page_offset, chunk_size);
		view.params.partial.size =
			min_t(unsigned int,
			      chunk_size,
			      (vma->vm_end - vma->vm_start)/PAGE_SIZE -
			      view.params.partial.offset);
#endif
	}

	/* Now pin it into the GTT if needed */
	ret = i915_gem_object_ggtt_pin(obj, &view, 0, PIN_MAPPABLE);
	if (ret)
		goto unlock;

	ret = i915_gem_object_set_to_gtt_domain(obj, write);
	if (ret)
		goto unpin;

	ret = i915_gem_object_get_fence(obj);
	if (ret)
		goto unpin;

	/*
	 * START FREEBSD MAGIC
	 *
	 * Add a pip count to avoid destruction and certain other
	 * complex operations (such as collapses?) while unlocked.
1883 */ 1884 vm_object_pip_add(vm_obj, 1); 1885 1886 ret = 0; 1887 m = NULL; 1888 1889 /* 1890 * Since the object lock was dropped, another thread might have 1891 * faulted on the same GTT address and instantiated the mapping. 1892 * Recheck. 1893 */ 1894 m = vm_page_lookup(vm_obj, OFF_TO_IDX(offset)); 1895 if (m != NULL) { 1896 /* 1897 * Try to busy the page, retry on failure (non-zero ret). 1898 */ 1899 if (vm_page_busy_try(m, false)) { 1900 kprintf("i915_gem_fault: BUSY\n"); 1901 ret = -EINTR; 1902 goto unlock; 1903 } 1904 goto have_page; 1905 } 1906 /* 1907 * END FREEBSD MAGIC 1908 */ 1909 1910 obj->fault_mappable = true; 1911 1912 /* Finally, remap it using the new GTT offset */ 1913 m = vm_phys_fictitious_to_vm_page(ggtt->mappable_base + 1914 i915_gem_obj_ggtt_offset_view(obj, &view) + offset); 1915 if (m == NULL) { 1916 ret = -EFAULT; 1917 goto unpin; 1918 } 1919 KASSERT((m->flags & PG_FICTITIOUS) != 0, ("not fictitious %p", m)); 1920 KASSERT(m->wire_count == 1, ("wire_count not 1 %p", m)); 1921 1922 /* 1923 * Try to busy the page. Fails on non-zero return. 1924 */ 1925 if (vm_page_busy_try(m, false)) { 1926 kprintf("i915_gem_fault: BUSY(2)\n"); 1927 ret = -EINTR; 1928 goto unpin; 1929 } 1930 m->valid = VM_PAGE_BITS_ALL; 1931 1932 #if 1 1933 /* 1934 * This should always work since we already checked via a lookup 1935 * above. 1936 */ 1937 if (vm_page_insert(m, vm_obj, OFF_TO_IDX(offset)) == FALSE) { 1938 kprintf("i915:gem_fault: page %p,%jd already in object\n", 1939 vm_obj, 1940 OFF_TO_IDX(offset)); 1941 vm_page_wakeup(m); 1942 ret = -EINTR; 1943 goto unpin; 1944 } 1945 #else 1946 /* NOT COMPILED ATM */ 1947 if (unlikely(view.type == I915_GGTT_VIEW_PARTIAL)) { 1948 /* Overriding existing pages in partial view does not cause 1949 * us any trouble as TLBs are still valid because the fault 1950 * is due to userspace losing part of the mapping or never 1951 * having accessed it before (at this partials' range). 1952 */ 1953 unsigned long base = vma->vm_start + 1954 (view.params.partial.offset << PAGE_SHIFT); 1955 unsigned int i; 1956 1957 for (i = 0; i < view.params.partial.size; i++) { 1958 ret = vm_insert_pfn(vma, base + i * PAGE_SIZE, pfn + i); 1959 if (ret) 1960 break; 1961 } 1962 1963 obj->fault_mappable = true; 1964 } else { 1965 if (!obj->fault_mappable) { 1966 unsigned long size = min_t(unsigned long, 1967 vma->vm_end - vma->vm_start, 1968 obj->base.size); 1969 int i; 1970 1971 for (i = 0; i < size >> PAGE_SHIFT; i++) { 1972 ret = vm_insert_pfn(vma, 1973 (unsigned long)vma->vm_start + i * PAGE_SIZE, 1974 pfn + i); 1975 if (ret) 1976 break; 1977 } 1978 1979 obj->fault_mappable = true; 1980 } else 1981 ret = vm_insert_pfn(vma, 1982 (unsigned long)vmf->virtual_address, 1983 pfn + page_offset); 1984 } 1985 #endif 1986 1987 have_page: 1988 *mres = m; 1989 1990 i915_gem_object_ggtt_unpin_view(obj, &view); 1991 mutex_unlock(&dev->struct_mutex); 1992 ret = VM_PAGER_OK; 1993 goto done; 1994 1995 /* 1996 * ALTERNATIVE ERROR RETURN. 1997 * 1998 * OBJECT EXPECTED TO BE LOCKED. 1999 */ 2000 unpin: 2001 i915_gem_object_ggtt_unpin_view(obj, &view); 2002 unlock: 2003 mutex_unlock(&dev->struct_mutex); 2004 out: 2005 switch (ret) { 2006 case -EIO: 2007 /* 2008 * We eat errors when the gpu is terminally wedged to avoid 2009 * userspace unduly crashing (gl has no provisions for mmaps to 2010 * fail). But any other -EIO isn't ours (e.g. swap in failure) 2011 * and so needs to be reported. 
2012 */ 2013 if (!i915_terminally_wedged(&dev_priv->gpu_error)) { 2014 // ret = VM_FAULT_SIGBUS; 2015 break; 2016 } 2017 case -EAGAIN: 2018 /* 2019 * EAGAIN means the gpu is hung and we'll wait for the error 2020 * handler to reset everything when re-faulting in 2021 * i915_mutex_lock_interruptible. 2022 */ 2023 case -ERESTARTSYS: 2024 case -EINTR: 2025 VM_OBJECT_UNLOCK(vm_obj); 2026 int dummy; 2027 tsleep(&dummy, 0, "delay", 1); /* XXX */ 2028 VM_OBJECT_LOCK(vm_obj); 2029 goto retry; 2030 default: 2031 WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret); 2032 ret = VM_PAGER_ERROR; 2033 break; 2034 } 2035 2036 done: 2037 vm_object_pip_wakeup(vm_obj); 2038 2039 intel_runtime_pm_put(dev_priv); 2040 return ret; 2041 } 2042 2043 /** 2044 * i915_gem_release_mmap - remove physical page mappings 2045 * @obj: obj in question 2046 * 2047 * Preserve the reservation of the mmapping with the DRM core code, but 2048 * relinquish ownership of the pages back to the system. 2049 * 2050 * It is vital that we remove the page mapping if we have mapped a tiled 2051 * object through the GTT and then lose the fence register due to 2052 * resource pressure. Similarly if the object has been moved out of the 2053 * aperture, than pages mapped into userspace must be revoked. Removing the 2054 * mapping will then trigger a page fault on the next user access, allowing 2055 * fixup by i915_gem_fault(). 2056 */ 2057 void 2058 i915_gem_release_mmap(struct drm_i915_gem_object *obj) 2059 { 2060 vm_object_t devobj; 2061 vm_page_t m; 2062 int i, page_count; 2063 2064 /* Serialisation between user GTT access and our code depends upon 2065 * revoking the CPU's PTE whilst the mutex is held. The next user 2066 * pagefault then has to wait until we release the mutex. 2067 */ 2068 lockdep_assert_held(&obj->base.dev->struct_mutex); 2069 2070 if (!obj->fault_mappable) 2071 return; 2072 2073 devobj = cdev_pager_lookup(obj); 2074 if (devobj != NULL) { 2075 page_count = OFF_TO_IDX(obj->base.size); 2076 2077 VM_OBJECT_LOCK(devobj); 2078 for (i = 0; i < page_count; i++) { 2079 m = vm_page_lookup_busy_wait(devobj, i, TRUE, "915unm"); 2080 if (m == NULL) 2081 continue; 2082 cdev_pager_free_page(devobj, m); 2083 } 2084 VM_OBJECT_UNLOCK(devobj); 2085 vm_object_deallocate(devobj); 2086 } 2087 2088 /* Ensure that the CPU's PTE are revoked and there are not outstanding 2089 * memory transactions from userspace before we return. The TLB 2090 * flushing implied above by changing the PTE above *should* be 2091 * sufficient, an extra barrier here just provides us with a bit 2092 * of paranoid documentation about our requirement to serialise 2093 * memory writes before touching registers / GSM. 
2094 */ 2095 wmb(); 2096 2097 obj->fault_mappable = false; 2098 } 2099 2100 void 2101 i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv) 2102 { 2103 struct drm_i915_gem_object *obj; 2104 2105 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) 2106 i915_gem_release_mmap(obj); 2107 } 2108 2109 uint32_t 2110 i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode) 2111 { 2112 uint32_t gtt_size; 2113 2114 if (INTEL_INFO(dev)->gen >= 4 || 2115 tiling_mode == I915_TILING_NONE) 2116 return size; 2117 2118 /* Previous chips need a power-of-two fence region when tiling */ 2119 if (INTEL_INFO(dev)->gen == 3) 2120 gtt_size = 1024*1024; 2121 else 2122 gtt_size = 512*1024; 2123 2124 while (gtt_size < size) 2125 gtt_size <<= 1; 2126 2127 return gtt_size; 2128 } 2129 2130 /** 2131 * i915_gem_get_gtt_alignment - return required GTT alignment for an object 2132 * @obj: object to check 2133 * 2134 * Return the required GTT alignment for an object, taking into account 2135 * potential fence register mapping. 2136 */ 2137 uint32_t 2138 i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size, 2139 int tiling_mode, bool fenced) 2140 { 2141 /* 2142 * Minimum alignment is 4k (GTT page size), but might be greater 2143 * if a fence register is needed for the object. 2144 */ 2145 if (INTEL_INFO(dev)->gen >= 4 || (!fenced && IS_G33(dev)) || 2146 tiling_mode == I915_TILING_NONE) 2147 return 4096; 2148 2149 /* 2150 * Previous chips need to be aligned to the size of the smallest 2151 * fence register that can contain the object. 2152 */ 2153 return i915_gem_get_gtt_size(dev, size, tiling_mode); 2154 } 2155 2156 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj) 2157 { 2158 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2159 int ret; 2160 2161 #if 0 2162 if (drm_vma_node_has_offset(&obj->base.vma_node)) 2163 return 0; 2164 #endif 2165 2166 dev_priv->mm.shrinker_no_lock_stealing = true; 2167 2168 ret = drm_gem_create_mmap_offset(&obj->base); 2169 if (ret != -ENOSPC) 2170 goto out; 2171 2172 /* Badly fragmented mmap space? The only way we can recover 2173 * space is by destroying unwanted objects. We can't randomly release 2174 * mmap_offsets as userspace expects them to be persistent for the 2175 * lifetime of the objects. The closest we can is to release the 2176 * offsets on purgeable objects by truncating it and marking it purged, 2177 * which prevents userspace from ever using that object again. 
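 * Hence the escalation below: first a shrink aimed at purgeable objects and
 * sized to this allocation, then i915_gem_shrink_all() before the final
 * attempt at drm_gem_create_mmap_offset().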
2178 */ 2179 i915_gem_shrink(dev_priv, 2180 obj->base.size >> PAGE_SHIFT, 2181 I915_SHRINK_BOUND | 2182 I915_SHRINK_UNBOUND | 2183 I915_SHRINK_PURGEABLE); 2184 ret = drm_gem_create_mmap_offset(&obj->base); 2185 if (ret != -ENOSPC) 2186 goto out; 2187 2188 i915_gem_shrink_all(dev_priv); 2189 ret = drm_gem_create_mmap_offset(&obj->base); 2190 out: 2191 dev_priv->mm.shrinker_no_lock_stealing = false; 2192 2193 return ret; 2194 } 2195 2196 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj) 2197 { 2198 drm_gem_free_mmap_offset(&obj->base); 2199 } 2200 2201 int 2202 i915_gem_mmap_gtt(struct drm_file *file, 2203 struct drm_device *dev, 2204 uint32_t handle, 2205 uint64_t *offset) 2206 { 2207 struct drm_i915_gem_object *obj; 2208 int ret; 2209 2210 ret = i915_mutex_lock_interruptible(dev); 2211 if (ret) 2212 return ret; 2213 2214 obj = to_intel_bo(drm_gem_object_lookup(file, handle)); 2215 if (&obj->base == NULL) { 2216 ret = -ENOENT; 2217 goto unlock; 2218 } 2219 2220 if (obj->madv != I915_MADV_WILLNEED) { 2221 DRM_DEBUG("Attempting to mmap a purgeable buffer\n"); 2222 ret = -EFAULT; 2223 goto out; 2224 } 2225 2226 ret = i915_gem_object_create_mmap_offset(obj); 2227 if (ret) 2228 goto out; 2229 2230 *offset = DRM_GEM_MAPPING_OFF(obj->base.map_list.key) | 2231 DRM_GEM_MAPPING_KEY; 2232 2233 out: 2234 drm_gem_object_unreference(&obj->base); 2235 unlock: 2236 mutex_unlock(&dev->struct_mutex); 2237 return ret; 2238 } 2239 2240 /** 2241 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing 2242 * @dev: DRM device 2243 * @data: GTT mapping ioctl data 2244 * @file: GEM object info 2245 * 2246 * Simply returns the fake offset to userspace so it can mmap it. 2247 * The mmap call will end up in drm_gem_mmap(), which will set things 2248 * up so we can get faults in the handler above. 2249 * 2250 * The fault handler will take care of binding the object into the GTT 2251 * (since it may have been evicted to make room for something), allocating 2252 * a fence register, and mapping the appropriate aperture address into 2253 * userspace. 
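 *
 * Illustrative userspace flow (a sketch, not part of this driver): the
 * caller hands in a GEM handle and mmaps the returned fake offset on the
 * DRM fd, roughly:
 *
 *	struct drm_i915_gem_mmap_gtt arg = { .handle = handle };
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &arg);
 *	ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
 *		   fd, arg.offset);
 *
 * Subsequent faults on that mapping are serviced by i915_gem_fault() above.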
2254 */ 2255 int 2256 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, 2257 struct drm_file *file) 2258 { 2259 struct drm_i915_gem_mmap_gtt *args = data; 2260 2261 return i915_gem_mmap_gtt(file, dev, args->handle, (uint64_t *)&args->offset); 2262 } 2263 2264 /* Immediately discard the backing storage */ 2265 static void 2266 i915_gem_object_truncate(struct drm_i915_gem_object *obj) 2267 { 2268 vm_object_t vm_obj; 2269 2270 vm_obj = obj->base.vm_obj; 2271 VM_OBJECT_LOCK(vm_obj); 2272 vm_object_page_remove(vm_obj, 0, 0, false); 2273 VM_OBJECT_UNLOCK(vm_obj); 2274 2275 obj->madv = __I915_MADV_PURGED; 2276 } 2277 2278 /* Try to discard unwanted pages */ 2279 static void 2280 i915_gem_object_invalidate(struct drm_i915_gem_object *obj) 2281 { 2282 #if 0 2283 struct address_space *mapping; 2284 #endif 2285 2286 switch (obj->madv) { 2287 case I915_MADV_DONTNEED: 2288 i915_gem_object_truncate(obj); 2289 case __I915_MADV_PURGED: 2290 return; 2291 } 2292 2293 #if 0 2294 if (obj->base.filp == NULL) 2295 return; 2296 2297 mapping = file_inode(obj->base.filp)->i_mapping, 2298 invalidate_mapping_pages(mapping, 0, (loff_t)-1); 2299 #endif 2300 } 2301 2302 static void 2303 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj) 2304 { 2305 struct sg_page_iter sg_iter; 2306 int ret; 2307 2308 BUG_ON(obj->madv == __I915_MADV_PURGED); 2309 2310 ret = i915_gem_object_set_to_cpu_domain(obj, true); 2311 if (WARN_ON(ret)) { 2312 /* In the event of a disaster, abandon all caches and 2313 * hope for the best. 2314 */ 2315 i915_gem_clflush_object(obj, true); 2316 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU; 2317 } 2318 2319 i915_gem_gtt_finish_object(obj); 2320 2321 if (i915_gem_object_needs_bit17_swizzle(obj)) 2322 i915_gem_object_save_bit_17_swizzle(obj); 2323 2324 if (obj->madv == I915_MADV_DONTNEED) 2325 obj->dirty = 0; 2326 2327 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) { 2328 struct vm_page *page = sg_page_iter_page(&sg_iter); 2329 2330 if (obj->dirty) 2331 set_page_dirty(page); 2332 2333 if (obj->madv == I915_MADV_WILLNEED) 2334 mark_page_accessed(page); 2335 2336 vm_page_busy_wait(page, FALSE, "i915gem"); 2337 vm_page_unwire(page, 1); 2338 vm_page_wakeup(page); 2339 } 2340 obj->dirty = 0; 2341 2342 sg_free_table(obj->pages); 2343 kfree(obj->pages); 2344 } 2345 2346 int 2347 i915_gem_object_put_pages(struct drm_i915_gem_object *obj) 2348 { 2349 const struct drm_i915_gem_object_ops *ops = obj->ops; 2350 2351 if (obj->pages == NULL) 2352 return 0; 2353 2354 if (obj->pages_pin_count) 2355 return -EBUSY; 2356 2357 BUG_ON(i915_gem_obj_bound_any(obj)); 2358 2359 /* ->put_pages might need to allocate memory for the bit17 swizzle 2360 * array, hence protect them from being reaped by removing them from gtt 2361 * lists early. 
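 * The cached kernel mapping (obj->mapping), if any, is likewise torn down
 * below before the backing pages themselves are released.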
*/ 2362 list_del(&obj->global_list); 2363 2364 if (obj->mapping) { 2365 if (is_vmalloc_addr(obj->mapping)) 2366 vunmap(obj->mapping); 2367 else 2368 kunmap(kmap_to_page(obj->mapping)); 2369 obj->mapping = NULL; 2370 } 2371 2372 ops->put_pages(obj); 2373 obj->pages = NULL; 2374 2375 i915_gem_object_invalidate(obj); 2376 2377 return 0; 2378 } 2379 2380 static int 2381 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) 2382 { 2383 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2384 int page_count, i; 2385 vm_object_t vm_obj; 2386 struct sg_table *st; 2387 struct scatterlist *sg; 2388 struct sg_page_iter sg_iter; 2389 struct vm_page *page; 2390 unsigned long last_pfn = 0; /* suppress gcc warning */ 2391 int ret; 2392 2393 /* Assert that the object is not currently in any GPU domain. As it 2394 * wasn't in the GTT, there shouldn't be any way it could have been in 2395 * a GPU cache 2396 */ 2397 BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS); 2398 BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS); 2399 2400 st = kmalloc(sizeof(*st), M_DRM, M_WAITOK); 2401 if (st == NULL) 2402 return -ENOMEM; 2403 2404 page_count = obj->base.size / PAGE_SIZE; 2405 if (sg_alloc_table(st, page_count, GFP_KERNEL)) { 2406 kfree(st); 2407 return -ENOMEM; 2408 } 2409 2410 /* Get the list of pages out of our struct file. They'll be pinned 2411 * at this point until we release them. 2412 * 2413 * Fail silently without starting the shrinker 2414 */ 2415 vm_obj = obj->base.vm_obj; 2416 VM_OBJECT_LOCK(vm_obj); 2417 sg = st->sgl; 2418 st->nents = 0; 2419 for (i = 0; i < page_count; i++) { 2420 page = shmem_read_mapping_page(vm_obj, i); 2421 if (IS_ERR(page)) { 2422 i915_gem_shrink(dev_priv, 2423 page_count, 2424 I915_SHRINK_BOUND | 2425 I915_SHRINK_UNBOUND | 2426 I915_SHRINK_PURGEABLE); 2427 page = shmem_read_mapping_page(vm_obj, i); 2428 } 2429 if (IS_ERR(page)) { 2430 /* We've tried hard to allocate the memory by reaping 2431 * our own buffer, now let the real VM do its job and 2432 * go down in flames if truly OOM. 2433 */ 2434 i915_gem_shrink_all(dev_priv); 2435 page = shmem_read_mapping_page(vm_obj, i); 2436 if (IS_ERR(page)) { 2437 ret = PTR_ERR(page); 2438 goto err_pages; 2439 } 2440 } 2441 #ifdef CONFIG_SWIOTLB 2442 if (swiotlb_nr_tbl()) { 2443 st->nents++; 2444 sg_set_page(sg, page, PAGE_SIZE, 0); 2445 sg = sg_next(sg); 2446 continue; 2447 } 2448 #endif 2449 if (!i || page_to_pfn(page) != last_pfn + 1) { 2450 if (i) 2451 sg = sg_next(sg); 2452 st->nents++; 2453 sg_set_page(sg, page, PAGE_SIZE, 0); 2454 } else { 2455 sg->length += PAGE_SIZE; 2456 } 2457 last_pfn = page_to_pfn(page); 2458 2459 /* Check that the i965g/gm workaround works. 
*/ 2460 } 2461 #ifdef CONFIG_SWIOTLB 2462 if (!swiotlb_nr_tbl()) 2463 #endif 2464 sg_mark_end(sg); 2465 obj->pages = st; 2466 VM_OBJECT_UNLOCK(vm_obj); 2467 2468 ret = i915_gem_gtt_prepare_object(obj); 2469 if (ret) 2470 goto err_pages; 2471 2472 if (i915_gem_object_needs_bit17_swizzle(obj)) 2473 i915_gem_object_do_bit_17_swizzle(obj); 2474 2475 if (obj->tiling_mode != I915_TILING_NONE && 2476 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) 2477 i915_gem_object_pin_pages(obj); 2478 2479 return 0; 2480 2481 err_pages: 2482 sg_mark_end(sg); 2483 for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) { 2484 page = sg_page_iter_page(&sg_iter); 2485 vm_page_busy_wait(page, FALSE, "i915gem"); 2486 vm_page_unwire(page, 0); 2487 vm_page_wakeup(page); 2488 } 2489 VM_OBJECT_UNLOCK(vm_obj); 2490 sg_free_table(st); 2491 kfree(st); 2492 2493 /* shmemfs first checks if there is enough memory to allocate the page 2494 * and reports ENOSPC should there be insufficient, along with the usual 2495 * ENOMEM for a genuine allocation failure. 2496 * 2497 * We use ENOSPC in our driver to mean that we have run out of aperture 2498 * space and so want to translate the error from shmemfs back to our 2499 * usual understanding of ENOMEM. 2500 */ 2501 if (ret == -ENOSPC) 2502 ret = -ENOMEM; 2503 2504 return ret; 2505 } 2506 2507 /* Ensure that the associated pages are gathered from the backing storage 2508 * and pinned into our object. i915_gem_object_get_pages() may be called 2509 * multiple times before they are released by a single call to 2510 * i915_gem_object_put_pages() - once the pages are no longer referenced 2511 * either as a result of memory pressure (reaping pages under the shrinker) 2512 * or as the object is itself released. 2513 */ 2514 int 2515 i915_gem_object_get_pages(struct drm_i915_gem_object *obj) 2516 { 2517 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2518 const struct drm_i915_gem_object_ops *ops = obj->ops; 2519 int ret; 2520 2521 if (obj->pages) 2522 return 0; 2523 2524 if (obj->madv != I915_MADV_WILLNEED) { 2525 DRM_DEBUG("Attempting to obtain a purgeable object\n"); 2526 return -EFAULT; 2527 } 2528 2529 BUG_ON(obj->pages_pin_count); 2530 2531 ret = ops->get_pages(obj); 2532 if (ret) 2533 return ret; 2534 2535 list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list); 2536 2537 obj->get_page.sg = obj->pages->sgl; 2538 obj->get_page.last = 0; 2539 2540 return 0; 2541 } 2542 2543 void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj) 2544 { 2545 int ret; 2546 2547 lockdep_assert_held(&obj->base.dev->struct_mutex); 2548 2549 ret = i915_gem_object_get_pages(obj); 2550 if (ret) 2551 return ERR_PTR(ret); 2552 2553 i915_gem_object_pin_pages(obj); 2554 2555 if (obj->mapping == NULL) { 2556 struct vm_page **pages; 2557 2558 pages = NULL; 2559 if (obj->base.size == PAGE_SIZE) 2560 obj->mapping = kmap(sg_page(obj->pages->sgl)); 2561 else 2562 pages = drm_malloc_gfp(obj->base.size >> PAGE_SHIFT, 2563 sizeof(*pages), 2564 GFP_TEMPORARY); 2565 if (pages != NULL) { 2566 struct sg_page_iter sg_iter; 2567 int n; 2568 2569 n = 0; 2570 for_each_sg_page(obj->pages->sgl, &sg_iter, 2571 obj->pages->nents, 0) 2572 pages[n++] = sg_page_iter_page(&sg_iter); 2573 2574 obj->mapping = vmap(pages, n, 0, PAGE_KERNEL); 2575 drm_free_large(pages); 2576 } 2577 if (obj->mapping == NULL) { 2578 i915_gem_object_unpin_pages(obj); 2579 return ERR_PTR(-ENOMEM); 2580 } 2581 } 2582 2583 return obj->mapping; 2584 } 2585 2586 void i915_vma_move_to_active(struct i915_vma *vma, 2587 struct drm_i915_gem_request 
*req) 2588 { 2589 struct drm_i915_gem_object *obj = vma->obj; 2590 struct intel_engine_cs *engine; 2591 2592 engine = i915_gem_request_get_engine(req); 2593 2594 /* Add a reference if we're newly entering the active list. */ 2595 if (obj->active == 0) 2596 drm_gem_object_reference(&obj->base); 2597 obj->active |= intel_engine_flag(engine); 2598 2599 list_move_tail(&obj->engine_list[engine->id], &engine->active_list); 2600 i915_gem_request_assign(&obj->last_read_req[engine->id], req); 2601 2602 list_move_tail(&vma->vm_link, &vma->vm->active_list); 2603 } 2604 2605 static void 2606 i915_gem_object_retire__write(struct drm_i915_gem_object *obj) 2607 { 2608 GEM_BUG_ON(obj->last_write_req == NULL); 2609 GEM_BUG_ON(!(obj->active & intel_engine_flag(obj->last_write_req->engine))); 2610 2611 i915_gem_request_assign(&obj->last_write_req, NULL); 2612 intel_fb_obj_flush(obj, true, ORIGIN_CS); 2613 } 2614 2615 static void 2616 i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring) 2617 { 2618 struct i915_vma *vma; 2619 2620 GEM_BUG_ON(obj->last_read_req[ring] == NULL); 2621 GEM_BUG_ON(!(obj->active & (1 << ring))); 2622 2623 list_del_init(&obj->engine_list[ring]); 2624 i915_gem_request_assign(&obj->last_read_req[ring], NULL); 2625 2626 if (obj->last_write_req && obj->last_write_req->engine->id == ring) 2627 i915_gem_object_retire__write(obj); 2628 2629 obj->active &= ~(1 << ring); 2630 if (obj->active) 2631 return; 2632 2633 /* Bump our place on the bound list to keep it roughly in LRU order 2634 * so that we don't steal from recently used but inactive objects 2635 * (unless we are forced to ofc!) 2636 */ 2637 list_move_tail(&obj->global_list, 2638 &to_i915(obj->base.dev)->mm.bound_list); 2639 2640 list_for_each_entry(vma, &obj->vma_list, obj_link) { 2641 if (!list_empty(&vma->vm_link)) 2642 list_move_tail(&vma->vm_link, &vma->vm->inactive_list); 2643 } 2644 2645 i915_gem_request_assign(&obj->last_fenced_req, NULL); 2646 drm_gem_object_unreference(&obj->base); 2647 } 2648 2649 static int 2650 i915_gem_init_seqno(struct drm_device *dev, u32 seqno) 2651 { 2652 struct drm_i915_private *dev_priv = dev->dev_private; 2653 struct intel_engine_cs *engine; 2654 int ret; 2655 2656 /* Carefully retire all requests without writing to the rings */ 2657 for_each_engine(engine, dev_priv) { 2658 ret = intel_engine_idle(engine); 2659 if (ret) 2660 return ret; 2661 } 2662 i915_gem_retire_requests(dev); 2663 2664 /* Finally reset hw state */ 2665 for_each_engine(engine, dev_priv) 2666 intel_ring_init_seqno(engine, seqno); 2667 2668 return 0; 2669 } 2670 2671 int i915_gem_set_seqno(struct drm_device *dev, u32 seqno) 2672 { 2673 struct drm_i915_private *dev_priv = dev->dev_private; 2674 int ret; 2675 2676 if (seqno == 0) 2677 return -EINVAL; 2678 2679 /* HWS page needs to be set less than what we 2680 * will inject to ring 2681 */ 2682 ret = i915_gem_init_seqno(dev, seqno - 1); 2683 if (ret) 2684 return ret; 2685 2686 /* Carefully set the last_seqno value so that wrap 2687 * detection still works 2688 */ 2689 dev_priv->next_seqno = seqno; 2690 dev_priv->last_seqno = seqno - 1; 2691 if (dev_priv->last_seqno == 0) 2692 dev_priv->last_seqno--; 2693 2694 return 0; 2695 } 2696 2697 int 2698 i915_gem_get_seqno(struct drm_device *dev, u32 *seqno) 2699 { 2700 struct drm_i915_private *dev_priv = dev->dev_private; 2701 2702 /* reserve 0 for non-seqno */ 2703 if (dev_priv->next_seqno == 0) { 2704 int ret = i915_gem_init_seqno(dev, 0); 2705 if (ret) 2706 return ret; 2707 2708 dev_priv->next_seqno = 1; 2709 } 2710 
2711 *seqno = dev_priv->last_seqno = dev_priv->next_seqno++; 2712 return 0; 2713 } 2714 2715 /* 2716 * NB: This function is not allowed to fail. Doing so would mean the the 2717 * request is not being tracked for completion but the work itself is 2718 * going to happen on the hardware. This would be a Bad Thing(tm). 2719 */ 2720 void __i915_add_request(struct drm_i915_gem_request *request, 2721 struct drm_i915_gem_object *obj, 2722 bool flush_caches) 2723 { 2724 struct intel_engine_cs *engine; 2725 struct drm_i915_private *dev_priv; 2726 struct intel_ringbuffer *ringbuf; 2727 u32 request_start; 2728 int ret; 2729 2730 if (WARN_ON(request == NULL)) 2731 return; 2732 2733 engine = request->engine; 2734 dev_priv = request->i915; 2735 ringbuf = request->ringbuf; 2736 2737 /* 2738 * To ensure that this call will not fail, space for its emissions 2739 * should already have been reserved in the ring buffer. Let the ring 2740 * know that it is time to use that space up. 2741 */ 2742 intel_ring_reserved_space_use(ringbuf); 2743 2744 request_start = intel_ring_get_tail(ringbuf); 2745 /* 2746 * Emit any outstanding flushes - execbuf can fail to emit the flush 2747 * after having emitted the batchbuffer command. Hence we need to fix 2748 * things up similar to emitting the lazy request. The difference here 2749 * is that the flush _must_ happen before the next request, no matter 2750 * what. 2751 */ 2752 if (flush_caches) { 2753 if (i915.enable_execlists) 2754 ret = logical_ring_flush_all_caches(request); 2755 else 2756 ret = intel_ring_flush_all_caches(request); 2757 /* Not allowed to fail! */ 2758 WARN(ret, "*_ring_flush_all_caches failed: %d!\n", ret); 2759 } 2760 2761 trace_i915_gem_request_add(request); 2762 2763 request->head = request_start; 2764 2765 /* Whilst this request exists, batch_obj will be on the 2766 * active_list, and so will hold the active reference. Only when this 2767 * request is retired will the the batch_obj be moved onto the 2768 * inactive_list and lose its active reference. Hence we do not need 2769 * to explicitly hold another reference here. 2770 */ 2771 request->batch_obj = obj; 2772 2773 /* Seal the request and mark it as pending execution. Note that 2774 * we may inspect this state, without holding any locks, during 2775 * hangcheck. Hence we apply the barrier to ensure that we do not 2776 * see a more recent value in the hws than we are tracking. 2777 */ 2778 request->emitted_jiffies = jiffies; 2779 request->previous_seqno = engine->last_submitted_seqno; 2780 smp_store_mb(engine->last_submitted_seqno, request->seqno); 2781 list_add_tail(&request->list, &engine->request_list); 2782 2783 /* Record the position of the start of the request so that 2784 * should we detect the updated seqno part-way through the 2785 * GPU processing the request, we never over-estimate the 2786 * position of the head. 2787 */ 2788 request->postfix = intel_ring_get_tail(ringbuf); 2789 2790 if (i915.enable_execlists) 2791 ret = engine->emit_request(request); 2792 else { 2793 ret = engine->add_request(request); 2794 2795 request->tail = intel_ring_get_tail(ringbuf); 2796 } 2797 2798 /* Not allowed to fail! */ 2799 WARN(ret, "emit|add_request failed: %d!\n", ret); 2800 2801 i915_queue_hangcheck(engine->dev); 2802 2803 queue_delayed_work(dev_priv->wq, 2804 &dev_priv->mm.retire_work, 2805 round_jiffies_up_relative(HZ)); 2806 intel_mark_busy(dev_priv->dev); 2807 2808 /* Sanity check that the reserved size was large enough. 
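 * (The reservation was taken out when the request was allocated in
 * __i915_gem_request_alloc(); intel_ring_reserved_space_end() closes that
 * accounting for this request.)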
*/ 2809 intel_ring_reserved_space_end(ringbuf); 2810 } 2811 2812 static bool i915_context_is_banned(struct drm_i915_private *dev_priv, 2813 const struct intel_context *ctx) 2814 { 2815 unsigned long elapsed; 2816 2817 elapsed = get_seconds() - ctx->hang_stats.guilty_ts; 2818 2819 if (ctx->hang_stats.banned) 2820 return true; 2821 2822 if (ctx->hang_stats.ban_period_seconds && 2823 elapsed <= ctx->hang_stats.ban_period_seconds) { 2824 if (!i915_gem_context_is_default(ctx)) { 2825 DRM_DEBUG("context hanging too fast, banning!\n"); 2826 return true; 2827 } else if (i915_stop_ring_allow_ban(dev_priv)) { 2828 if (i915_stop_ring_allow_warn(dev_priv)) 2829 DRM_ERROR("gpu hanging too fast, banning!\n"); 2830 return true; 2831 } 2832 } 2833 2834 return false; 2835 } 2836 2837 static void i915_set_reset_status(struct drm_i915_private *dev_priv, 2838 struct intel_context *ctx, 2839 const bool guilty) 2840 { 2841 struct i915_ctx_hang_stats *hs; 2842 2843 if (WARN_ON(!ctx)) 2844 return; 2845 2846 hs = &ctx->hang_stats; 2847 2848 if (guilty) { 2849 hs->banned = i915_context_is_banned(dev_priv, ctx); 2850 hs->batch_active++; 2851 hs->guilty_ts = get_seconds(); 2852 } else { 2853 hs->batch_pending++; 2854 } 2855 } 2856 2857 void i915_gem_request_free(struct kref *req_ref) 2858 { 2859 struct drm_i915_gem_request *req = container_of(req_ref, 2860 typeof(*req), ref); 2861 struct intel_context *ctx = req->ctx; 2862 2863 if (req->file_priv) 2864 i915_gem_request_remove_from_client(req); 2865 2866 if (ctx) { 2867 if (i915.enable_execlists && ctx != req->i915->kernel_context) 2868 intel_lr_context_unpin(ctx, req->engine); 2869 2870 i915_gem_context_unreference(ctx); 2871 } 2872 2873 kfree(req); 2874 } 2875 2876 static inline int 2877 __i915_gem_request_alloc(struct intel_engine_cs *engine, 2878 struct intel_context *ctx, 2879 struct drm_i915_gem_request **req_out) 2880 { 2881 struct drm_i915_private *dev_priv = to_i915(engine->dev); 2882 unsigned reset_counter = i915_reset_counter(&dev_priv->gpu_error); 2883 struct drm_i915_gem_request *req; 2884 int ret; 2885 2886 if (!req_out) 2887 return -EINVAL; 2888 2889 *req_out = NULL; 2890 2891 /* ABI: Before userspace accesses the GPU (e.g. execbuffer), report 2892 * EIO if the GPU is already wedged, or EAGAIN to drop the struct_mutex 2893 * and restart. 2894 */ 2895 ret = i915_gem_check_wedge(reset_counter, dev_priv->mm.interruptible); 2896 if (ret) 2897 return ret; 2898 2899 req = kzalloc(sizeof(*req), GFP_KERNEL); 2900 if (req == NULL) 2901 return -ENOMEM; 2902 2903 ret = i915_gem_get_seqno(engine->dev, &req->seqno); 2904 if (ret) 2905 goto err; 2906 2907 kref_init(&req->ref); 2908 req->i915 = dev_priv; 2909 req->engine = engine; 2910 req->reset_counter = reset_counter; 2911 req->ctx = ctx; 2912 i915_gem_context_reference(req->ctx); 2913 2914 if (i915.enable_execlists) 2915 ret = intel_logical_ring_alloc_request_extras(req); 2916 else 2917 ret = intel_ring_alloc_request_extras(req); 2918 if (ret) { 2919 i915_gem_context_unreference(req->ctx); 2920 goto err; 2921 } 2922 2923 /* 2924 * Reserve space in the ring buffer for all the commands required to 2925 * eventually emit this request. This is to guarantee that the 2926 * i915_add_request() call can't fail. Note that the reserve may need 2927 * to be redone if the request is not actually submitted straight 2928 * away, e.g. because a GPU scheduler has deferred it. 
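 * If reserving the space fails, the partially constructed request is torn
 * down again below (intel_ring_reserved_space_cancel() plus dropping the
 * request reference) before the error is propagated.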
2929 */ 2930 if (i915.enable_execlists) 2931 ret = intel_logical_ring_reserve_space(req); 2932 else 2933 ret = intel_ring_reserve_space(req); 2934 if (ret) { 2935 /* 2936 * At this point, the request is fully allocated even if not 2937 * fully prepared. Thus it can be cleaned up using the proper 2938 * free code. 2939 */ 2940 intel_ring_reserved_space_cancel(req->ringbuf); 2941 i915_gem_request_unreference(req); 2942 return ret; 2943 } 2944 2945 *req_out = req; 2946 return 0; 2947 2948 err: 2949 kfree(req); 2950 return ret; 2951 } 2952 2953 /** 2954 * i915_gem_request_alloc - allocate a request structure 2955 * 2956 * @engine: engine that we wish to issue the request on. 2957 * @ctx: context that the request will be associated with. 2958 * This can be NULL if the request is not directly related to 2959 * any specific user context, in which case this function will 2960 * choose an appropriate context to use. 2961 * 2962 * Returns a pointer to the allocated request if successful, 2963 * or an error code if not. 2964 */ 2965 struct drm_i915_gem_request * 2966 i915_gem_request_alloc(struct intel_engine_cs *engine, 2967 struct intel_context *ctx) 2968 { 2969 struct drm_i915_gem_request *req; 2970 int err; 2971 2972 if (ctx == NULL) 2973 ctx = to_i915(engine->dev)->kernel_context; 2974 err = __i915_gem_request_alloc(engine, ctx, &req); 2975 return err ? ERR_PTR(err) : req; 2976 } 2977 2978 struct drm_i915_gem_request * 2979 i915_gem_find_active_request(struct intel_engine_cs *engine) 2980 { 2981 struct drm_i915_gem_request *request; 2982 2983 list_for_each_entry(request, &engine->request_list, list) { 2984 if (i915_gem_request_completed(request, false)) 2985 continue; 2986 2987 return request; 2988 } 2989 2990 return NULL; 2991 } 2992 2993 static void i915_gem_reset_engine_status(struct drm_i915_private *dev_priv, 2994 struct intel_engine_cs *engine) 2995 { 2996 struct drm_i915_gem_request *request; 2997 bool ring_hung; 2998 2999 request = i915_gem_find_active_request(engine); 3000 3001 if (request == NULL) 3002 return; 3003 3004 ring_hung = engine->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG; 3005 3006 i915_set_reset_status(dev_priv, request->ctx, ring_hung); 3007 3008 list_for_each_entry_continue(request, &engine->request_list, list) 3009 i915_set_reset_status(dev_priv, request->ctx, false); 3010 } 3011 3012 static void i915_gem_reset_engine_cleanup(struct drm_i915_private *dev_priv, 3013 struct intel_engine_cs *engine) 3014 { 3015 struct intel_ringbuffer *buffer; 3016 3017 while (!list_empty(&engine->active_list)) { 3018 struct drm_i915_gem_object *obj; 3019 3020 obj = list_first_entry(&engine->active_list, 3021 struct drm_i915_gem_object, 3022 engine_list[engine->id]); 3023 3024 i915_gem_object_retire__read(obj, engine->id); 3025 } 3026 3027 /* 3028 * Clear the execlists queue up before freeing the requests, as those 3029 * are the ones that keep the context and ringbuffer backing objects 3030 * pinned in place. 3031 */ 3032 3033 if (i915.enable_execlists) { 3034 /* Ensure irq handler finishes or is cancelled. */ 3035 tasklet_kill(&engine->irq_tasklet); 3036 3037 spin_lock_bh(&engine->execlist_lock); 3038 /* list_splice_tail_init checks for empty lists */ 3039 list_splice_tail_init(&engine->execlist_queue, 3040 &engine->execlist_retired_req_list); 3041 spin_unlock_bh(&engine->execlist_lock); 3042 3043 intel_execlists_retire_requests(engine); 3044 } 3045 3046 /* 3047 * We must free the requests after all the corresponding objects have 3048 * been moved off active lists. 
Which is the same order as the normal 3049 * retire_requests function does. This is important if object hold 3050 * implicit references on things like e.g. ppgtt address spaces through 3051 * the request. 3052 */ 3053 while (!list_empty(&engine->request_list)) { 3054 struct drm_i915_gem_request *request; 3055 3056 request = list_first_entry(&engine->request_list, 3057 struct drm_i915_gem_request, 3058 list); 3059 3060 i915_gem_request_retire(request); 3061 } 3062 3063 /* Having flushed all requests from all queues, we know that all 3064 * ringbuffers must now be empty. However, since we do not reclaim 3065 * all space when retiring the request (to prevent HEADs colliding 3066 * with rapid ringbuffer wraparound) the amount of available space 3067 * upon reset is less than when we start. Do one more pass over 3068 * all the ringbuffers to reset last_retired_head. 3069 */ 3070 list_for_each_entry(buffer, &engine->buffers, link) { 3071 buffer->last_retired_head = buffer->tail; 3072 intel_ring_update_space(buffer); 3073 } 3074 3075 intel_ring_init_seqno(engine, engine->last_submitted_seqno); 3076 } 3077 3078 void i915_gem_reset(struct drm_device *dev) 3079 { 3080 struct drm_i915_private *dev_priv = dev->dev_private; 3081 struct intel_engine_cs *engine; 3082 3083 /* 3084 * Before we free the objects from the requests, we need to inspect 3085 * them for finding the guilty party. As the requests only borrow 3086 * their reference to the objects, the inspection must be done first. 3087 */ 3088 for_each_engine(engine, dev_priv) 3089 i915_gem_reset_engine_status(dev_priv, engine); 3090 3091 for_each_engine(engine, dev_priv) 3092 i915_gem_reset_engine_cleanup(dev_priv, engine); 3093 3094 i915_gem_context_reset(dev); 3095 3096 i915_gem_restore_fences(dev); 3097 3098 WARN_ON(i915_verify_lists(dev)); 3099 } 3100 3101 /** 3102 * This function clears the request list as sequence numbers are passed. 3103 */ 3104 void 3105 i915_gem_retire_requests_ring(struct intel_engine_cs *engine) 3106 { 3107 WARN_ON(i915_verify_lists(engine->dev)); 3108 3109 /* Retire requests first as we use it above for the early return. 3110 * If we retire requests last, we may use a later seqno and so clear 3111 * the requests lists without clearing the active list, leading to 3112 * confusion. 3113 */ 3114 while (!list_empty(&engine->request_list)) { 3115 struct drm_i915_gem_request *request; 3116 3117 request = list_first_entry(&engine->request_list, 3118 struct drm_i915_gem_request, 3119 list); 3120 3121 if (!i915_gem_request_completed(request, true)) 3122 break; 3123 3124 i915_gem_request_retire(request); 3125 } 3126 3127 /* Move any buffers on the active list that are no longer referenced 3128 * by the ringbuffer to the flushing/inactive lists as appropriate, 3129 * before we free the context associated with the requests. 
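 * We stop walking the active list as soon as we find an object whose
 * last_read_req for this engine is still on a request list, i.e. has not
 * been retired yet.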
3130 */ 3131 while (!list_empty(&engine->active_list)) { 3132 struct drm_i915_gem_object *obj; 3133 3134 obj = list_first_entry(&engine->active_list, 3135 struct drm_i915_gem_object, 3136 engine_list[engine->id]); 3137 3138 if (!list_empty(&obj->last_read_req[engine->id]->list)) 3139 break; 3140 3141 i915_gem_object_retire__read(obj, engine->id); 3142 } 3143 3144 if (unlikely(engine->trace_irq_req && 3145 i915_gem_request_completed(engine->trace_irq_req, true))) { 3146 engine->irq_put(engine); 3147 i915_gem_request_assign(&engine->trace_irq_req, NULL); 3148 } 3149 3150 WARN_ON(i915_verify_lists(engine->dev)); 3151 } 3152 3153 bool 3154 i915_gem_retire_requests(struct drm_device *dev) 3155 { 3156 struct drm_i915_private *dev_priv = dev->dev_private; 3157 struct intel_engine_cs *engine; 3158 bool idle = true; 3159 3160 for_each_engine(engine, dev_priv) { 3161 i915_gem_retire_requests_ring(engine); 3162 idle &= list_empty(&engine->request_list); 3163 if (i915.enable_execlists) { 3164 spin_lock_bh(&engine->execlist_lock); 3165 idle &= list_empty(&engine->execlist_queue); 3166 spin_unlock_bh(&engine->execlist_lock); 3167 3168 intel_execlists_retire_requests(engine); 3169 } 3170 } 3171 3172 if (idle) 3173 mod_delayed_work(dev_priv->wq, 3174 &dev_priv->mm.idle_work, 3175 msecs_to_jiffies(100)); 3176 3177 return idle; 3178 } 3179 3180 static void 3181 i915_gem_retire_work_handler(struct work_struct *work) 3182 { 3183 struct drm_i915_private *dev_priv = 3184 container_of(work, typeof(*dev_priv), mm.retire_work.work); 3185 struct drm_device *dev = dev_priv->dev; 3186 bool idle; 3187 3188 /* Come back later if the device is busy... */ 3189 idle = false; 3190 if (mutex_trylock(&dev->struct_mutex)) { 3191 idle = i915_gem_retire_requests(dev); 3192 mutex_unlock(&dev->struct_mutex); 3193 } 3194 if (!idle) 3195 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 3196 round_jiffies_up_relative(HZ)); 3197 } 3198 3199 static void 3200 i915_gem_idle_work_handler(struct work_struct *work) 3201 { 3202 struct drm_i915_private *dev_priv = 3203 container_of(work, typeof(*dev_priv), mm.idle_work.work); 3204 struct drm_device *dev = dev_priv->dev; 3205 struct intel_engine_cs *engine; 3206 3207 for_each_engine(engine, dev_priv) 3208 if (!list_empty(&engine->request_list)) 3209 return; 3210 3211 /* we probably should sync with hangcheck here, using cancel_work_sync. 3212 * Also locking seems to be fubar here, engine->request_list is protected 3213 * by dev->struct_mutex. */ 3214 3215 intel_mark_idle(dev); 3216 3217 if (mutex_trylock(&dev->struct_mutex)) { 3218 for_each_engine(engine, dev_priv) 3219 i915_gem_batch_pool_fini(&engine->batch_pool); 3220 3221 mutex_unlock(&dev->struct_mutex); 3222 } 3223 } 3224 3225 /** 3226 * Ensures that an object will eventually get non-busy by flushing any required 3227 * write domains, emitting any outstanding lazy request and retiring and 3228 * completed requests. 
3229 */
3230 static int
3231 i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
3232 {
3233 int i;
3234
3235 if (!obj->active)
3236 return 0;
3237
3238 for (i = 0; i < I915_NUM_ENGINES; i++) {
3239 struct drm_i915_gem_request *req;
3240
3241 req = obj->last_read_req[i];
3242 if (req == NULL)
3243 continue;
3244
3245 if (list_empty(&req->list))
3246 goto retire;
3247
3248 if (i915_gem_request_completed(req, true)) {
3249 __i915_gem_request_retire__upto(req);
3250 retire:
3251 i915_gem_object_retire__read(obj, i);
3252 }
3253 }
3254
3255 return 0;
3256 }
3257
3258 /**
3259 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
3260 * @DRM_IOCTL_ARGS: standard ioctl arguments
3261 *
3262 * Returns 0 if successful, else an error is returned with the remaining time in
3263 * the timeout parameter.
3264 * -ETIME: object is still busy after timeout
3265 * -ERESTARTSYS: signal interrupted the wait
3266 * -ENOENT: object doesn't exist
3267 * Also possible, but rare:
3268 * -EAGAIN: GPU wedged
3269 * -ENOMEM: damn
3270 * -ENODEV: Internal IRQ fail
3271 * -E?: The add request failed
3272 *
3273 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
3274 * non-zero timeout parameter the wait ioctl will wait for the given number of
3275 * nanoseconds on an object becoming unbusy. Since the wait itself does so
3276 * without holding struct_mutex the object may become re-busied before this
3277 * function completes. A similar but shorter race condition exists in the busy
3278 * ioctl.
3279 */
3280 int
3281 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
3282 {
3283 struct drm_i915_gem_wait *args = data;
3284 struct drm_i915_gem_object *obj;
3285 struct drm_i915_gem_request *req[I915_NUM_ENGINES];
3286 int i, n = 0;
3287 int ret;
3288
3289 if (args->flags != 0)
3290 return -EINVAL;
3291
3292 ret = i915_mutex_lock_interruptible(dev);
3293 if (ret)
3294 return ret;
3295
3296 obj = to_intel_bo(drm_gem_object_lookup(file, args->bo_handle));
3297 if (&obj->base == NULL) {
3298 mutex_unlock(&dev->struct_mutex);
3299 return -ENOENT;
3300 }
3301
3302 /* Need to make sure the object gets inactive eventually. */
3303 ret = i915_gem_object_flush_active(obj);
3304 if (ret)
3305 goto out;
3306
3307 if (!obj->active)
3308 goto out;
3309
3310 /* Do this after OLR check to make sure we make forward progress polling
3311 * on this IOCTL with a timeout == 0 (like busy ioctl)
3312 */
3313 if (args->timeout_ns == 0) {
3314 ret = -ETIME;
3315 goto out;
3316 }
3317
3318 drm_gem_object_unreference(&obj->base);
3319
3320 for (i = 0; i < I915_NUM_ENGINES; i++) {
3321 if (obj->last_read_req[i] == NULL)
3322 continue;
3323
3324 req[n++] = i915_gem_request_reference(obj->last_read_req[i]);
3325 }
3326
3327 mutex_unlock(&dev->struct_mutex);
3328
3329 for (i = 0; i < n; i++) {
3330 if (ret == 0)
3331 ret = __i915_wait_request(req[i], true,
3332 args->timeout_ns > 0 ?
&args->timeout_ns : NULL, 3333 to_rps_client(file)); 3334 i915_gem_request_unreference__unlocked(req[i]); 3335 } 3336 return ret; 3337 3338 out: 3339 drm_gem_object_unreference(&obj->base); 3340 mutex_unlock(&dev->struct_mutex); 3341 return ret; 3342 } 3343 3344 static int 3345 __i915_gem_object_sync(struct drm_i915_gem_object *obj, 3346 struct intel_engine_cs *to, 3347 struct drm_i915_gem_request *from_req, 3348 struct drm_i915_gem_request **to_req) 3349 { 3350 struct intel_engine_cs *from; 3351 int ret; 3352 3353 from = i915_gem_request_get_engine(from_req); 3354 if (to == from) 3355 return 0; 3356 3357 if (i915_gem_request_completed(from_req, true)) 3358 return 0; 3359 3360 if (!i915_semaphore_is_enabled(obj->base.dev)) { 3361 struct drm_i915_private *i915 = to_i915(obj->base.dev); 3362 ret = __i915_wait_request(from_req, 3363 i915->mm.interruptible, 3364 NULL, 3365 &i915->rps.semaphores); 3366 if (ret) 3367 return ret; 3368 3369 i915_gem_object_retire_request(obj, from_req); 3370 } else { 3371 int idx = intel_ring_sync_index(from, to); 3372 u32 seqno = i915_gem_request_get_seqno(from_req); 3373 3374 WARN_ON(!to_req); 3375 3376 if (seqno <= from->semaphore.sync_seqno[idx]) 3377 return 0; 3378 3379 if (*to_req == NULL) { 3380 struct drm_i915_gem_request *req; 3381 3382 req = i915_gem_request_alloc(to, NULL); 3383 if (IS_ERR(req)) 3384 return PTR_ERR(req); 3385 3386 *to_req = req; 3387 } 3388 3389 trace_i915_gem_ring_sync_to(*to_req, from, from_req); 3390 ret = to->semaphore.sync_to(*to_req, from, seqno); 3391 if (ret) 3392 return ret; 3393 3394 /* We use last_read_req because sync_to() 3395 * might have just caused seqno wrap under 3396 * the radar. 3397 */ 3398 from->semaphore.sync_seqno[idx] = 3399 i915_gem_request_get_seqno(obj->last_read_req[from->id]); 3400 } 3401 3402 return 0; 3403 } 3404 3405 /** 3406 * i915_gem_object_sync - sync an object to a ring. 3407 * 3408 * @obj: object which may be in use on another ring. 3409 * @to: ring we wish to use the object on. May be NULL. 3410 * @to_req: request we wish to use the object for. See below. 3411 * This will be allocated and returned if a request is 3412 * required but not passed in. 3413 * 3414 * This code is meant to abstract object synchronization with the GPU. 3415 * Calling with NULL implies synchronizing the object with the CPU 3416 * rather than a particular GPU ring. Conceptually we serialise writes 3417 * between engines inside the GPU. We only allow one engine to write 3418 * into a buffer at any time, but multiple readers. To ensure each has 3419 * a coherent view of memory, we must: 3420 * 3421 * - If there is an outstanding write request to the object, the new 3422 * request must wait for it to complete (either CPU or in hw, requests 3423 * on the same ring will be naturally ordered). 3424 * 3425 * - If we are a write request (pending_write_domain is set), the new 3426 * request must wait for outstanding read requests to complete. 3427 * 3428 * For CPU synchronisation (NULL to) no request is required. For syncing with 3429 * rings to_req must be non-NULL. However, a request does not have to be 3430 * pre-allocated. If *to_req is NULL and sync commands will be emitted then a 3431 * request will be allocated automatically and returned through *to_req. Note 3432 * that it is not guaranteed that commands will be emitted (because the system 3433 * might already be idle). Hence there is no need to create a request that 3434 * might never have any work submitted. 
Note further that if a request is 3435 * returned in *to_req, it is the responsibility of the caller to submit 3436 * that request (after potentially adding more work to it). 3437 * 3438 * Returns 0 if successful, else propagates up the lower layer error. 3439 */ 3440 int 3441 i915_gem_object_sync(struct drm_i915_gem_object *obj, 3442 struct intel_engine_cs *to, 3443 struct drm_i915_gem_request **to_req) 3444 { 3445 const bool readonly = obj->base.pending_write_domain == 0; 3446 struct drm_i915_gem_request *req[I915_NUM_ENGINES]; 3447 int ret, i, n; 3448 3449 if (!obj->active) 3450 return 0; 3451 3452 if (to == NULL) 3453 return i915_gem_object_wait_rendering(obj, readonly); 3454 3455 n = 0; 3456 if (readonly) { 3457 if (obj->last_write_req) 3458 req[n++] = obj->last_write_req; 3459 } else { 3460 for (i = 0; i < I915_NUM_ENGINES; i++) 3461 if (obj->last_read_req[i]) 3462 req[n++] = obj->last_read_req[i]; 3463 } 3464 for (i = 0; i < n; i++) { 3465 ret = __i915_gem_object_sync(obj, to, req[i], to_req); 3466 if (ret) 3467 return ret; 3468 } 3469 3470 return 0; 3471 } 3472 3473 static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj) 3474 { 3475 u32 old_write_domain, old_read_domains; 3476 3477 /* Force a pagefault for domain tracking on next user access */ 3478 i915_gem_release_mmap(obj); 3479 3480 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) 3481 return; 3482 3483 old_read_domains = obj->base.read_domains; 3484 old_write_domain = obj->base.write_domain; 3485 3486 obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT; 3487 obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT; 3488 3489 trace_i915_gem_object_change_domain(obj, 3490 old_read_domains, 3491 old_write_domain); 3492 } 3493 3494 static int __i915_vma_unbind(struct i915_vma *vma, bool wait) 3495 { 3496 struct drm_i915_gem_object *obj = vma->obj; 3497 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 3498 int ret; 3499 3500 if (list_empty(&vma->obj_link)) 3501 return 0; 3502 3503 if (!drm_mm_node_allocated(&vma->node)) { 3504 i915_gem_vma_destroy(vma); 3505 return 0; 3506 } 3507 3508 if (vma->pin_count) 3509 return -EBUSY; 3510 3511 BUG_ON(obj->pages == NULL); 3512 3513 if (wait) { 3514 ret = i915_gem_object_wait_rendering(obj, false); 3515 if (ret) 3516 return ret; 3517 } 3518 3519 if (vma->is_ggtt && vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) { 3520 i915_gem_object_finish_gtt(obj); 3521 3522 /* release the fence reg _after_ flushing */ 3523 ret = i915_gem_object_put_fence(obj); 3524 if (ret) 3525 return ret; 3526 } 3527 3528 trace_i915_vma_unbind(vma); 3529 3530 vma->vm->unbind_vma(vma); 3531 vma->bound = 0; 3532 3533 list_del_init(&vma->vm_link); 3534 if (vma->is_ggtt) { 3535 if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) { 3536 obj->map_and_fenceable = false; 3537 } else if (vma->ggtt_view.pages) { 3538 sg_free_table(vma->ggtt_view.pages); 3539 kfree(vma->ggtt_view.pages); 3540 } 3541 vma->ggtt_view.pages = NULL; 3542 } 3543 3544 drm_mm_remove_node(&vma->node); 3545 i915_gem_vma_destroy(vma); 3546 3547 /* Since the unbound list is global, only move to that list if 3548 * no more VMAs exist. */ 3549 if (list_empty(&obj->vma_list)) 3550 list_move_tail(&obj->global_list, &dev_priv->mm.unbound_list); 3551 3552 /* And finally now the object is completely decoupled from this vma, 3553 * we can drop its hold on the backing storage and allow it to be 3554 * reaped by the shrinker. 
3555 */
3556 i915_gem_object_unpin_pages(obj);
3557
3558 return 0;
3559 }
3560
3561 int i915_vma_unbind(struct i915_vma *vma)
3562 {
3563 return __i915_vma_unbind(vma, true);
3564 }
3565
3566 int __i915_vma_unbind_no_wait(struct i915_vma *vma)
3567 {
3568 return __i915_vma_unbind(vma, false);
3569 }
3570
3571 int i915_gpu_idle(struct drm_device *dev)
3572 {
3573 struct drm_i915_private *dev_priv = dev->dev_private;
3574 struct intel_engine_cs *engine;
3575 int ret;
3576
3577 /* Flush everything onto the inactive list. */
3578 for_each_engine(engine, dev_priv) {
3579 if (!i915.enable_execlists) {
3580 struct drm_i915_gem_request *req;
3581
3582 req = i915_gem_request_alloc(engine, NULL);
3583 if (IS_ERR(req))
3584 return PTR_ERR(req);
3585
3586 ret = i915_switch_context(req);
3587 i915_add_request_no_flush(req);
3588 if (ret)
3589 return ret;
3590 }
3591
3592 ret = intel_engine_idle(engine);
3593 if (ret)
3594 return ret;
3595 }
3596
3597 WARN_ON(i915_verify_lists(dev));
3598 return 0;
3599 }
3600
3601 static bool i915_gem_valid_gtt_space(struct i915_vma *vma,
3602 unsigned long cache_level)
3603 {
3604 struct drm_mm_node *gtt_space = &vma->node;
3605 struct drm_mm_node *other;
3606
3607 /*
3608 * On some machines we have to be careful when putting differing types
3609 * of snoopable memory together to avoid the prefetcher crossing memory
3610 * domains and dying. During vm initialisation, we decide whether or not
3611 * these constraints apply and set the drm_mm.color_adjust
3612 * appropriately.
3613 */
3614 if (vma->vm->mm.color_adjust == NULL)
3615 return true;
3616
3617 if (!drm_mm_node_allocated(gtt_space))
3618 return true;
3619
3620 if (list_empty(&gtt_space->node_list))
3621 return true;
3622
3623 other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list);
3624 if (other->allocated && !other->hole_follows && other->color != cache_level)
3625 return false;
3626
3627 other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list);
3628 if (other->allocated && !gtt_space->hole_follows && other->color != cache_level)
3629 return false;
3630
3631 return true;
3632 }
3633
3634 /**
3635 * Finds free space in the GTT aperture and binds the object or a view of it
3636 * there.
3637 */
3638 static struct i915_vma *
3639 i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
3640 struct i915_address_space *vm,
3641 const struct i915_ggtt_view *ggtt_view,
3642 unsigned alignment,
3643 uint64_t flags)
3644 {
3645 struct drm_device *dev = obj->base.dev;
3646 struct drm_i915_private *dev_priv = to_i915(dev);
3647 struct i915_ggtt *ggtt = &dev_priv->ggtt;
3648 u32 fence_alignment, unfenced_alignment;
3649 u32 search_flag, alloc_flag;
3650 u64 start, end;
3651 u64 size, fence_size;
3652 struct i915_vma *vma;
3653 int ret;
3654
3655 if (i915_is_ggtt(vm)) {
3656 u32 view_size;
3657
3658 if (WARN_ON(!ggtt_view))
3659 return ERR_PTR(-EINVAL);
3660
3661 view_size = i915_ggtt_view_size(obj, ggtt_view);
3662
3663 fence_size = i915_gem_get_gtt_size(dev,
3664 view_size,
3665 obj->tiling_mode);
3666 fence_alignment = i915_gem_get_gtt_alignment(dev,
3667 view_size,
3668 obj->tiling_mode,
3669 true);
3670 unfenced_alignment = i915_gem_get_gtt_alignment(dev,
3671 view_size,
3672 obj->tiling_mode,
3673 false);
3674 size = flags & PIN_MAPPABLE ?
fence_size : view_size; 3675 } else { 3676 fence_size = i915_gem_get_gtt_size(dev, 3677 obj->base.size, 3678 obj->tiling_mode); 3679 fence_alignment = i915_gem_get_gtt_alignment(dev, 3680 obj->base.size, 3681 obj->tiling_mode, 3682 true); 3683 unfenced_alignment = 3684 i915_gem_get_gtt_alignment(dev, 3685 obj->base.size, 3686 obj->tiling_mode, 3687 false); 3688 size = flags & PIN_MAPPABLE ? fence_size : obj->base.size; 3689 } 3690 3691 start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0; 3692 end = vm->total; 3693 if (flags & PIN_MAPPABLE) 3694 end = min_t(u64, end, ggtt->mappable_end); 3695 if (flags & PIN_ZONE_4G) 3696 end = min_t(u64, end, (1ULL << 32) - PAGE_SIZE); 3697 3698 if (alignment == 0) 3699 alignment = flags & PIN_MAPPABLE ? fence_alignment : 3700 unfenced_alignment; 3701 if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) { 3702 DRM_DEBUG("Invalid object (view type=%u) alignment requested %u\n", 3703 ggtt_view ? ggtt_view->type : 0, 3704 alignment); 3705 return ERR_PTR(-EINVAL); 3706 } 3707 3708 /* If binding the object/GGTT view requires more space than the entire 3709 * aperture has, reject it early before evicting everything in a vain 3710 * attempt to find space. 3711 */ 3712 if (size > end) { 3713 DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: size=%llu > %s aperture=%llu\n", 3714 ggtt_view ? ggtt_view->type : 0, 3715 size, 3716 flags & PIN_MAPPABLE ? "mappable" : "total", 3717 end); 3718 return ERR_PTR(-E2BIG); 3719 } 3720 3721 ret = i915_gem_object_get_pages(obj); 3722 if (ret) 3723 return ERR_PTR(ret); 3724 3725 i915_gem_object_pin_pages(obj); 3726 3727 vma = ggtt_view ? i915_gem_obj_lookup_or_create_ggtt_vma(obj, ggtt_view) : 3728 i915_gem_obj_lookup_or_create_vma(obj, vm); 3729 3730 if (IS_ERR(vma)) 3731 goto err_unpin; 3732 3733 if (flags & PIN_OFFSET_FIXED) { 3734 uint64_t offset = flags & PIN_OFFSET_MASK; 3735 3736 if (offset & (alignment - 1) || offset + size > end) { 3737 ret = -EINVAL; 3738 goto err_free_vma; 3739 } 3740 vma->node.start = offset; 3741 vma->node.size = size; 3742 vma->node.color = obj->cache_level; 3743 ret = drm_mm_reserve_node(&vm->mm, &vma->node); 3744 if (ret) { 3745 ret = i915_gem_evict_for_vma(vma); 3746 if (ret == 0) 3747 ret = drm_mm_reserve_node(&vm->mm, &vma->node); 3748 } 3749 if (ret) 3750 goto err_free_vma; 3751 } else { 3752 if (flags & PIN_HIGH) { 3753 search_flag = DRM_MM_SEARCH_BELOW; 3754 alloc_flag = DRM_MM_CREATE_TOP; 3755 } else { 3756 search_flag = DRM_MM_SEARCH_DEFAULT; 3757 alloc_flag = DRM_MM_CREATE_DEFAULT; 3758 } 3759 3760 search_free: 3761 ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node, 3762 size, alignment, 3763 obj->cache_level, 3764 start, end, 3765 search_flag, 3766 alloc_flag); 3767 if (ret) { 3768 ret = i915_gem_evict_something(dev, vm, size, alignment, 3769 obj->cache_level, 3770 start, end, 3771 flags); 3772 if (ret == 0) 3773 goto search_free; 3774 3775 goto err_free_vma; 3776 } 3777 } 3778 if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) { 3779 ret = -EINVAL; 3780 goto err_remove_node; 3781 } 3782 3783 trace_i915_vma_bind(vma, flags); 3784 ret = i915_vma_bind(vma, obj->cache_level, flags); 3785 if (ret) 3786 goto err_remove_node; 3787 3788 list_move_tail(&obj->global_list, &dev_priv->mm.bound_list); 3789 list_add_tail(&vma->vm_link, &vm->inactive_list); 3790 3791 return vma; 3792 3793 err_remove_node: 3794 drm_mm_remove_node(&vma->node); 3795 err_free_vma: 3796 i915_gem_vma_destroy(vma); 3797 vma = ERR_PTR(ret); 3798 
err_unpin: 3799 i915_gem_object_unpin_pages(obj); 3800 return vma; 3801 } 3802 3803 bool 3804 i915_gem_clflush_object(struct drm_i915_gem_object *obj, 3805 bool force) 3806 { 3807 /* If we don't have a page list set up, then we're not pinned 3808 * to GPU, and we can ignore the cache flush because it'll happen 3809 * again at bind time. 3810 */ 3811 if (obj->pages == NULL) 3812 return false; 3813 3814 /* 3815 * Stolen memory is always coherent with the GPU as it is explicitly 3816 * marked as wc by the system, or the system is cache-coherent. 3817 */ 3818 if (obj->stolen || obj->phys_handle) 3819 return false; 3820 3821 /* If the GPU is snooping the contents of the CPU cache, 3822 * we do not need to manually clear the CPU cache lines. However, 3823 * the caches are only snooped when the render cache is 3824 * flushed/invalidated. As we always have to emit invalidations 3825 * and flushes when moving into and out of the RENDER domain, correct 3826 * snooping behaviour occurs naturally as the result of our domain 3827 * tracking. 3828 */ 3829 if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) { 3830 obj->cache_dirty = true; 3831 return false; 3832 } 3833 3834 trace_i915_gem_object_clflush(obj); 3835 drm_clflush_sg(obj->pages); 3836 obj->cache_dirty = false; 3837 3838 return true; 3839 } 3840 3841 /** Flushes the GTT write domain for the object if it's dirty. */ 3842 static void 3843 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) 3844 { 3845 uint32_t old_write_domain; 3846 3847 if (obj->base.write_domain != I915_GEM_DOMAIN_GTT) 3848 return; 3849 3850 /* No actual flushing is required for the GTT write domain. Writes 3851 * to it immediately go to main memory as far as we know, so there's 3852 * no chipset flush. It also doesn't land in render cache. 3853 * 3854 * However, we do have to enforce the order so that all writes through 3855 * the GTT land before any writes to the device, such as updates to 3856 * the GATT itself. 3857 */ 3858 wmb(); 3859 3860 old_write_domain = obj->base.write_domain; 3861 obj->base.write_domain = 0; 3862 3863 intel_fb_obj_flush(obj, false, ORIGIN_GTT); 3864 3865 trace_i915_gem_object_change_domain(obj, 3866 obj->base.read_domains, 3867 old_write_domain); 3868 } 3869 3870 /** Flushes the CPU write domain for the object if it's dirty. */ 3871 static void 3872 i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) 3873 { 3874 uint32_t old_write_domain; 3875 3876 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) 3877 return; 3878 3879 if (i915_gem_clflush_object(obj, obj->pin_display)) 3880 i915_gem_chipset_flush(obj->base.dev); 3881 3882 old_write_domain = obj->base.write_domain; 3883 obj->base.write_domain = 0; 3884 3885 intel_fb_obj_flush(obj, false, ORIGIN_CPU); 3886 3887 trace_i915_gem_object_change_domain(obj, 3888 obj->base.read_domains, 3889 old_write_domain); 3890 } 3891 3892 /** 3893 * Moves a single object to the GTT read, and possibly write domain. 3894 * 3895 * This function returns when the move is complete, including waiting on 3896 * flushes to occur. 
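 * For a write access the object is additionally marked dirty and the GTT
 * becomes its sole read and write domain.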
3897 */ 3898 int 3899 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) 3900 { 3901 struct drm_device *dev = obj->base.dev; 3902 struct drm_i915_private *dev_priv = to_i915(dev); 3903 struct i915_ggtt *ggtt = &dev_priv->ggtt; 3904 uint32_t old_write_domain, old_read_domains; 3905 struct i915_vma *vma; 3906 int ret; 3907 3908 if (obj->base.write_domain == I915_GEM_DOMAIN_GTT) 3909 return 0; 3910 3911 ret = i915_gem_object_wait_rendering(obj, !write); 3912 if (ret) 3913 return ret; 3914 3915 /* Flush and acquire obj->pages so that we are coherent through 3916 * direct access in memory with previous cached writes through 3917 * shmemfs and that our cache domain tracking remains valid. 3918 * For example, if the obj->filp was moved to swap without us 3919 * being notified and releasing the pages, we would mistakenly 3920 * continue to assume that the obj remained out of the CPU cached 3921 * domain. 3922 */ 3923 ret = i915_gem_object_get_pages(obj); 3924 if (ret) 3925 return ret; 3926 3927 i915_gem_object_flush_cpu_write_domain(obj); 3928 3929 /* Serialise direct access to this object with the barriers for 3930 * coherent writes from the GPU, by effectively invalidating the 3931 * GTT domain upon first access. 3932 */ 3933 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) 3934 mb(); 3935 3936 old_write_domain = obj->base.write_domain; 3937 old_read_domains = obj->base.read_domains; 3938 3939 /* It should now be out of any other write domains, and we can update 3940 * the domain values for our changes. 3941 */ 3942 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0); 3943 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 3944 if (write) { 3945 obj->base.read_domains = I915_GEM_DOMAIN_GTT; 3946 obj->base.write_domain = I915_GEM_DOMAIN_GTT; 3947 obj->dirty = 1; 3948 } 3949 3950 trace_i915_gem_object_change_domain(obj, 3951 old_read_domains, 3952 old_write_domain); 3953 3954 /* And bump the LRU for this access */ 3955 vma = i915_gem_obj_to_ggtt(obj); 3956 if (vma && drm_mm_node_allocated(&vma->node) && !obj->active) 3957 list_move_tail(&vma->vm_link, 3958 &ggtt->base.inactive_list); 3959 3960 return 0; 3961 } 3962 3963 /** 3964 * Changes the cache-level of an object across all VMA. 3965 * 3966 * After this function returns, the object will be in the new cache-level 3967 * across all GTT and the contents of the backing storage will be coherent, 3968 * with respect to the new cache-level. In order to keep the backing storage 3969 * coherent for all users, we only allow a single cache level to be set 3970 * globally on the object and prevent it from being changed whilst the 3971 * hardware is reading from the object. That is if the object is currently 3972 * on the scanout it will be set to uncached (or equivalent display 3973 * cache coherency) and all non-MOCS GPU access will also be uncached so 3974 * that all direct access to the scanout remains coherent. 3975 */ 3976 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, 3977 enum i915_cache_level cache_level) 3978 { 3979 struct drm_device *dev = obj->base.dev; 3980 struct i915_vma *vma, *next; 3981 bool bound = false; 3982 int ret = 0; 3983 3984 if (obj->cache_level == cache_level) 3985 goto out; 3986 3987 /* Inspect the list of currently bound VMA and unbind any that would 3988 * be invalid given the new cache-level. This is principally to 3989 * catch the issue of the CS prefetch crossing page boundaries and 3990 * reading an invalid PTE on older architectures. 
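 * A VMA that is currently pinned cannot be unbound, so changing the cache
 * level of a pinned object fails below with -EBUSY.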
*/ 3992 list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) { 3993 if (!drm_mm_node_allocated(&vma->node)) 3994 continue; 3995 3996 if (vma->pin_count) { 3997 DRM_DEBUG("cannot change the cache level of pinned objects\n"); 3998 return -EBUSY; 3999 } 4000 4001 if (!i915_gem_valid_gtt_space(vma, cache_level)) { 4002 ret = i915_vma_unbind(vma); 4003 if (ret) 4004 return ret; 4005 } else 4006 bound = true; 4007 } 4008 4009 /* We can reuse the existing drm_mm nodes but need to change the 4010 * cache-level on the PTE. We could simply unbind them all and 4011 * rebind with the correct cache-level on next use. However since 4012 * we already have a valid slot, dma mapping, pages etc, we may as well 4013 * rewrite the PTE in the belief that doing so tramples upon less 4014 * state and so involves less work. 4015 */ 4016 if (bound) { 4017 /* Before we change the PTE, the GPU must not be accessing it. 4018 * If we wait upon the object, we know that all the bound 4019 * VMA are no longer active. 4020 */ 4021 ret = i915_gem_object_wait_rendering(obj, false); 4022 if (ret) 4023 return ret; 4024 4025 if (!HAS_LLC(dev) && cache_level != I915_CACHE_NONE) { 4026 /* Access to snoopable pages through the GTT is 4027 * incoherent and on some machines causes a hard 4028 * lockup. Relinquish the CPU mmapping to force 4029 * userspace to refault in the pages and we can 4030 * then double check if the GTT mapping is still 4031 * valid for that pointer access. 4032 */ 4033 i915_gem_release_mmap(obj); 4034 4035 /* As we no longer need a fence for GTT access, 4036 * we can relinquish it now (and so prevent having 4037 * to steal a fence from someone else on the next 4038 * fence request). Note GPU activity would have 4039 * dropped the fence as all snoopable access is 4040 * supposed to be linear. 4041 */ 4042 ret = i915_gem_object_put_fence(obj); 4043 if (ret) 4044 return ret; 4045 } else { 4046 /* We either have incoherent backing store and 4047 * so no GTT access or the architecture is fully 4048 * coherent. In such cases, existing GTT mmaps 4049 * ignore the cache bit in the PTE and we can 4050 * rewrite it without confusing the GPU or having 4051 * to force userspace to fault back in its mmaps. 4052 */ 4053 } 4054 4055 list_for_each_entry(vma, &obj->vma_list, obj_link) { 4056 if (!drm_mm_node_allocated(&vma->node)) 4057 continue; 4058 4059 ret = i915_vma_bind(vma, cache_level, PIN_UPDATE); 4060 if (ret) 4061 return ret; 4062 } 4063 } 4064 4065 list_for_each_entry(vma, &obj->vma_list, obj_link) 4066 vma->node.color = cache_level; 4067 obj->cache_level = cache_level; 4068 4069 out: 4070 /* Flush the dirty CPU caches to the backing storage so that the 4071 * object is now coherent at its new cache level (with respect 4072 * to the access domain). 
4073 */ 4074 if (obj->cache_dirty && 4075 obj->base.write_domain != I915_GEM_DOMAIN_CPU && 4076 cpu_write_needs_clflush(obj)) { 4077 if (i915_gem_clflush_object(obj, true)) 4078 i915_gem_chipset_flush(obj->base.dev); 4079 } 4080 4081 return 0; 4082 } 4083 4084 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, 4085 struct drm_file *file) 4086 { 4087 struct drm_i915_gem_caching *args = data; 4088 struct drm_i915_gem_object *obj; 4089 4090 obj = to_intel_bo(drm_gem_object_lookup(file, args->handle)); 4091 if (&obj->base == NULL) 4092 return -ENOENT; 4093 4094 switch (obj->cache_level) { 4095 case I915_CACHE_LLC: 4096 case I915_CACHE_L3_LLC: 4097 args->caching = I915_CACHING_CACHED; 4098 break; 4099 4100 case I915_CACHE_WT: 4101 args->caching = I915_CACHING_DISPLAY; 4102 break; 4103 4104 default: 4105 args->caching = I915_CACHING_NONE; 4106 break; 4107 } 4108 4109 drm_gem_object_unreference_unlocked(&obj->base); 4110 return 0; 4111 } 4112 4113 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, 4114 struct drm_file *file) 4115 { 4116 struct drm_i915_private *dev_priv = dev->dev_private; 4117 struct drm_i915_gem_caching *args = data; 4118 struct drm_i915_gem_object *obj; 4119 enum i915_cache_level level; 4120 int ret; 4121 4122 switch (args->caching) { 4123 case I915_CACHING_NONE: 4124 level = I915_CACHE_NONE; 4125 break; 4126 case I915_CACHING_CACHED: 4127 /* 4128 * Due to a HW issue on BXT A stepping, GPU stores via a 4129 * snooped mapping may leave stale data in a corresponding CPU 4130 * cacheline, whereas normally such cachelines would get 4131 * invalidated. 4132 */ 4133 if (!HAS_LLC(dev) && !HAS_SNOOP(dev)) 4134 return -ENODEV; 4135 4136 level = I915_CACHE_LLC; 4137 break; 4138 case I915_CACHING_DISPLAY: 4139 level = HAS_WT(dev) ? I915_CACHE_WT : I915_CACHE_NONE; 4140 break; 4141 default: 4142 return -EINVAL; 4143 } 4144 4145 intel_runtime_pm_get(dev_priv); 4146 4147 ret = i915_mutex_lock_interruptible(dev); 4148 if (ret) 4149 goto rpm_put; 4150 4151 obj = to_intel_bo(drm_gem_object_lookup(file, args->handle)); 4152 if (&obj->base == NULL) { 4153 ret = -ENOENT; 4154 goto unlock; 4155 } 4156 4157 ret = i915_gem_object_set_cache_level(obj, level); 4158 4159 drm_gem_object_unreference(&obj->base); 4160 unlock: 4161 mutex_unlock(&dev->struct_mutex); 4162 rpm_put: 4163 intel_runtime_pm_put(dev_priv); 4164 4165 return ret; 4166 } 4167 4168 /* 4169 * Prepare buffer for display plane (scanout, cursors, etc). 4170 * Can be called from an uninterruptible phase (modesetting) and allows 4171 * any flushes to be pipelined (for pageflips). 4172 */ 4173 int 4174 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, 4175 u32 alignment, 4176 const struct i915_ggtt_view *view) 4177 { 4178 u32 old_read_domains, old_write_domain; 4179 int ret; 4180 4181 /* Mark the pin_display early so that we account for the 4182 * display coherency whilst setting up the cache domains. 4183 */ 4184 obj->pin_display++; 4185 4186 /* The display engine is not coherent with the LLC cache on gen6. As 4187 * a result, we make sure that the pinning that is about to occur is 4188 * done with uncached PTEs. This is lowest common denominator for all 4189 * chipsets. 4190 * 4191 * However for gen6+, we could do better by using the GFDT bit instead 4192 * of uncaching, which would allow us to flush all the LLC-cached data 4193 * with that bit in the PTE to main memory with just one PIPE_CONTROL. 4194 */ 4195 ret = i915_gem_object_set_cache_level(obj, 4196 HAS_WT(obj->base.dev) ? 
I915_CACHE_WT : I915_CACHE_NONE); 4197 if (ret) 4198 goto err_unpin_display; 4199 4200 /* As the user may map the buffer once pinned in the display plane 4201 * (e.g. libkms for the bootup splash), we have to ensure that we 4202 * always use map_and_fenceable for all scanout buffers. 4203 */ 4204 ret = i915_gem_object_ggtt_pin(obj, view, alignment, 4205 view->type == I915_GGTT_VIEW_NORMAL ? 4206 PIN_MAPPABLE : 0); 4207 if (ret) 4208 goto err_unpin_display; 4209 4210 i915_gem_object_flush_cpu_write_domain(obj); 4211 4212 old_write_domain = obj->base.write_domain; 4213 old_read_domains = obj->base.read_domains; 4214 4215 /* It should now be out of any other write domains, and we can update 4216 * the domain values for our changes. 4217 */ 4218 obj->base.write_domain = 0; 4219 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 4220 4221 trace_i915_gem_object_change_domain(obj, 4222 old_read_domains, 4223 old_write_domain); 4224 4225 return 0; 4226 4227 err_unpin_display: 4228 obj->pin_display--; 4229 return ret; 4230 } 4231 4232 void 4233 i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj, 4234 const struct i915_ggtt_view *view) 4235 { 4236 if (WARN_ON(obj->pin_display == 0)) 4237 return; 4238 4239 i915_gem_object_ggtt_unpin_view(obj, view); 4240 4241 obj->pin_display--; 4242 } 4243 4244 /** 4245 * Moves a single object to the CPU read, and possibly write domain. 4246 * 4247 * This function returns when the move is complete, including waiting on 4248 * flushes to occur. 4249 */ 4250 int 4251 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) 4252 { 4253 uint32_t old_write_domain, old_read_domains; 4254 int ret; 4255 4256 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) 4257 return 0; 4258 4259 ret = i915_gem_object_wait_rendering(obj, !write); 4260 if (ret) 4261 return ret; 4262 4263 i915_gem_object_flush_gtt_write_domain(obj); 4264 4265 old_write_domain = obj->base.write_domain; 4266 old_read_domains = obj->base.read_domains; 4267 4268 /* Flush the CPU cache if it's still invalid. */ 4269 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) { 4270 i915_gem_clflush_object(obj, false); 4271 4272 obj->base.read_domains |= I915_GEM_DOMAIN_CPU; 4273 } 4274 4275 /* It should now be out of any other write domains, and we can update 4276 * the domain values for our changes. 4277 */ 4278 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0); 4279 4280 /* If we're writing through the CPU, then the GPU read domains will 4281 * need to be invalidated at next use. 4282 */ 4283 if (write) { 4284 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 4285 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 4286 } 4287 4288 trace_i915_gem_object_change_domain(obj, 4289 old_read_domains, 4290 old_write_domain); 4291 4292 return 0; 4293 } 4294 4295 /* Throttle our rendering by waiting until the ring has completed our requests 4296 * emitted over 20 msec ago. 4297 * 4298 * Note that if we were to use the current jiffies each time around the loop, 4299 * we wouldn't escape the function with any frames outstanding if the time to 4300 * render a frame was over 20ms. 4301 * 4302 * This should get us reasonable parallelism between CPU and GPU but also 4303 * relatively low latency when blocking on a particular request to finish. 
4304 */ 4305 static int 4306 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) 4307 { 4308 struct drm_i915_private *dev_priv = dev->dev_private; 4309 struct drm_i915_file_private *file_priv = file->driver_priv; 4310 unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES; 4311 struct drm_i915_gem_request *request, *target = NULL; 4312 int ret; 4313 4314 ret = i915_gem_wait_for_error(&dev_priv->gpu_error); 4315 if (ret) 4316 return ret; 4317 4318 /* ABI: return -EIO if already wedged */ 4319 if (i915_terminally_wedged(&dev_priv->gpu_error)) 4320 return -EIO; 4321 4322 spin_lock(&file_priv->mm.lock); 4323 list_for_each_entry(request, &file_priv->mm.request_list, client_list) { 4324 if (time_after_eq(request->emitted_jiffies, recent_enough)) 4325 break; 4326 4327 /* 4328 * Note that the request might not have been submitted yet. 4329 * In which case emitted_jiffies will be zero. 4330 */ 4331 if (!request->emitted_jiffies) 4332 continue; 4333 4334 target = request; 4335 } 4336 if (target) 4337 i915_gem_request_reference(target); 4338 spin_unlock(&file_priv->mm.lock); 4339 4340 if (target == NULL) 4341 return 0; 4342 4343 ret = __i915_wait_request(target, true, NULL, NULL); 4344 if (ret == 0) 4345 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0); 4346 4347 i915_gem_request_unreference__unlocked(target); 4348 4349 return ret; 4350 } 4351 4352 static bool 4353 i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags) 4354 { 4355 struct drm_i915_gem_object *obj = vma->obj; 4356 4357 if (alignment && 4358 vma->node.start & (alignment - 1)) 4359 return true; 4360 4361 if (flags & PIN_MAPPABLE && !obj->map_and_fenceable) 4362 return true; 4363 4364 if (flags & PIN_OFFSET_BIAS && 4365 vma->node.start < (flags & PIN_OFFSET_MASK)) 4366 return true; 4367 4368 if (flags & PIN_OFFSET_FIXED && 4369 vma->node.start != (flags & PIN_OFFSET_MASK)) 4370 return true; 4371 4372 return false; 4373 } 4374 4375 void __i915_vma_set_map_and_fenceable(struct i915_vma *vma) 4376 { 4377 struct drm_i915_gem_object *obj = vma->obj; 4378 bool mappable, fenceable; 4379 u32 fence_size, fence_alignment; 4380 4381 fence_size = i915_gem_get_gtt_size(obj->base.dev, 4382 obj->base.size, 4383 obj->tiling_mode); 4384 fence_alignment = i915_gem_get_gtt_alignment(obj->base.dev, 4385 obj->base.size, 4386 obj->tiling_mode, 4387 true); 4388 4389 fenceable = (vma->node.size == fence_size && 4390 (vma->node.start & (fence_alignment - 1)) == 0); 4391 4392 mappable = (vma->node.start + fence_size <= 4393 to_i915(obj->base.dev)->ggtt.mappable_end); 4394 4395 obj->map_and_fenceable = mappable && fenceable; 4396 } 4397 4398 static int 4399 i915_gem_object_do_pin(struct drm_i915_gem_object *obj, 4400 struct i915_address_space *vm, 4401 const struct i915_ggtt_view *ggtt_view, 4402 uint32_t alignment, 4403 uint64_t flags) 4404 { 4405 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 4406 struct i915_vma *vma; 4407 unsigned bound; 4408 int ret; 4409 4410 if (WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base)) 4411 return -ENODEV; 4412 4413 if (WARN_ON(flags & (PIN_GLOBAL | PIN_MAPPABLE) && !i915_is_ggtt(vm))) 4414 return -EINVAL; 4415 4416 if (WARN_ON((flags & (PIN_MAPPABLE | PIN_GLOBAL)) == PIN_MAPPABLE)) 4417 return -EINVAL; 4418 4419 if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view)) 4420 return -EINVAL; 4421 4422 vma = ggtt_view ? 
i915_gem_obj_to_ggtt_view(obj, ggtt_view) : 4423 i915_gem_obj_to_vma(obj, vm); 4424 4425 if (vma) { 4426 if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT)) 4427 return -EBUSY; 4428 4429 if (i915_vma_misplaced(vma, alignment, flags)) { 4430 WARN(vma->pin_count, 4431 "bo is already pinned in %s with incorrect alignment:" 4432 " offset=%08x %08x, req.alignment=%x, req.map_and_fenceable=%d," 4433 " obj->map_and_fenceable=%d\n", 4434 ggtt_view ? "ggtt" : "ppgtt", 4435 upper_32_bits(vma->node.start), 4436 lower_32_bits(vma->node.start), 4437 alignment, 4438 !!(flags & PIN_MAPPABLE), 4439 obj->map_and_fenceable); 4440 ret = i915_vma_unbind(vma); 4441 if (ret) 4442 return ret; 4443 4444 vma = NULL; 4445 } 4446 } 4447 4448 bound = vma ? vma->bound : 0; 4449 if (vma == NULL || !drm_mm_node_allocated(&vma->node)) { 4450 vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view, alignment, 4451 flags); 4452 if (IS_ERR(vma)) 4453 return PTR_ERR(vma); 4454 } else { 4455 ret = i915_vma_bind(vma, obj->cache_level, flags); 4456 if (ret) 4457 return ret; 4458 } 4459 4460 if (ggtt_view && ggtt_view->type == I915_GGTT_VIEW_NORMAL && 4461 (bound ^ vma->bound) & GLOBAL_BIND) { 4462 __i915_vma_set_map_and_fenceable(vma); 4463 WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable); 4464 } 4465 4466 vma->pin_count++; 4467 return 0; 4468 } 4469 4470 int 4471 i915_gem_object_pin(struct drm_i915_gem_object *obj, 4472 struct i915_address_space *vm, 4473 uint32_t alignment, 4474 uint64_t flags) 4475 { 4476 return i915_gem_object_do_pin(obj, vm, 4477 i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL, 4478 alignment, flags); 4479 } 4480 4481 int 4482 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, 4483 const struct i915_ggtt_view *view, 4484 uint32_t alignment, 4485 uint64_t flags) 4486 { 4487 struct drm_device *dev = obj->base.dev; 4488 struct drm_i915_private *dev_priv = to_i915(dev); 4489 struct i915_ggtt *ggtt = &dev_priv->ggtt; 4490 4491 BUG_ON(!view); 4492 4493 return i915_gem_object_do_pin(obj, &ggtt->base, view, 4494 alignment, flags | PIN_GLOBAL); 4495 } 4496 4497 void 4498 i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj, 4499 const struct i915_ggtt_view *view) 4500 { 4501 struct i915_vma *vma = i915_gem_obj_to_ggtt_view(obj, view); 4502 4503 WARN_ON(vma->pin_count == 0); 4504 WARN_ON(!i915_gem_obj_ggtt_bound_view(obj, view)); 4505 4506 --vma->pin_count; 4507 } 4508 4509 int 4510 i915_gem_busy_ioctl(struct drm_device *dev, void *data, 4511 struct drm_file *file) 4512 { 4513 struct drm_i915_gem_busy *args = data; 4514 struct drm_i915_gem_object *obj; 4515 int ret; 4516 4517 ret = i915_mutex_lock_interruptible(dev); 4518 if (ret) 4519 return ret; 4520 4521 obj = to_intel_bo(drm_gem_object_lookup(file, args->handle)); 4522 if (&obj->base == NULL) { 4523 ret = -ENOENT; 4524 goto unlock; 4525 } 4526 4527 /* Count all active objects as busy, even if they are currently not used 4528 * by the gpu. Users of this interface expect objects to eventually 4529 * become non-busy without any further actions, therefore emit any 4530 * necessary flushes here. 
4531 */ 4532 ret = i915_gem_object_flush_active(obj); 4533 if (ret) 4534 goto unref; 4535 4536 args->busy = 0; 4537 if (obj->active) { 4538 int i; 4539 4540 for (i = 0; i < I915_NUM_ENGINES; i++) { 4541 struct drm_i915_gem_request *req; 4542 4543 req = obj->last_read_req[i]; 4544 if (req) 4545 args->busy |= 1 << (16 + req->engine->exec_id); 4546 } 4547 if (obj->last_write_req) 4548 args->busy |= obj->last_write_req->engine->exec_id; 4549 } 4550 4551 unref: 4552 drm_gem_object_unreference(&obj->base); 4553 unlock: 4554 mutex_unlock(&dev->struct_mutex); 4555 return ret; 4556 } 4557 4558 int 4559 i915_gem_throttle_ioctl(struct drm_device *dev, void *data, 4560 struct drm_file *file_priv) 4561 { 4562 return i915_gem_ring_throttle(dev, file_priv); 4563 } 4564 4565 int 4566 i915_gem_madvise_ioctl(struct drm_device *dev, void *data, 4567 struct drm_file *file_priv) 4568 { 4569 struct drm_i915_private *dev_priv = dev->dev_private; 4570 struct drm_i915_gem_madvise *args = data; 4571 struct drm_i915_gem_object *obj; 4572 int ret; 4573 4574 switch (args->madv) { 4575 case I915_MADV_DONTNEED: 4576 case I915_MADV_WILLNEED: 4577 break; 4578 default: 4579 return -EINVAL; 4580 } 4581 4582 ret = i915_mutex_lock_interruptible(dev); 4583 if (ret) 4584 return ret; 4585 4586 obj = to_intel_bo(drm_gem_object_lookup(file_priv, args->handle)); 4587 if (&obj->base == NULL) { 4588 ret = -ENOENT; 4589 goto unlock; 4590 } 4591 4592 if (i915_gem_obj_is_pinned(obj)) { 4593 ret = -EINVAL; 4594 goto out; 4595 } 4596 4597 if (obj->pages && 4598 obj->tiling_mode != I915_TILING_NONE && 4599 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) { 4600 if (obj->madv == I915_MADV_WILLNEED) 4601 i915_gem_object_unpin_pages(obj); 4602 if (args->madv == I915_MADV_WILLNEED) 4603 i915_gem_object_pin_pages(obj); 4604 } 4605 4606 if (obj->madv != __I915_MADV_PURGED) 4607 obj->madv = args->madv; 4608 4609 /* if the object is no longer attached, discard its backing storage */ 4610 if (obj->madv == I915_MADV_DONTNEED && obj->pages == NULL) 4611 i915_gem_object_truncate(obj); 4612 4613 args->retained = obj->madv != __I915_MADV_PURGED; 4614 4615 out: 4616 drm_gem_object_unreference(&obj->base); 4617 unlock: 4618 mutex_unlock(&dev->struct_mutex); 4619 return ret; 4620 } 4621 4622 void i915_gem_object_init(struct drm_i915_gem_object *obj, 4623 const struct drm_i915_gem_object_ops *ops) 4624 { 4625 int i; 4626 4627 INIT_LIST_HEAD(&obj->global_list); 4628 for (i = 0; i < I915_NUM_ENGINES; i++) 4629 INIT_LIST_HEAD(&obj->engine_list[i]); 4630 INIT_LIST_HEAD(&obj->obj_exec_link); 4631 INIT_LIST_HEAD(&obj->vma_list); 4632 INIT_LIST_HEAD(&obj->batch_pool_link); 4633 4634 obj->ops = ops; 4635 4636 obj->fence_reg = I915_FENCE_REG_NONE; 4637 obj->madv = I915_MADV_WILLNEED; 4638 4639 i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size); 4640 } 4641 4642 static const struct drm_i915_gem_object_ops i915_gem_object_ops = { 4643 .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE, 4644 .get_pages = i915_gem_object_get_pages_gtt, 4645 .put_pages = i915_gem_object_put_pages_gtt, 4646 }; 4647 4648 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev, 4649 size_t size) 4650 { 4651 struct drm_i915_gem_object *obj; 4652 #if 0 4653 struct address_space *mapping; 4654 gfp_t mask; 4655 #endif 4656 4657 obj = i915_gem_object_alloc(dev); 4658 if (obj == NULL) 4659 return NULL; 4660 4661 if (drm_gem_object_init(dev, &obj->base, size) != 0) { 4662 i915_gem_object_free(obj); 4663 return NULL; 4664 } 4665 4666 #if 0 4667 mask = GFP_HIGHUSER | 
__GFP_RECLAIMABLE; 4668 if (IS_CRESTLINE(dev) || IS_BROADWATER(dev)) { 4669 /* 965gm cannot relocate objects above 4GiB. */ 4670 mask &= ~__GFP_HIGHMEM; 4671 mask |= __GFP_DMA32; 4672 } 4673 4674 mapping = file_inode(obj->base.filp)->i_mapping; 4675 mapping_set_gfp_mask(mapping, mask); 4676 #endif 4677 4678 i915_gem_object_init(obj, &i915_gem_object_ops); 4679 4680 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 4681 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 4682 4683 if (HAS_LLC(dev)) { 4684 /* On some devices, we can have the GPU use the LLC (the CPU 4685 * cache) for about a 10% performance improvement 4686 * compared to uncached. Graphics requests other than 4687 * display scanout are coherent with the CPU in 4688 * accessing this cache. This means in this mode we 4689 * don't need to clflush on the CPU side, and on the 4690 * GPU side we only need to flush internal caches to 4691 * get data visible to the CPU. 4692 * 4693 * However, we maintain the display planes as UC, and so 4694 * need to rebind when first used as such. 4695 */ 4696 obj->cache_level = I915_CACHE_LLC; 4697 } else 4698 obj->cache_level = I915_CACHE_NONE; 4699 4700 trace_i915_gem_object_create(obj); 4701 4702 return obj; 4703 } 4704 4705 static bool discard_backing_storage(struct drm_i915_gem_object *obj) 4706 { 4707 /* If we are the last user of the backing storage (be it shmemfs 4708 * pages or stolen etc), we know that the pages are going to be 4709 * immediately released. In this case, we can then skip copying 4710 * back the contents from the GPU. 4711 */ 4712 4713 if (obj->madv != I915_MADV_WILLNEED) 4714 return false; 4715 4716 if (obj->base.vm_obj == NULL) 4717 return true; 4718 4719 /* At first glance, this looks racy, but then again so would be 4720 * userspace racing mmap against close. However, the first external 4721 * reference to the filp can only be obtained through the 4722 * i915_gem_mmap_ioctl() which safeguards us against the user 4723 * acquiring such a reference whilst we are in the middle of 4724 * freeing the object. 4725 */ 4726 #if 0 4727 return atomic_long_read(&obj->base.filp->f_count) == 1; 4728 #else 4729 return false; 4730 #endif 4731 } 4732 4733 void i915_gem_free_object(struct drm_gem_object *gem_obj) 4734 { 4735 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); 4736 struct drm_device *dev = obj->base.dev; 4737 struct drm_i915_private *dev_priv = dev->dev_private; 4738 struct i915_vma *vma, *next; 4739 4740 intel_runtime_pm_get(dev_priv); 4741 4742 trace_i915_gem_object_destroy(obj); 4743 4744 list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) { 4745 int ret; 4746 4747 vma->pin_count = 0; 4748 ret = i915_vma_unbind(vma); 4749 if (WARN_ON(ret == -ERESTARTSYS)) { 4750 bool was_interruptible; 4751 4752 was_interruptible = dev_priv->mm.interruptible; 4753 dev_priv->mm.interruptible = false; 4754 4755 WARN_ON(i915_vma_unbind(vma)); 4756 4757 dev_priv->mm.interruptible = was_interruptible; 4758 } 4759 } 4760 4761 /* Stolen objects don't hold a ref, but do hold pin count. Fix that up 4762 * before progressing. 
*/ 4763 if (obj->stolen) 4764 i915_gem_object_unpin_pages(obj); 4765 4766 WARN_ON(obj->frontbuffer_bits); 4767 4768 if (obj->pages && obj->madv == I915_MADV_WILLNEED && 4769 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES && 4770 obj->tiling_mode != I915_TILING_NONE) 4771 i915_gem_object_unpin_pages(obj); 4772 4773 if (WARN_ON(obj->pages_pin_count)) 4774 obj->pages_pin_count = 0; 4775 if (discard_backing_storage(obj)) 4776 obj->madv = I915_MADV_DONTNEED; 4777 i915_gem_object_put_pages(obj); 4778 i915_gem_object_free_mmap_offset(obj); 4779 4780 BUG_ON(obj->pages); 4781 4782 #if 0 4783 if (obj->base.import_attach) 4784 drm_prime_gem_destroy(&obj->base, NULL); 4785 #endif 4786 4787 if (obj->ops->release) 4788 obj->ops->release(obj); 4789 4790 drm_gem_object_release(&obj->base); 4791 i915_gem_info_remove_obj(dev_priv, obj->base.size); 4792 4793 kfree(obj->bit_17); 4794 i915_gem_object_free(obj); 4795 4796 intel_runtime_pm_put(dev_priv); 4797 } 4798 4799 struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj, 4800 struct i915_address_space *vm) 4801 { 4802 struct i915_vma *vma; 4803 list_for_each_entry(vma, &obj->vma_list, obj_link) { 4804 if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL && 4805 vma->vm == vm) 4806 return vma; 4807 } 4808 return NULL; 4809 } 4810 4811 struct i915_vma *i915_gem_obj_to_ggtt_view(struct drm_i915_gem_object *obj, 4812 const struct i915_ggtt_view *view) 4813 { 4814 struct drm_device *dev = obj->base.dev; 4815 struct drm_i915_private *dev_priv = to_i915(dev); 4816 struct i915_ggtt *ggtt = &dev_priv->ggtt; 4817 struct i915_vma *vma; 4818 4819 BUG_ON(!view); 4820 4821 list_for_each_entry(vma, &obj->vma_list, obj_link) 4822 if (vma->vm == &ggtt->base && 4823 i915_ggtt_view_equal(&vma->ggtt_view, view)) 4824 return vma; 4825 return NULL; 4826 } 4827 4828 void i915_gem_vma_destroy(struct i915_vma *vma) 4829 { 4830 WARN_ON(vma->node.allocated); 4831 4832 /* Keep the vma as a placeholder in the execbuffer reservation lists */ 4833 if (!list_empty(&vma->exec_list)) 4834 return; 4835 4836 if (!vma->is_ggtt) 4837 i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm)); 4838 4839 list_del(&vma->obj_link); 4840 4841 kfree(vma); 4842 } 4843 4844 static void 4845 i915_gem_stop_engines(struct drm_device *dev) 4846 { 4847 struct drm_i915_private *dev_priv = dev->dev_private; 4848 struct intel_engine_cs *engine; 4849 4850 for_each_engine(engine, dev_priv) 4851 dev_priv->gt.stop_engine(engine); 4852 } 4853 4854 int 4855 i915_gem_suspend(struct drm_device *dev) 4856 { 4857 struct drm_i915_private *dev_priv = dev->dev_private; 4858 int ret = 0; 4859 4860 mutex_lock(&dev->struct_mutex); 4861 ret = i915_gpu_idle(dev); 4862 if (ret) 4863 goto err; 4864 4865 i915_gem_retire_requests(dev); 4866 4867 i915_gem_stop_engines(dev); 4868 mutex_unlock(&dev->struct_mutex); 4869 4870 cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work); 4871 cancel_delayed_work_sync(&dev_priv->mm.retire_work); 4872 #if 0 4873 flush_delayed_work(&dev_priv->mm.idle_work); 4874 #endif 4875 4876 /* Assert that we sucessfully flushed all the work and 4877 * reset the GPU back to its idle, low power state. 
4878 */ 4879 WARN_ON(dev_priv->mm.busy); 4880 4881 return 0; 4882 4883 err: 4884 mutex_unlock(&dev->struct_mutex); 4885 return ret; 4886 } 4887 4888 int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice) 4889 { 4890 struct intel_engine_cs *engine = req->engine; 4891 struct drm_device *dev = engine->dev; 4892 struct drm_i915_private *dev_priv = dev->dev_private; 4893 u32 *remap_info = dev_priv->l3_parity.remap_info[slice]; 4894 int i, ret; 4895 4896 if (!HAS_L3_DPF(dev) || !remap_info) 4897 return 0; 4898 4899 ret = intel_ring_begin(req, GEN7_L3LOG_SIZE / 4 * 3); 4900 if (ret) 4901 return ret; 4902 4903 /* 4904 * Note: We do not worry about the concurrent register cacheline hang 4905 * here because no other code should access these registers other than 4906 * at initialization time. 4907 */ 4908 for (i = 0; i < GEN7_L3LOG_SIZE / 4; i++) { 4909 intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(1)); 4910 intel_ring_emit_reg(engine, GEN7_L3LOG(slice, i)); 4911 intel_ring_emit(engine, remap_info[i]); 4912 } 4913 4914 intel_ring_advance(engine); 4915 4916 return ret; 4917 } 4918 4919 void i915_gem_init_swizzling(struct drm_device *dev) 4920 { 4921 struct drm_i915_private *dev_priv = dev->dev_private; 4922 4923 if (INTEL_INFO(dev)->gen < 5 || 4924 dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE) 4925 return; 4926 4927 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) | 4928 DISP_TILE_SURFACE_SWIZZLING); 4929 4930 if (IS_GEN5(dev)) 4931 return; 4932 4933 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL); 4934 if (IS_GEN6(dev)) 4935 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB)); 4936 else if (IS_GEN7(dev)) 4937 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB)); 4938 else if (IS_GEN8(dev)) 4939 I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW)); 4940 else 4941 BUG(); 4942 } 4943 4944 static void init_unused_ring(struct drm_device *dev, u32 base) 4945 { 4946 struct drm_i915_private *dev_priv = dev->dev_private; 4947 4948 I915_WRITE(RING_CTL(base), 0); 4949 I915_WRITE(RING_HEAD(base), 0); 4950 I915_WRITE(RING_TAIL(base), 0); 4951 I915_WRITE(RING_START(base), 0); 4952 } 4953 4954 static void init_unused_rings(struct drm_device *dev) 4955 { 4956 if (IS_I830(dev)) { 4957 init_unused_ring(dev, PRB1_BASE); 4958 init_unused_ring(dev, SRB0_BASE); 4959 init_unused_ring(dev, SRB1_BASE); 4960 init_unused_ring(dev, SRB2_BASE); 4961 init_unused_ring(dev, SRB3_BASE); 4962 } else if (IS_GEN2(dev)) { 4963 init_unused_ring(dev, SRB0_BASE); 4964 init_unused_ring(dev, SRB1_BASE); 4965 } else if (IS_GEN3(dev)) { 4966 init_unused_ring(dev, PRB1_BASE); 4967 init_unused_ring(dev, PRB2_BASE); 4968 } 4969 } 4970 4971 int i915_gem_init_engines(struct drm_device *dev) 4972 { 4973 struct drm_i915_private *dev_priv = dev->dev_private; 4974 int ret; 4975 4976 ret = intel_init_render_ring_buffer(dev); 4977 if (ret) 4978 return ret; 4979 4980 if (HAS_BSD(dev)) { 4981 ret = intel_init_bsd_ring_buffer(dev); 4982 if (ret) 4983 goto cleanup_render_ring; 4984 } 4985 4986 if (HAS_BLT(dev)) { 4987 ret = intel_init_blt_ring_buffer(dev); 4988 if (ret) 4989 goto cleanup_bsd_ring; 4990 } 4991 4992 if (HAS_VEBOX(dev)) { 4993 ret = intel_init_vebox_ring_buffer(dev); 4994 if (ret) 4995 goto cleanup_blt_ring; 4996 } 4997 4998 if (HAS_BSD2(dev)) { 4999 ret = intel_init_bsd2_ring_buffer(dev); 5000 if (ret) 5001 goto cleanup_vebox_ring; 5002 } 5003 5004 return 0; 5005 5006 cleanup_vebox_ring: 5007 intel_cleanup_engine(&dev_priv->engine[VECS]); 5008 cleanup_blt_ring: 5009 
intel_cleanup_engine(&dev_priv->engine[BCS]); 5010 cleanup_bsd_ring: 5011 intel_cleanup_engine(&dev_priv->engine[VCS]); 5012 cleanup_render_ring: 5013 intel_cleanup_engine(&dev_priv->engine[RCS]); 5014 5015 return ret; 5016 } 5017 5018 int 5019 i915_gem_init_hw(struct drm_device *dev) 5020 { 5021 struct drm_i915_private *dev_priv = dev->dev_private; 5022 struct intel_engine_cs *engine; 5023 int ret, j; 5024 5025 /* Double layer security blanket, see i915_gem_init() */ 5026 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 5027 5028 if (HAS_EDRAM(dev) && INTEL_GEN(dev_priv) < 9) 5029 I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf)); 5030 5031 if (IS_HASWELL(dev)) 5032 I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev) ? 5033 LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED); 5034 5035 if (HAS_PCH_NOP(dev)) { 5036 if (IS_IVYBRIDGE(dev)) { 5037 u32 temp = I915_READ(GEN7_MSG_CTL); 5038 temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK); 5039 I915_WRITE(GEN7_MSG_CTL, temp); 5040 } else if (INTEL_INFO(dev)->gen >= 7) { 5041 u32 temp = I915_READ(HSW_NDE_RSTWRN_OPT); 5042 temp &= ~RESET_PCH_HANDSHAKE_ENABLE; 5043 I915_WRITE(HSW_NDE_RSTWRN_OPT, temp); 5044 } 5045 } 5046 5047 i915_gem_init_swizzling(dev); 5048 5049 /* 5050 * At least 830 can leave some of the unused rings 5051 * "active" (ie. head != tail) after resume which 5052 * will prevent c3 entry. Makes sure all unused rings 5053 * are totally idle. 5054 */ 5055 init_unused_rings(dev); 5056 5057 BUG_ON(!dev_priv->kernel_context); 5058 5059 ret = i915_ppgtt_init_hw(dev); 5060 if (ret) { 5061 DRM_ERROR("PPGTT enable HW failed %d\n", ret); 5062 goto out; 5063 } 5064 5065 /* Need to do basic initialisation of all rings first: */ 5066 for_each_engine(engine, dev_priv) { 5067 ret = engine->init_hw(engine); 5068 if (ret) 5069 goto out; 5070 } 5071 5072 intel_mocs_init_l3cc_table(dev); 5073 5074 /* We can't enable contexts until all firmware is loaded */ 5075 if (HAS_GUC_UCODE(dev)) { 5076 ret = intel_guc_ucode_load(dev); 5077 if (ret) { 5078 DRM_ERROR("Failed to initialize GuC, error %d\n", ret); 5079 ret = -EIO; 5080 goto out; 5081 } 5082 } 5083 5084 /* 5085 * Increment the next seqno by 0x100 so we have a visible break 5086 * on re-initialisation 5087 */ 5088 ret = i915_gem_set_seqno(dev, dev_priv->next_seqno+0x100); 5089 if (ret) 5090 goto out; 5091 5092 /* Now it is safe to go back round and do everything else: */ 5093 for_each_engine(engine, dev_priv) { 5094 struct drm_i915_gem_request *req; 5095 5096 req = i915_gem_request_alloc(engine, NULL); 5097 if (IS_ERR(req)) { 5098 ret = PTR_ERR(req); 5099 break; 5100 } 5101 5102 if (engine->id == RCS) { 5103 for (j = 0; j < NUM_L3_SLICES(dev); j++) { 5104 ret = i915_gem_l3_remap(req, j); 5105 if (ret) 5106 goto err_request; 5107 } 5108 } 5109 5110 ret = i915_ppgtt_init_ring(req); 5111 if (ret) 5112 goto err_request; 5113 5114 ret = i915_gem_context_enable(req); 5115 if (ret) 5116 goto err_request; 5117 5118 err_request: 5119 i915_add_request_no_flush(req); 5120 if (ret) { 5121 DRM_ERROR("Failed to enable %s, error=%d\n", 5122 engine->name, ret); 5123 i915_gem_cleanup_engines(dev); 5124 break; 5125 } 5126 } 5127 5128 out: 5129 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5130 return ret; 5131 } 5132 5133 int i915_gem_init(struct drm_device *dev) 5134 { 5135 struct drm_i915_private *dev_priv = dev->dev_private; 5136 int ret; 5137 5138 i915.enable_execlists = intel_sanitize_enable_execlists(dev, 5139 i915.enable_execlists); 5140 5141 mutex_lock(&dev->struct_mutex); 5142 5143 if 
(!i915.enable_execlists) { 5144 dev_priv->gt.execbuf_submit = i915_gem_ringbuffer_submission; 5145 dev_priv->gt.init_engines = i915_gem_init_engines; 5146 dev_priv->gt.cleanup_engine = intel_cleanup_engine; 5147 dev_priv->gt.stop_engine = intel_stop_engine; 5148 } else { 5149 dev_priv->gt.execbuf_submit = intel_execlists_submission; 5150 dev_priv->gt.init_engines = intel_logical_rings_init; 5151 dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup; 5152 dev_priv->gt.stop_engine = intel_logical_ring_stop; 5153 } 5154 5155 /* This is just a security blanket to placate dragons. 5156 * On some systems, we very sporadically observe that the first TLBs 5157 * used by the CS may be stale, despite us poking the TLB reset. If 5158 * we hold the forcewake during initialisation these problems 5159 * just magically go away. 5160 */ 5161 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 5162 5163 ret = i915_gem_init_userptr(dev); 5164 if (ret) 5165 goto out_unlock; 5166 5167 i915_gem_init_ggtt(dev); 5168 5169 ret = i915_gem_context_init(dev); 5170 if (ret) 5171 goto out_unlock; 5172 5173 ret = dev_priv->gt.init_engines(dev); 5174 if (ret) 5175 goto out_unlock; 5176 5177 ret = i915_gem_init_hw(dev); 5178 if (ret == -EIO) { 5179 /* Allow ring initialisation to fail by marking the GPU as 5180 * wedged. But we only want to do this where the GPU is angry, 5181 * for all other failures, such as an allocation failure, bail. 5182 */ 5183 DRM_ERROR("Failed to initialize GPU, declaring it wedged\n"); 5184 atomic_or(I915_WEDGED, &dev_priv->gpu_error.reset_counter); 5185 ret = 0; 5186 } 5187 5188 out_unlock: 5189 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5190 mutex_unlock(&dev->struct_mutex); 5191 5192 return ret; 5193 } 5194 5195 void 5196 i915_gem_cleanup_engines(struct drm_device *dev) 5197 { 5198 struct drm_i915_private *dev_priv = dev->dev_private; 5199 struct intel_engine_cs *engine; 5200 5201 for_each_engine(engine, dev_priv) 5202 dev_priv->gt.cleanup_engine(engine); 5203 5204 if (i915.enable_execlists) 5205 /* 5206 * Neither the BIOS, ourselves nor any other kernel 5207 * expects the system to be in execlists mode on startup, 5208 * so we need to reset the GPU back to legacy mode. 
5209 */ 5210 intel_gpu_reset(dev, ALL_ENGINES); 5211 } 5212 5213 static void 5214 init_engine_lists(struct intel_engine_cs *engine) 5215 { 5216 INIT_LIST_HEAD(&engine->active_list); 5217 INIT_LIST_HEAD(&engine->request_list); 5218 } 5219 5220 void 5221 i915_gem_load_init_fences(struct drm_i915_private *dev_priv) 5222 { 5223 struct drm_device *dev = dev_priv->dev; 5224 5225 if (INTEL_INFO(dev_priv)->gen >= 7 && !IS_VALLEYVIEW(dev_priv) && 5226 !IS_CHERRYVIEW(dev_priv)) 5227 dev_priv->num_fence_regs = 32; 5228 else if (INTEL_INFO(dev_priv)->gen >= 4 || IS_I945G(dev_priv) || 5229 IS_I945GM(dev_priv) || IS_G33(dev_priv)) 5230 dev_priv->num_fence_regs = 16; 5231 else 5232 dev_priv->num_fence_regs = 8; 5233 5234 if (intel_vgpu_active(dev)) 5235 dev_priv->num_fence_regs = 5236 I915_READ(vgtif_reg(avail_rs.fence_num)); 5237 5238 /* Initialize fence registers to zero */ 5239 i915_gem_restore_fences(dev); 5240 5241 i915_gem_detect_bit_6_swizzle(dev); 5242 } 5243 5244 void 5245 i915_gem_load_init(struct drm_device *dev) 5246 { 5247 struct drm_i915_private *dev_priv = dev->dev_private; 5248 int i; 5249 5250 INIT_LIST_HEAD(&dev_priv->vm_list); 5251 INIT_LIST_HEAD(&dev_priv->context_list); 5252 INIT_LIST_HEAD(&dev_priv->mm.unbound_list); 5253 INIT_LIST_HEAD(&dev_priv->mm.bound_list); 5254 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 5255 for (i = 0; i < I915_NUM_ENGINES; i++) 5256 init_engine_lists(&dev_priv->engine[i]); 5257 for (i = 0; i < I915_MAX_NUM_FENCES; i++) 5258 INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list); 5259 INIT_DELAYED_WORK(&dev_priv->mm.retire_work, 5260 i915_gem_retire_work_handler); 5261 INIT_DELAYED_WORK(&dev_priv->mm.idle_work, 5262 i915_gem_idle_work_handler); 5263 init_waitqueue_head(&dev_priv->gpu_error.reset_queue); 5264 5265 dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL; 5266 5267 /* 5268 * Set initial sequence number for requests. 5269 * Using this number allows the wraparound to happen early, 5270 * catching any obvious problems. 5271 */ 5272 dev_priv->next_seqno = ((u32)~0 - 0x1100); 5273 dev_priv->last_seqno = ((u32)~0 - 0x1101); 5274 5275 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 5276 5277 init_waitqueue_head(&dev_priv->pending_flip_queue); 5278 5279 dev_priv->mm.interruptible = true; 5280 5281 lockinit(&dev_priv->fb_tracking.lock, "drmftl", 0, LK_CANRECURSE); 5282 } 5283 5284 void i915_gem_load_cleanup(struct drm_device *dev) 5285 { 5286 #if 0 5287 struct drm_i915_private *dev_priv = to_i915(dev); 5288 5289 kmem_cache_destroy(dev_priv->requests); 5290 kmem_cache_destroy(dev_priv->vmas); 5291 kmem_cache_destroy(dev_priv->objects); 5292 #endif 5293 } 5294 5295 void i915_gem_release(struct drm_device *dev, struct drm_file *file) 5296 { 5297 struct drm_i915_file_private *file_priv = file->driver_priv; 5298 5299 /* Clean up our request list when the client is going away, so that 5300 * later retire_requests won't dereference our soon-to-be-gone 5301 * file_priv. 
5302 */ 5303 spin_lock(&file_priv->mm.lock); 5304 while (!list_empty(&file_priv->mm.request_list)) { 5305 struct drm_i915_gem_request *request; 5306 5307 request = list_first_entry(&file_priv->mm.request_list, 5308 struct drm_i915_gem_request, 5309 client_list); 5310 list_del(&request->client_list); 5311 request->file_priv = NULL; 5312 } 5313 spin_unlock(&file_priv->mm.lock); 5314 5315 if (!list_empty(&file_priv->rps.link)) { 5316 lockmgr(&to_i915(dev)->rps.client_lock, LK_EXCLUSIVE); 5317 list_del(&file_priv->rps.link); 5318 lockmgr(&to_i915(dev)->rps.client_lock, LK_RELEASE); 5319 } 5320 } 5321 5322 int 5323 i915_gem_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot, 5324 vm_ooffset_t foff, struct ucred *cred, u_short *color) 5325 { 5326 *color = 0; /* XXXKIB */ 5327 return (0); 5328 } 5329 5330 void 5331 i915_gem_pager_dtor(void *handle) 5332 { 5333 struct drm_gem_object *obj; 5334 struct drm_device *dev; 5335 5336 obj = handle; 5337 dev = obj->dev; 5338 5339 mutex_lock(&dev->struct_mutex); 5340 drm_gem_free_mmap_offset(obj); 5341 i915_gem_release_mmap(to_intel_bo(obj)); 5342 drm_gem_object_unreference(obj); 5343 mutex_unlock(&dev->struct_mutex); 5344 } 5345 5346 int i915_gem_open(struct drm_device *dev, struct drm_file *file) 5347 { 5348 struct drm_i915_file_private *file_priv; 5349 int ret; 5350 5351 DRM_DEBUG_DRIVER("\n"); 5352 5353 file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL); 5354 if (!file_priv) 5355 return -ENOMEM; 5356 5357 file->driver_priv = file_priv; 5358 file_priv->dev_priv = dev->dev_private; 5359 file_priv->file = file; 5360 INIT_LIST_HEAD(&file_priv->rps.link); 5361 5362 spin_init(&file_priv->mm.lock, "i915_priv"); 5363 INIT_LIST_HEAD(&file_priv->mm.request_list); 5364 5365 file_priv->bsd_ring = -1; 5366 5367 ret = i915_gem_context_open(dev, file); 5368 if (ret) 5369 kfree(file_priv); 5370 5371 return ret; 5372 } 5373 5374 /** 5375 * i915_gem_track_fb - update frontbuffer tracking 5376 * @old: current GEM buffer for the frontbuffer slots 5377 * @new: new GEM buffer for the frontbuffer slots 5378 * @frontbuffer_bits: bitmask of frontbuffer slots 5379 * 5380 * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them 5381 * from @old and setting them in @new. Both @old and @new can be NULL. 5382 */ 5383 void i915_gem_track_fb(struct drm_i915_gem_object *old, 5384 struct drm_i915_gem_object *new, 5385 unsigned frontbuffer_bits) 5386 { 5387 if (old) { 5388 WARN_ON(!mutex_is_locked(&old->base.dev->struct_mutex)); 5389 WARN_ON(!(old->frontbuffer_bits & frontbuffer_bits)); 5390 old->frontbuffer_bits &= ~frontbuffer_bits; 5391 } 5392 5393 if (new) { 5394 WARN_ON(!mutex_is_locked(&new->base.dev->struct_mutex)); 5395 WARN_ON(new->frontbuffer_bits & frontbuffer_bits); 5396 new->frontbuffer_bits |= frontbuffer_bits; 5397 } 5398 } 5399 5400 /* All the new VM stuff */ 5401 u64 i915_gem_obj_offset(struct drm_i915_gem_object *o, 5402 struct i915_address_space *vm) 5403 { 5404 struct drm_i915_private *dev_priv = o->base.dev->dev_private; 5405 struct i915_vma *vma; 5406 5407 WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base); 5408 5409 list_for_each_entry(vma, &o->vma_list, obj_link) { 5410 if (vma->is_ggtt && 5411 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) 5412 continue; 5413 if (vma->vm == vm) 5414 return vma->node.start; 5415 } 5416 5417 WARN(1, "%s vma for this object not found.\n", 5418 i915_is_ggtt(vm) ? 
"global" : "ppgtt"); 5419 return -1; 5420 } 5421 5422 u64 i915_gem_obj_ggtt_offset_view(struct drm_i915_gem_object *o, 5423 const struct i915_ggtt_view *view) 5424 { 5425 struct drm_i915_private *dev_priv = to_i915(o->base.dev); 5426 struct i915_ggtt *ggtt = &dev_priv->ggtt; 5427 struct i915_vma *vma; 5428 5429 list_for_each_entry(vma, &o->vma_list, obj_link) 5430 if (vma->vm == &ggtt->base && 5431 i915_ggtt_view_equal(&vma->ggtt_view, view)) 5432 return vma->node.start; 5433 5434 WARN(1, "global vma for this object not found. (view=%u)\n", view->type); 5435 return -1; 5436 } 5437 5438 bool i915_gem_obj_bound(struct drm_i915_gem_object *o, 5439 struct i915_address_space *vm) 5440 { 5441 struct i915_vma *vma; 5442 5443 list_for_each_entry(vma, &o->vma_list, obj_link) { 5444 if (vma->is_ggtt && 5445 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) 5446 continue; 5447 if (vma->vm == vm && drm_mm_node_allocated(&vma->node)) 5448 return true; 5449 } 5450 5451 return false; 5452 } 5453 5454 bool i915_gem_obj_ggtt_bound_view(struct drm_i915_gem_object *o, 5455 const struct i915_ggtt_view *view) 5456 { 5457 struct drm_i915_private *dev_priv = to_i915(o->base.dev); 5458 struct i915_ggtt *ggtt = &dev_priv->ggtt; 5459 struct i915_vma *vma; 5460 5461 list_for_each_entry(vma, &o->vma_list, obj_link) 5462 if (vma->vm == &ggtt->base && 5463 i915_ggtt_view_equal(&vma->ggtt_view, view) && 5464 drm_mm_node_allocated(&vma->node)) 5465 return true; 5466 5467 return false; 5468 } 5469 5470 bool i915_gem_obj_bound_any(struct drm_i915_gem_object *o) 5471 { 5472 struct i915_vma *vma; 5473 5474 list_for_each_entry(vma, &o->vma_list, obj_link) 5475 if (drm_mm_node_allocated(&vma->node)) 5476 return true; 5477 5478 return false; 5479 } 5480 5481 unsigned long i915_gem_obj_size(struct drm_i915_gem_object *o, 5482 struct i915_address_space *vm) 5483 { 5484 struct drm_i915_private *dev_priv = o->base.dev->dev_private; 5485 struct i915_vma *vma; 5486 5487 WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base); 5488 5489 BUG_ON(list_empty(&o->vma_list)); 5490 5491 list_for_each_entry(vma, &o->vma_list, obj_link) { 5492 if (vma->is_ggtt && 5493 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) 5494 continue; 5495 if (vma->vm == vm) 5496 return vma->node.size; 5497 } 5498 return 0; 5499 } 5500 5501 bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj) 5502 { 5503 struct i915_vma *vma; 5504 list_for_each_entry(vma, &obj->vma_list, obj_link) 5505 if (vma->pin_count > 0) 5506 return true; 5507 5508 return false; 5509 } 5510 5511 /* Like i915_gem_object_get_page(), but mark the returned page dirty */ 5512 struct vm_page * 5513 i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, int n) 5514 { 5515 struct vm_page *page; 5516 5517 /* Only default objects have per-page dirty tracking */ 5518 if (WARN_ON((obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE) == 0)) 5519 return NULL; 5520 5521 page = i915_gem_object_get_page(obj, n); 5522 set_page_dirty(page); 5523 return page; 5524 } 5525 5526 /* Allocate a new GEM object and fill it with the supplied data */ 5527 struct drm_i915_gem_object * 5528 i915_gem_object_create_from_data(struct drm_device *dev, 5529 const void *data, size_t size) 5530 { 5531 struct drm_i915_gem_object *obj; 5532 struct sg_table *sg; 5533 size_t bytes; 5534 int ret; 5535 5536 obj = i915_gem_alloc_object(dev, round_up(size, PAGE_SIZE)); 5537 if (IS_ERR_OR_NULL(obj)) 5538 return obj; 5539 5540 ret = i915_gem_object_set_to_cpu_domain(obj, true); 5541 if (ret) 5542 goto fail; 5543 5544 ret = 
i915_gem_object_get_pages(obj); 5545 if (ret) 5546 goto fail; 5547 5548 i915_gem_object_pin_pages(obj); 5549 sg = obj->pages; 5550 bytes = sg_copy_from_buffer(sg->sgl, sg->nents, data, size); 5551 obj->dirty = 1; /* Backing store is now out of date */ 5552 i915_gem_object_unpin_pages(obj); 5553 5554 if (WARN_ON(bytes != size)) { 5555 DRM_ERROR("Incomplete copy, wrote %zu of %zu", bytes, size); 5556 ret = -EFAULT; 5557 goto fail; 5558 } 5559 5560 return obj; 5561 5562 fail: 5563 drm_gem_object_unreference(&obj->base); 5564 return ERR_PTR(ret); 5565 } 5566
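/*
 * Illustrative usage sketch (not built): a hypothetical caller of
 * i915_gem_object_create_from_data() above.  The helper may return NULL
 * (object allocation failed) or an ERR_PTR() from a later step, so the
 * caller checks both before taking ownership of the reference.  The
 * function name and the blob being wrapped are invented for the example.
 */
#if 0
static int example_wrap_blob_in_object(struct drm_device *dev,
				       const void *blob, size_t len)
{
	struct drm_i915_gem_object *obj;

	obj = i915_gem_object_create_from_data(dev, blob, len);
	if (IS_ERR_OR_NULL(obj))
		return obj ? PTR_ERR(obj) : -ENOMEM;

	/* ... hand the object over to its consumer here ... */

	/* Drop the creation reference once the consumer holds its own. */
	drm_gem_object_unreference_unlocked(&obj->base);
	return 0;
}
#endif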
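/*
 * Illustrative usage sketch (not built): the locking and domain sequence a
 * hypothetical caller would follow around CPU access to an object's backing
 * pages, using the domain helpers defined earlier in this file.  Moving to
 * the CPU domain waits for rendering and flushes the GTT write domain;
 * moving to the GTT domain afterwards clflushes any dirty CPU cachelines so
 * later GTT access sees the new data.  Only the caller itself is invented.
 */
#if 0
static int example_cpu_touch_object(struct drm_i915_gem_object *obj)
{
	struct drm_device *dev = obj->base.dev;
	int ret;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	/* Wait for the GPU and pull the object into the CPU write domain. */
	ret = i915_gem_object_set_to_cpu_domain(obj, true);
	if (ret)
		goto unlock;

	/* ... CPU reads/writes of the backing pages would go here ... */

	/* Flush the CPU writes so subsequent GTT access is coherent. */
	ret = i915_gem_object_set_to_gtt_domain(obj, false);

unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}
#endif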