1 /* 2 * Copyright © 2008-2015 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 * Authors: 24 * Eric Anholt <eric@anholt.net> 25 * 26 */ 27 28 #include <drm/drmP.h> 29 #include <drm/drm_vma_manager.h> 30 #include <drm/i915_drm.h> 31 #include "i915_drv.h" 32 #include "i915_gem_dmabuf.h" 33 #include "i915_vgpu.h" 34 #include "i915_trace.h" 35 #include "intel_drv.h" 36 #include "intel_mocs.h" 37 #include <linux/reservation.h> 38 #include <linux/shmem_fs.h> 39 #include <linux/slab.h> 40 #include <linux/swap.h> 41 #include <linux/pci.h> 42 #include <linux/dma-buf.h> 43 44 #include <sys/mman.h> 45 #include <vm/vm_map.h> 46 #include <vm/vm_param.h> 47 48 static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj); 49 static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj); 50 static void 51 i915_gem_object_retire__write(struct drm_i915_gem_object *obj); 52 static void 53 i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring); 54 55 static bool cpu_cache_is_coherent(struct drm_device *dev, 56 enum i915_cache_level level) 57 { 58 return HAS_LLC(dev) || level != I915_CACHE_NONE; 59 } 60 61 static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) 62 { 63 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) 64 return false; 65 66 if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) 67 return true; 68 69 return obj->pin_display; 70 } 71 72 static int 73 insert_mappable_node(struct drm_i915_private *i915, 74 struct drm_mm_node *node, u32 size) 75 { 76 memset(node, 0, sizeof(*node)); 77 return drm_mm_insert_node_in_range_generic(&i915->ggtt.base.mm, node, 78 size, 0, 0, 0, 79 i915->ggtt.mappable_end, 80 DRM_MM_SEARCH_DEFAULT, 81 DRM_MM_CREATE_DEFAULT); 82 } 83 84 static void 85 remove_mappable_node(struct drm_mm_node *node) 86 { 87 drm_mm_remove_node(node); 88 } 89 90 /* some bookkeeping */ 91 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv, 92 size_t size) 93 { 94 lockmgr(&dev_priv->mm.object_stat_lock, LK_EXCLUSIVE); 95 dev_priv->mm.object_count++; 96 dev_priv->mm.object_memory += size; 97 lockmgr(&dev_priv->mm.object_stat_lock, LK_RELEASE); 98 } 99 100 static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv, 101 size_t size) 102 { 103 lockmgr(&dev_priv->mm.object_stat_lock, LK_EXCLUSIVE); 104 dev_priv->mm.object_count--; 105 dev_priv->mm.object_memory -= size; 106 
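	/* lockmgr(&dev_priv->mm.object_stat_lock, LK_EXCLUSIVE/LK_RELEASE) is
	 * the DragonFly lock-manager equivalent of the spin_lock()/spin_unlock()
	 * pair the upstream Linux driver uses around these statistics counters.
	 */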
lockmgr(&dev_priv->mm.object_stat_lock, LK_RELEASE); 107 } 108 109 static int 110 i915_gem_wait_for_error(struct i915_gpu_error *error) 111 { 112 int ret; 113 114 if (!i915_reset_in_progress(error)) 115 return 0; 116 117 /* 118 * Only wait 10 seconds for the gpu reset to complete to avoid hanging 119 * userspace. If it takes that long something really bad is going on and 120 * we should simply try to bail out and fail as gracefully as possible. 121 */ 122 ret = wait_event_interruptible_timeout(error->reset_queue, 123 !i915_reset_in_progress(error), 124 10*HZ); 125 if (ret == 0) { 126 DRM_ERROR("Timed out waiting for the gpu reset to complete\n"); 127 return -EIO; 128 } else if (ret < 0) { 129 return ret; 130 } else { 131 return 0; 132 } 133 } 134 135 int i915_mutex_lock_interruptible(struct drm_device *dev) 136 { 137 struct drm_i915_private *dev_priv = to_i915(dev); 138 int ret; 139 140 ret = i915_gem_wait_for_error(&dev_priv->gpu_error); 141 if (ret) 142 return ret; 143 144 ret = mutex_lock_interruptible(&dev->struct_mutex); 145 if (ret) 146 return ret; 147 148 WARN_ON(i915_verify_lists(dev)); 149 return 0; 150 } 151 152 int 153 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, 154 struct drm_file *file) 155 { 156 struct drm_i915_private *dev_priv = to_i915(dev); 157 struct i915_ggtt *ggtt = &dev_priv->ggtt; 158 struct drm_i915_gem_get_aperture *args = data; 159 struct i915_vma *vma; 160 size_t pinned; 161 162 pinned = 0; 163 mutex_lock(&dev->struct_mutex); 164 list_for_each_entry(vma, &ggtt->base.active_list, vm_link) 165 if (vma->pin_count) 166 pinned += vma->node.size; 167 list_for_each_entry(vma, &ggtt->base.inactive_list, vm_link) 168 if (vma->pin_count) 169 pinned += vma->node.size; 170 mutex_unlock(&dev->struct_mutex); 171 172 args->aper_size = ggtt->base.total; 173 args->aper_available_size = args->aper_size - pinned; 174 175 return 0; 176 } 177 178 #if 0 179 static int 180 i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) 181 { 182 struct address_space *mapping = file_inode(obj->base.filp)->i_mapping; 183 char *vaddr = obj->phys_handle->vaddr; 184 struct sg_table *st; 185 struct scatterlist *sg; 186 int i; 187 188 if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj))) 189 return -EINVAL; 190 191 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) { 192 struct page *page; 193 char *src; 194 195 page = shmem_read_mapping_page(mapping, i); 196 if (IS_ERR(page)) 197 return PTR_ERR(page); 198 199 src = kmap_atomic(page); 200 memcpy(vaddr, src, PAGE_SIZE); 201 drm_clflush_virt_range(vaddr, PAGE_SIZE); 202 kunmap_atomic(src); 203 204 put_page(page); 205 vaddr += PAGE_SIZE; 206 } 207 208 i915_gem_chipset_flush(to_i915(obj->base.dev)); 209 210 st = kmalloc(sizeof(*st), GFP_KERNEL); 211 if (st == NULL) 212 return -ENOMEM; 213 214 if (sg_alloc_table(st, 1, GFP_KERNEL)) { 215 kfree(st); 216 return -ENOMEM; 217 } 218 219 sg = st->sgl; 220 sg->offset = 0; 221 sg->length = obj->base.size; 222 223 sg_dma_address(sg) = obj->phys_handle->busaddr; 224 sg_dma_len(sg) = obj->base.size; 225 226 obj->pages = st; 227 return 0; 228 } 229 230 static void 231 i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj) 232 { 233 int ret; 234 235 BUG_ON(obj->madv == __I915_MADV_PURGED); 236 237 ret = i915_gem_object_set_to_cpu_domain(obj, true); 238 if (WARN_ON(ret)) { 239 /* In the event of a disaster, abandon all caches and 240 * hope for the best. 
241 */ 242 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU; 243 } 244 245 if (obj->madv == I915_MADV_DONTNEED) 246 obj->dirty = 0; 247 248 if (obj->dirty) { 249 struct address_space *mapping = file_inode(obj->base.filp)->i_mapping; 250 char *vaddr = obj->phys_handle->vaddr; 251 int i; 252 253 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) { 254 struct page *page; 255 char *dst; 256 257 page = shmem_read_mapping_page(mapping, i); 258 if (IS_ERR(page)) 259 continue; 260 261 dst = kmap_atomic(page); 262 drm_clflush_virt_range(vaddr, PAGE_SIZE); 263 memcpy(dst, vaddr, PAGE_SIZE); 264 kunmap_atomic(dst); 265 266 set_page_dirty(page); 267 if (obj->madv == I915_MADV_WILLNEED) 268 mark_page_accessed(page); 269 put_page(page); 270 vaddr += PAGE_SIZE; 271 } 272 obj->dirty = 0; 273 } 274 275 sg_free_table(obj->pages); 276 kfree(obj->pages); 277 } 278 279 static void 280 i915_gem_object_release_phys(struct drm_i915_gem_object *obj) 281 { 282 drm_pci_free(obj->base.dev, obj->phys_handle); 283 } 284 #endif 285 286 static const struct drm_i915_gem_object_ops i915_gem_phys_ops = { 287 #if 0 288 .get_pages = i915_gem_object_get_pages_phys, 289 .put_pages = i915_gem_object_put_pages_phys, 290 .release = i915_gem_object_release_phys, 291 #endif 292 }; 293 294 static int 295 drop_pages(struct drm_i915_gem_object *obj) 296 { 297 struct i915_vma *vma, *next; 298 int ret; 299 300 i915_gem_object_get(obj); 301 list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) 302 if (i915_vma_unbind(vma)) 303 break; 304 305 ret = i915_gem_object_put_pages(obj); 306 i915_gem_object_put(obj); 307 308 return ret; 309 } 310 311 int 312 i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, 313 int align) 314 { 315 drm_dma_handle_t *phys; 316 int ret; 317 318 if (obj->phys_handle) { 319 if ((unsigned long)obj->phys_handle->vaddr & (align -1)) 320 return -EBUSY; 321 322 return 0; 323 } 324 325 if (obj->madv != I915_MADV_WILLNEED) 326 return -EFAULT; 327 328 if (obj->base.filp == NULL) 329 return -EINVAL; 330 331 ret = drop_pages(obj); 332 if (ret) 333 return ret; 334 335 /* create a new object */ 336 phys = drm_pci_alloc(obj->base.dev, obj->base.size, align); 337 if (!phys) 338 return -ENOMEM; 339 340 obj->phys_handle = phys; 341 obj->ops = &i915_gem_phys_ops; 342 343 return i915_gem_object_get_pages(obj); 344 } 345 346 static int 347 i915_gem_phys_pwrite(struct drm_i915_gem_object *obj, 348 struct drm_i915_gem_pwrite *args, 349 struct drm_file *file_priv) 350 { 351 struct drm_device *dev = obj->base.dev; 352 void *vaddr = obj->phys_handle->vaddr + args->offset; 353 char __user *user_data = u64_to_user_ptr(args->data_ptr); 354 int ret = 0; 355 356 /* We manually control the domain here and pretend that it 357 * remains coherent i.e. in the GTT domain, like shmem_pwrite. 358 */ 359 ret = i915_gem_object_wait_rendering(obj, false); 360 if (ret) 361 return ret; 362 363 intel_fb_obj_invalidate(obj, ORIGIN_CPU); 364 if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) { 365 unsigned long unwritten; 366 367 /* The physical object once assigned is fixed for the lifetime 368 * of the obj, so we can safely drop the lock and continue 369 * to access vaddr. 
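		 * (The atomic, non-faulting copy above has already failed, so we
		 * retry with a plain copy_from_user() that is allowed to fault
		 * the user pages in.)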
370 */ 371 mutex_unlock(&dev->struct_mutex); 372 unwritten = copy_from_user(vaddr, user_data, args->size); 373 mutex_lock(&dev->struct_mutex); 374 if (unwritten) { 375 ret = -EFAULT; 376 goto out; 377 } 378 } 379 380 drm_clflush_virt_range(vaddr, args->size); 381 i915_gem_chipset_flush(to_i915(dev)); 382 383 out: 384 intel_fb_obj_flush(obj, false, ORIGIN_CPU); 385 return ret; 386 } 387 388 void *i915_gem_object_alloc(struct drm_device *dev) 389 { 390 return kzalloc(sizeof(struct drm_i915_gem_object), GFP_KERNEL); 391 } 392 393 void i915_gem_object_free(struct drm_i915_gem_object *obj) 394 { 395 kfree(obj); 396 } 397 398 static int 399 i915_gem_create(struct drm_file *file, 400 struct drm_device *dev, 401 uint64_t size, 402 uint32_t *handle_p) 403 { 404 struct drm_i915_gem_object *obj; 405 int ret; 406 u32 handle; 407 408 size = roundup(size, PAGE_SIZE); 409 if (size == 0) 410 return -EINVAL; 411 412 /* Allocate the new object */ 413 obj = i915_gem_object_create(dev, size); 414 if (IS_ERR(obj)) 415 return PTR_ERR(obj); 416 417 ret = drm_gem_handle_create(file, &obj->base, &handle); 418 /* drop reference from allocate - handle holds it now */ 419 i915_gem_object_put_unlocked(obj); 420 if (ret) 421 return ret; 422 423 *handle_p = handle; 424 return 0; 425 } 426 427 int 428 i915_gem_dumb_create(struct drm_file *file, 429 struct drm_device *dev, 430 struct drm_mode_create_dumb *args) 431 { 432 /* have to work out size/pitch and return them */ 433 args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64); 434 args->size = args->pitch * args->height; 435 return i915_gem_create(file, dev, 436 args->size, &args->handle); 437 } 438 439 /** 440 * Creates a new mm object and returns a handle to it. 441 * @dev: drm device pointer 442 * @data: ioctl data blob 443 * @file: drm file pointer 444 */ 445 int 446 i915_gem_create_ioctl(struct drm_device *dev, void *data, 447 struct drm_file *file) 448 { 449 struct drm_i915_gem_create *args = data; 450 451 return i915_gem_create(file, dev, 452 args->size, &args->handle); 453 } 454 455 static inline int 456 __copy_to_user_swizzled(char __user *cpu_vaddr, 457 const char *gpu_vaddr, int gpu_offset, 458 int length) 459 { 460 int ret, cpu_offset = 0; 461 462 while (length > 0) { 463 int cacheline_end = ALIGN(gpu_offset + 1, 64); 464 int this_length = min(cacheline_end - gpu_offset, length); 465 int swizzled_gpu_offset = gpu_offset ^ 64; 466 467 ret = __copy_to_user(cpu_vaddr + cpu_offset, 468 gpu_vaddr + swizzled_gpu_offset, 469 this_length); 470 if (ret) 471 return ret + length; 472 473 cpu_offset += this_length; 474 gpu_offset += this_length; 475 length -= this_length; 476 } 477 478 return 0; 479 } 480 481 static inline int 482 __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset, 483 const char __user *cpu_vaddr, 484 int length) 485 { 486 int ret, cpu_offset = 0; 487 488 while (length > 0) { 489 int cacheline_end = ALIGN(gpu_offset + 1, 64); 490 int this_length = min(cacheline_end - gpu_offset, length); 491 int swizzled_gpu_offset = gpu_offset ^ 64; 492 493 ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset, 494 cpu_vaddr + cpu_offset, 495 this_length); 496 if (ret) 497 return ret + length; 498 499 cpu_offset += this_length; 500 gpu_offset += this_length; 501 length -= this_length; 502 } 503 504 return 0; 505 } 506 507 /* 508 * Pins the specified object's pages and synchronizes the object with 509 * GPU accesses. Sets needs_clflush to non-zero if the caller should 510 * flush the object from the CPU cache. 
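 *
 * Typical use, as in the shmem pread path below (sketch only):
 *
 *	ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
 *	if (ret)
 *		return ret;
 *	... copy from obj->pages, clflushing first when needs_clflush is set ...
 *	i915_gem_object_unpin_pages(obj);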
511 */ 512 int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, 513 int *needs_clflush) 514 { 515 int ret; 516 517 *needs_clflush = 0; 518 519 if (WARN_ON(!i915_gem_object_has_struct_page(obj))) 520 return -EINVAL; 521 522 ret = i915_gem_object_wait_rendering(obj, true); 523 if (ret) 524 return ret; 525 526 if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) { 527 /* If we're not in the cpu read domain, set ourself into the gtt 528 * read domain and manually flush cachelines (if required). This 529 * optimizes for the case when the gpu will dirty the data 530 * anyway again before the next pread happens. */ 531 *needs_clflush = !cpu_cache_is_coherent(obj->base.dev, 532 obj->cache_level); 533 } 534 535 ret = i915_gem_object_get_pages(obj); 536 if (ret) 537 return ret; 538 539 i915_gem_object_pin_pages(obj); 540 541 return ret; 542 } 543 544 /* Per-page copy function for the shmem pread fastpath. 545 * Flushes invalid cachelines before reading the target if 546 * needs_clflush is set. */ 547 static int 548 shmem_pread_fast(struct page *page, int shmem_page_offset, int page_length, 549 char __user *user_data, 550 bool page_do_bit17_swizzling, bool needs_clflush) 551 { 552 char *vaddr; 553 int ret; 554 555 if (unlikely(page_do_bit17_swizzling)) 556 return -EINVAL; 557 558 vaddr = kmap_atomic(page); 559 if (needs_clflush) 560 drm_clflush_virt_range(vaddr + shmem_page_offset, 561 page_length); 562 ret = __copy_to_user_inatomic(user_data, 563 vaddr + shmem_page_offset, 564 page_length); 565 kunmap_atomic(vaddr); 566 567 return ret ? -EFAULT : 0; 568 } 569 570 static void 571 shmem_clflush_swizzled_range(char *addr, unsigned long length, 572 bool swizzled) 573 { 574 if (unlikely(swizzled)) { 575 unsigned long start = (unsigned long) addr; 576 unsigned long end = (unsigned long) addr + length; 577 578 /* For swizzling simply ensure that we always flush both 579 * channels. Lame, but simple and it works. Swizzled 580 * pwrite/pread is far from a hotpath - current userspace 581 * doesn't use it at all. */ 582 start = round_down(start, 128); 583 end = round_up(end, 128); 584 585 drm_clflush_virt_range((void *)start, end - start); 586 } else { 587 drm_clflush_virt_range(addr, length); 588 } 589 590 } 591 592 /* Only difference to the fast-path function is that this can handle bit17 593 * and uses non-atomic copy and kmap functions. */ 594 static int 595 shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length, 596 char __user *user_data, 597 bool page_do_bit17_swizzling, bool needs_clflush) 598 { 599 char *vaddr; 600 int ret; 601 602 vaddr = kmap(page); 603 if (needs_clflush) 604 shmem_clflush_swizzled_range(vaddr + shmem_page_offset, 605 page_length, 606 page_do_bit17_swizzling); 607 608 if (page_do_bit17_swizzling) 609 ret = __copy_to_user_swizzled(user_data, 610 vaddr, shmem_page_offset, 611 page_length); 612 else 613 ret = __copy_to_user(user_data, 614 vaddr + shmem_page_offset, 615 page_length); 616 kunmap(page); 617 618 return ret ? - EFAULT : 0; 619 } 620 621 static inline unsigned long 622 slow_user_access(struct io_mapping *mapping, 623 uint64_t page_base, int page_offset, 624 char __user *user_data, 625 unsigned long length, bool pwrite) 626 { 627 void __iomem *ioaddr; 628 void *vaddr; 629 uint64_t unwritten; 630 631 ioaddr = io_mapping_map_wc(mapping, page_base, PAGE_SIZE); 632 /* We can use the cpu mem copy function because this is X86. 
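	 * Ordinary loads and stores to a write-combined io mapping are legal
	 * there, so the generic user-copy helpers can operate on the mapped
	 * pointer directly.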
*/ 633 vaddr = (void __force *)ioaddr + page_offset; 634 if (pwrite) 635 unwritten = __copy_from_user(vaddr, user_data, length); 636 else 637 unwritten = __copy_to_user(user_data, vaddr, length); 638 639 io_mapping_unmap(ioaddr); 640 return unwritten; 641 } 642 643 static int 644 i915_gem_gtt_pread(struct drm_device *dev, 645 struct drm_i915_gem_object *obj, uint64_t size, 646 uint64_t data_offset, uint64_t data_ptr) 647 { 648 struct drm_i915_private *dev_priv = to_i915(dev); 649 struct i915_ggtt *ggtt = &dev_priv->ggtt; 650 struct drm_mm_node node; 651 char __user *user_data; 652 uint64_t remain; 653 uint64_t offset; 654 int ret; 655 656 ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE); 657 if (ret) { 658 ret = insert_mappable_node(dev_priv, &node, PAGE_SIZE); 659 if (ret) 660 goto out; 661 662 ret = i915_gem_object_get_pages(obj); 663 if (ret) { 664 remove_mappable_node(&node); 665 goto out; 666 } 667 668 i915_gem_object_pin_pages(obj); 669 } else { 670 node.start = i915_gem_obj_ggtt_offset(obj); 671 node.allocated = false; 672 ret = i915_gem_object_put_fence(obj); 673 if (ret) 674 goto out_unpin; 675 } 676 677 ret = i915_gem_object_set_to_gtt_domain(obj, false); 678 if (ret) 679 goto out_unpin; 680 681 user_data = u64_to_user_ptr(data_ptr); 682 remain = size; 683 offset = data_offset; 684 685 mutex_unlock(&dev->struct_mutex); 686 if (likely(!i915.prefault_disable)) { 687 ret = fault_in_multipages_writeable(user_data, remain); 688 if (ret) { 689 mutex_lock(&dev->struct_mutex); 690 goto out_unpin; 691 } 692 } 693 694 while (remain > 0) { 695 /* Operation in this page 696 * 697 * page_base = page offset within aperture 698 * page_offset = offset within page 699 * page_length = bytes to copy for this page 700 */ 701 u32 page_base = node.start; 702 unsigned page_offset = offset_in_page(offset); 703 unsigned page_length = PAGE_SIZE - page_offset; 704 page_length = remain < page_length ? remain : page_length; 705 if (node.allocated) { 706 wmb(); 707 ggtt->base.insert_page(&ggtt->base, 708 i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT), 709 node.start, 710 I915_CACHE_NONE, 0); 711 wmb(); 712 } else { 713 page_base += offset & PAGE_MASK; 714 } 715 /* This is a slow read/write as it tries to read from 716 * and write to user memory which may result into page 717 * faults, and so we cannot perform this under struct_mutex. 718 */ 719 if (slow_user_access(ggtt->mappable, page_base, 720 page_offset, user_data, 721 page_length, false)) { 722 ret = -EFAULT; 723 break; 724 } 725 726 remain -= page_length; 727 user_data += page_length; 728 offset += page_length; 729 } 730 731 mutex_lock(&dev->struct_mutex); 732 if (ret == 0 && (obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) { 733 /* The user has modified the object whilst we tried 734 * reading from it, and we now have no idea what domain 735 * the pages should be in. As we have just been touching 736 * them directly, flush everything back to the GTT 737 * domain. 
738 */ 739 ret = i915_gem_object_set_to_gtt_domain(obj, false); 740 } 741 742 out_unpin: 743 if (node.allocated) { 744 wmb(); 745 ggtt->base.clear_range(&ggtt->base, 746 node.start, node.size, 747 true); 748 i915_gem_object_unpin_pages(obj); 749 remove_mappable_node(&node); 750 } else { 751 i915_gem_object_ggtt_unpin(obj); 752 } 753 out: 754 return ret; 755 } 756 757 static int 758 i915_gem_shmem_pread(struct drm_device *dev, 759 struct drm_i915_gem_object *obj, 760 struct drm_i915_gem_pread *args, 761 struct drm_file *file) 762 { 763 char __user *user_data; 764 ssize_t remain; 765 loff_t offset; 766 int shmem_page_offset, page_length, ret = 0; 767 int obj_do_bit17_swizzling, page_do_bit17_swizzling; 768 int prefaulted = 0; 769 int needs_clflush = 0; 770 struct sg_page_iter sg_iter; 771 772 if (!i915_gem_object_has_struct_page(obj)) 773 return -ENODEV; 774 775 user_data = u64_to_user_ptr(args->data_ptr); 776 remain = args->size; 777 778 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); 779 780 ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush); 781 if (ret) 782 return ret; 783 784 offset = args->offset; 785 786 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 787 offset >> PAGE_SHIFT) { 788 struct page *page = sg_page_iter_page(&sg_iter); 789 790 if (remain <= 0) 791 break; 792 793 /* Operation in this page 794 * 795 * shmem_page_offset = offset within page in shmem file 796 * page_length = bytes to copy for this page 797 */ 798 shmem_page_offset = offset_in_page(offset); 799 page_length = remain; 800 if ((shmem_page_offset + page_length) > PAGE_SIZE) 801 page_length = PAGE_SIZE - shmem_page_offset; 802 803 page_do_bit17_swizzling = obj_do_bit17_swizzling && 804 (page_to_phys(page) & (1 << 17)) != 0; 805 806 ret = shmem_pread_fast(page, shmem_page_offset, page_length, 807 user_data, page_do_bit17_swizzling, 808 needs_clflush); 809 if (ret == 0) 810 goto next_page; 811 812 mutex_unlock(&dev->struct_mutex); 813 814 if (likely(!i915.prefault_disable) && !prefaulted) { 815 ret = fault_in_multipages_writeable(user_data, remain); 816 /* Userspace is tricking us, but we've already clobbered 817 * its pages with the prefault and promised to write the 818 * data up to the first fault. Hence ignore any errors 819 * and just continue. */ 820 (void)ret; 821 prefaulted = 1; 822 } 823 824 ret = shmem_pread_slow(page, shmem_page_offset, page_length, 825 user_data, page_do_bit17_swizzling, 826 needs_clflush); 827 828 mutex_lock(&dev->struct_mutex); 829 830 if (ret) 831 goto out; 832 833 next_page: 834 remain -= page_length; 835 user_data += page_length; 836 offset += page_length; 837 } 838 839 out: 840 i915_gem_object_unpin_pages(obj); 841 842 return ret; 843 } 844 845 /** 846 * Reads data from the object referenced by handle. 847 * @dev: drm device pointer 848 * @data: ioctl data blob 849 * @file: drm file pointer 850 * 851 * On error, the contents of *data are undefined. 
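 *
 * The shmem pread path is tried first; if the object has no struct pages,
 * or a fault is taken that cannot be serviced under struct_mutex, we fall
 * back to the GTT pread path.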
852 */ 853 int 854 i915_gem_pread_ioctl(struct drm_device *dev, void *data, 855 struct drm_file *file) 856 { 857 struct drm_i915_gem_pread *args = data; 858 struct drm_i915_gem_object *obj; 859 int ret = 0; 860 861 if (args->size == 0) 862 return 0; 863 864 #if 0 865 if (!access_ok(VERIFY_WRITE, 866 u64_to_user_ptr(args->data_ptr), 867 args->size)) 868 return -EFAULT; 869 #endif 870 871 ret = i915_mutex_lock_interruptible(dev); 872 if (ret) 873 return ret; 874 875 obj = i915_gem_object_lookup(file, args->handle); 876 if (!obj) { 877 ret = -ENOENT; 878 goto unlock; 879 } 880 881 /* Bounds check source. */ 882 if (args->offset > obj->base.size || 883 args->size > obj->base.size - args->offset) { 884 ret = -EINVAL; 885 goto out; 886 } 887 888 trace_i915_gem_object_pread(obj, args->offset, args->size); 889 890 ret = i915_gem_shmem_pread(dev, obj, args, file); 891 892 /* pread for non shmem backed objects */ 893 if (ret == -EFAULT || ret == -ENODEV) 894 ret = i915_gem_gtt_pread(dev, obj, args->size, 895 args->offset, args->data_ptr); 896 897 out: 898 i915_gem_object_put(obj); 899 unlock: 900 mutex_unlock(&dev->struct_mutex); 901 return ret; 902 } 903 904 /* This is the fast write path which cannot handle 905 * page faults in the source data 906 */ 907 908 static inline int 909 fast_user_write(struct io_mapping *mapping, 910 loff_t page_base, int page_offset, 911 char __user *user_data, 912 int length) 913 { 914 void __iomem *vaddr_atomic; 915 void *vaddr; 916 unsigned long unwritten; 917 918 vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base); 919 /* We can use the cpu mem copy function because this is X86. */ 920 vaddr = (void __force*)vaddr_atomic + page_offset; 921 unwritten = __copy_from_user_inatomic_nocache(vaddr, 922 user_data, length); 923 io_mapping_unmap_atomic(vaddr_atomic); 924 return unwritten; 925 } 926 927 /** 928 * This is the fast pwrite path, where we copy the data directly from the 929 * user into the GTT, uncached. 
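 * If the non-faulting nocache copy hits an unfaulted user page, struct_mutex
 * is dropped and the copy is retried through the slow, faulting path.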
930 * @i915: i915 device private data 931 * @obj: i915 gem object 932 * @args: pwrite arguments structure 933 * @file: drm file pointer 934 */ 935 static int 936 i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915, 937 struct drm_i915_gem_object *obj, 938 struct drm_i915_gem_pwrite *args, 939 struct drm_file *file) 940 { 941 struct i915_ggtt *ggtt = &i915->ggtt; 942 struct drm_device *dev = obj->base.dev; 943 struct drm_mm_node node; 944 uint64_t remain, offset; 945 char __user *user_data; 946 int ret; 947 bool hit_slow_path = false; 948 949 if (obj->tiling_mode != I915_TILING_NONE) 950 return -EFAULT; 951 952 ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK); 953 if (ret) { 954 ret = insert_mappable_node(i915, &node, PAGE_SIZE); 955 if (ret) 956 goto out; 957 958 ret = i915_gem_object_get_pages(obj); 959 if (ret) { 960 remove_mappable_node(&node); 961 goto out; 962 } 963 964 i915_gem_object_pin_pages(obj); 965 } else { 966 node.start = i915_gem_obj_ggtt_offset(obj); 967 node.allocated = false; 968 ret = i915_gem_object_put_fence(obj); 969 if (ret) 970 goto out_unpin; 971 } 972 973 ret = i915_gem_object_set_to_gtt_domain(obj, true); 974 if (ret) 975 goto out_unpin; 976 977 intel_fb_obj_invalidate(obj, ORIGIN_GTT); 978 obj->dirty = true; 979 980 user_data = u64_to_user_ptr(args->data_ptr); 981 offset = args->offset; 982 remain = args->size; 983 while (remain) { 984 /* Operation in this page 985 * 986 * page_base = page offset within aperture 987 * page_offset = offset within page 988 * page_length = bytes to copy for this page 989 */ 990 u32 page_base = node.start; 991 unsigned page_offset = offset_in_page(offset); 992 unsigned page_length = PAGE_SIZE - page_offset; 993 page_length = remain < page_length ? remain : page_length; 994 if (node.allocated) { 995 wmb(); /* flush the write before we modify the GGTT */ 996 ggtt->base.insert_page(&ggtt->base, 997 i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT), 998 node.start, I915_CACHE_NONE, 0); 999 wmb(); /* flush modifications to the GGTT (insert_page) */ 1000 } else { 1001 page_base += offset & LINUX_PAGE_MASK; 1002 } 1003 /* If we get a fault while copying data, then (presumably) our 1004 * source page isn't available. Return the error and we'll 1005 * retry in the slow path. 1006 * If the object is non-shmem backed, we retry again with the 1007 * path that handles page fault. 1008 */ 1009 if (fast_user_write(ggtt->mappable, page_base, 1010 page_offset, user_data, page_length)) { 1011 hit_slow_path = true; 1012 mutex_unlock(&dev->struct_mutex); 1013 if (slow_user_access(ggtt->mappable, 1014 page_base, 1015 page_offset, user_data, 1016 page_length, true)) { 1017 ret = -EFAULT; 1018 mutex_lock(&dev->struct_mutex); 1019 goto out_flush; 1020 } 1021 1022 mutex_lock(&dev->struct_mutex); 1023 } 1024 1025 remain -= page_length; 1026 user_data += page_length; 1027 offset += page_length; 1028 } 1029 1030 out_flush: 1031 if (hit_slow_path) { 1032 if (ret == 0 && 1033 (obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) { 1034 /* The user has modified the object whilst we tried 1035 * reading from it, and we now have no idea what domain 1036 * the pages should be in. As we have just been touching 1037 * them directly, flush everything back to the GTT 1038 * domain. 
1039 */ 1040 ret = i915_gem_object_set_to_gtt_domain(obj, false); 1041 } 1042 } 1043 1044 intel_fb_obj_flush(obj, false, ORIGIN_GTT); 1045 out_unpin: 1046 if (node.allocated) { 1047 wmb(); 1048 ggtt->base.clear_range(&ggtt->base, 1049 node.start, node.size, 1050 true); 1051 i915_gem_object_unpin_pages(obj); 1052 remove_mappable_node(&node); 1053 } else { 1054 i915_gem_object_ggtt_unpin(obj); 1055 } 1056 out: 1057 return ret; 1058 } 1059 1060 /* Per-page copy function for the shmem pwrite fastpath. 1061 * Flushes invalid cachelines before writing to the target if 1062 * needs_clflush_before is set and flushes out any written cachelines after 1063 * writing if needs_clflush is set. */ 1064 static int 1065 shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length, 1066 char __user *user_data, 1067 bool page_do_bit17_swizzling, 1068 bool needs_clflush_before, 1069 bool needs_clflush_after) 1070 { 1071 char *vaddr; 1072 int ret; 1073 1074 if (unlikely(page_do_bit17_swizzling)) 1075 return -EINVAL; 1076 1077 vaddr = kmap_atomic(page); 1078 if (needs_clflush_before) 1079 drm_clflush_virt_range(vaddr + shmem_page_offset, 1080 page_length); 1081 ret = __copy_from_user_inatomic(vaddr + shmem_page_offset, 1082 user_data, page_length); 1083 if (needs_clflush_after) 1084 drm_clflush_virt_range(vaddr + shmem_page_offset, 1085 page_length); 1086 kunmap_atomic(vaddr); 1087 1088 return ret ? -EFAULT : 0; 1089 } 1090 1091 /* Only difference to the fast-path function is that this can handle bit17 1092 * and uses non-atomic copy and kmap functions. */ 1093 static int 1094 shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length, 1095 char __user *user_data, 1096 bool page_do_bit17_swizzling, 1097 bool needs_clflush_before, 1098 bool needs_clflush_after) 1099 { 1100 char *vaddr; 1101 int ret; 1102 1103 vaddr = kmap(page); 1104 if (unlikely(needs_clflush_before || page_do_bit17_swizzling)) 1105 shmem_clflush_swizzled_range(vaddr + shmem_page_offset, 1106 page_length, 1107 page_do_bit17_swizzling); 1108 if (page_do_bit17_swizzling) 1109 ret = __copy_from_user_swizzled(vaddr, shmem_page_offset, 1110 user_data, 1111 page_length); 1112 else 1113 ret = __copy_from_user(vaddr + shmem_page_offset, 1114 user_data, 1115 page_length); 1116 if (needs_clflush_after) 1117 shmem_clflush_swizzled_range(vaddr + shmem_page_offset, 1118 page_length, 1119 page_do_bit17_swizzling); 1120 kunmap(page); 1121 1122 return ret ? -EFAULT : 0; 1123 } 1124 1125 static int 1126 i915_gem_shmem_pwrite(struct drm_device *dev, 1127 struct drm_i915_gem_object *obj, 1128 struct drm_i915_gem_pwrite *args, 1129 struct drm_file *file) 1130 { 1131 ssize_t remain; 1132 loff_t offset; 1133 char __user *user_data; 1134 int shmem_page_offset, page_length, ret = 0; 1135 int obj_do_bit17_swizzling, page_do_bit17_swizzling; 1136 int hit_slowpath = 0; 1137 int needs_clflush_after = 0; 1138 int needs_clflush_before = 0; 1139 struct sg_page_iter sg_iter; 1140 1141 user_data = u64_to_user_ptr(args->data_ptr); 1142 remain = args->size; 1143 1144 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); 1145 1146 ret = i915_gem_object_wait_rendering(obj, false); 1147 if (ret) 1148 return ret; 1149 1150 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) { 1151 /* If we're not in the cpu write domain, set ourself into the gtt 1152 * write domain and manually flush cachelines (if required). This 1153 * optimizes for the case when the gpu will use the data 1154 * right away and we therefore have to clflush anyway. 
*/ 1155 needs_clflush_after = cpu_write_needs_clflush(obj); 1156 } 1157 /* Same trick applies to invalidate partially written cachelines read 1158 * before writing. */ 1159 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) 1160 needs_clflush_before = 1161 !cpu_cache_is_coherent(dev, obj->cache_level); 1162 1163 ret = i915_gem_object_get_pages(obj); 1164 if (ret) 1165 return ret; 1166 1167 intel_fb_obj_invalidate(obj, ORIGIN_CPU); 1168 1169 i915_gem_object_pin_pages(obj); 1170 1171 offset = args->offset; 1172 obj->dirty = 1; 1173 1174 VM_OBJECT_LOCK(obj->base.filp); 1175 vm_object_pip_add(obj->base.filp, 1); 1176 1177 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 1178 offset >> PAGE_SHIFT) { 1179 struct page *page = sg_page_iter_page(&sg_iter); 1180 int partial_cacheline_write; 1181 1182 if (remain <= 0) 1183 break; 1184 1185 /* Operation in this page 1186 * 1187 * shmem_page_offset = offset within page in shmem file 1188 * page_length = bytes to copy for this page 1189 */ 1190 shmem_page_offset = offset_in_page(offset); 1191 1192 page_length = remain; 1193 if ((shmem_page_offset + page_length) > PAGE_SIZE) 1194 page_length = PAGE_SIZE - shmem_page_offset; 1195 1196 /* If we don't overwrite a cacheline completely we need to be 1197 * careful to have up-to-date data by first clflushing. Don't 1198 * overcomplicate things and flush the entire patch. */ 1199 partial_cacheline_write = needs_clflush_before && 1200 ((shmem_page_offset | page_length) 1201 & (boot_cpu_data.x86_clflush_size - 1)); 1202 1203 page_do_bit17_swizzling = obj_do_bit17_swizzling && 1204 (page_to_phys(page) & (1 << 17)) != 0; 1205 1206 ret = shmem_pwrite_fast(page, shmem_page_offset, page_length, 1207 user_data, page_do_bit17_swizzling, 1208 partial_cacheline_write, 1209 needs_clflush_after); 1210 if (ret == 0) 1211 goto next_page; 1212 1213 hit_slowpath = 1; 1214 mutex_unlock(&dev->struct_mutex); 1215 ret = shmem_pwrite_slow(page, shmem_page_offset, page_length, 1216 user_data, page_do_bit17_swizzling, 1217 partial_cacheline_write, 1218 needs_clflush_after); 1219 1220 mutex_lock(&dev->struct_mutex); 1221 1222 if (ret) 1223 goto out; 1224 1225 next_page: 1226 remain -= page_length; 1227 user_data += page_length; 1228 offset += page_length; 1229 } 1230 vm_object_pip_wakeup(obj->base.filp); 1231 VM_OBJECT_UNLOCK(obj->base.filp); 1232 1233 out: 1234 i915_gem_object_unpin_pages(obj); 1235 1236 if (hit_slowpath) { 1237 /* 1238 * Fixup: Flush cpu caches in case we didn't flush the dirty 1239 * cachelines in-line while writing and the object moved 1240 * out of the cpu write domain while we've dropped the lock. 1241 */ 1242 if (!needs_clflush_after && 1243 obj->base.write_domain != I915_GEM_DOMAIN_CPU) { 1244 if (i915_gem_clflush_object(obj, obj->pin_display)) 1245 needs_clflush_after = true; 1246 } 1247 } 1248 1249 if (needs_clflush_after) 1250 i915_gem_chipset_flush(to_i915(dev)); 1251 else 1252 obj->cache_dirty = true; 1253 1254 intel_fb_obj_flush(obj, false, ORIGIN_CPU); 1255 return ret; 1256 } 1257 1258 /** 1259 * Writes data to the object referenced by handle. 1260 * @dev: drm device 1261 * @data: ioctl data blob 1262 * @file: drm file 1263 * 1264 * On error, the contents of the buffer that were to be modified are undefined. 
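 *
 * Path selection: the GTT fast path is tried for objects without struct
 * pages or whose CPU writes would need a clflush; on -EFAULT/-ENOSPC we
 * fall back to phys pwrite (if a phys handle is attached), to the shmem
 * path (if shmem backed), or fail with -ENODEV.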
1265 */ 1266 int 1267 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, 1268 struct drm_file *file) 1269 { 1270 struct drm_i915_private *dev_priv = to_i915(dev); 1271 struct drm_i915_gem_pwrite *args = data; 1272 struct drm_i915_gem_object *obj; 1273 int ret; 1274 1275 if (args->size == 0) 1276 return 0; 1277 1278 #if 0 1279 if (!access_ok(VERIFY_READ, 1280 u64_to_user_ptr(args->data_ptr), 1281 args->size)) 1282 return -EFAULT; 1283 #endif 1284 1285 if (likely(!i915.prefault_disable)) { 1286 ret = fault_in_multipages_readable(u64_to_user_ptr(args->data_ptr), 1287 args->size); 1288 if (ret) 1289 return -EFAULT; 1290 } 1291 1292 intel_runtime_pm_get(dev_priv); 1293 1294 ret = i915_mutex_lock_interruptible(dev); 1295 if (ret) 1296 goto put_rpm; 1297 1298 obj = i915_gem_object_lookup(file, args->handle); 1299 if (!obj) { 1300 ret = -ENOENT; 1301 goto unlock; 1302 } 1303 1304 /* Bounds check destination. */ 1305 if (args->offset > obj->base.size || 1306 args->size > obj->base.size - args->offset) { 1307 ret = -EINVAL; 1308 goto out; 1309 } 1310 1311 trace_i915_gem_object_pwrite(obj, args->offset, args->size); 1312 1313 ret = -EFAULT; 1314 /* We can only do the GTT pwrite on untiled buffers, as otherwise 1315 * it would end up going through the fenced access, and we'll get 1316 * different detiling behavior between reading and writing. 1317 * pread/pwrite currently are reading and writing from the CPU 1318 * perspective, requiring manual detiling by the client. 1319 */ 1320 if (!i915_gem_object_has_struct_page(obj) || 1321 cpu_write_needs_clflush(obj)) { 1322 ret = i915_gem_gtt_pwrite_fast(dev_priv, obj, args, file); 1323 /* Note that the gtt paths might fail with non-page-backed user 1324 * pointers (e.g. gtt mappings when moving data between 1325 * textures). Fallback to the shmem path in that case. */ 1326 } 1327 1328 if (ret == -EFAULT || ret == -ENOSPC) { 1329 if (obj->phys_handle) 1330 ret = i915_gem_phys_pwrite(obj, args, file); 1331 else if (i915_gem_object_has_struct_page(obj)) 1332 ret = i915_gem_shmem_pwrite(dev, obj, args, file); 1333 else 1334 ret = -ENODEV; 1335 } 1336 1337 out: 1338 i915_gem_object_put(obj); 1339 unlock: 1340 mutex_unlock(&dev->struct_mutex); 1341 put_rpm: 1342 intel_runtime_pm_put(dev_priv); 1343 1344 return ret; 1345 } 1346 1347 /** 1348 * Ensures that all rendering to the object has completed and the object is 1349 * safe to unbind from the GTT or access from the CPU. 
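 * With @readonly only the last write request is waited for; otherwise the
 * last read request on every engine is waited for and retired.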
1350 * @obj: i915 gem object 1351 * @readonly: waiting for read access or write 1352 */ 1353 int 1354 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, 1355 bool readonly) 1356 { 1357 struct reservation_object *resv; 1358 int ret, i; 1359 1360 if (readonly) { 1361 if (obj->last_write_req != NULL) { 1362 ret = i915_wait_request(obj->last_write_req); 1363 if (ret) 1364 return ret; 1365 1366 i = obj->last_write_req->engine->id; 1367 if (obj->last_read_req[i] == obj->last_write_req) 1368 i915_gem_object_retire__read(obj, i); 1369 else 1370 i915_gem_object_retire__write(obj); 1371 } 1372 } else { 1373 for (i = 0; i < I915_NUM_ENGINES; i++) { 1374 if (obj->last_read_req[i] == NULL) 1375 continue; 1376 1377 ret = i915_wait_request(obj->last_read_req[i]); 1378 if (ret) 1379 return ret; 1380 1381 i915_gem_object_retire__read(obj, i); 1382 } 1383 GEM_BUG_ON(obj->active); 1384 } 1385 1386 resv = i915_gem_object_get_dmabuf_resv(obj); 1387 if (resv) { 1388 long err; 1389 1390 err = reservation_object_wait_timeout_rcu(resv, !readonly, true, 1391 MAX_SCHEDULE_TIMEOUT); 1392 if (err < 0) 1393 return err; 1394 } 1395 1396 return 0; 1397 } 1398 1399 static void 1400 i915_gem_object_retire_request(struct drm_i915_gem_object *obj, 1401 struct drm_i915_gem_request *req) 1402 { 1403 int ring = req->engine->id; 1404 1405 if (obj->last_read_req[ring] == req) 1406 i915_gem_object_retire__read(obj, ring); 1407 else if (obj->last_write_req == req) 1408 i915_gem_object_retire__write(obj); 1409 1410 if (!i915_reset_in_progress(&req->i915->gpu_error)) 1411 i915_gem_request_retire_upto(req); 1412 } 1413 1414 /* A nonblocking variant of the above wait. This is a highly dangerous routine 1415 * as the object state may change during this call. 1416 */ 1417 static __must_check int 1418 i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj, 1419 struct intel_rps_client *rps, 1420 bool readonly) 1421 { 1422 struct drm_device *dev = obj->base.dev; 1423 struct drm_i915_private *dev_priv = to_i915(dev); 1424 struct drm_i915_gem_request *requests[I915_NUM_ENGINES]; 1425 int ret, i, n = 0; 1426 1427 BUG_ON(!mutex_is_locked(&dev->struct_mutex)); 1428 BUG_ON(!dev_priv->mm.interruptible); 1429 1430 if (!obj->active) 1431 return 0; 1432 1433 if (readonly) { 1434 struct drm_i915_gem_request *req; 1435 1436 req = obj->last_write_req; 1437 if (req == NULL) 1438 return 0; 1439 1440 requests[n++] = i915_gem_request_get(req); 1441 } else { 1442 for (i = 0; i < I915_NUM_ENGINES; i++) { 1443 struct drm_i915_gem_request *req; 1444 1445 req = obj->last_read_req[i]; 1446 if (req == NULL) 1447 continue; 1448 1449 requests[n++] = i915_gem_request_get(req); 1450 } 1451 } 1452 1453 mutex_unlock(&dev->struct_mutex); 1454 ret = 0; 1455 for (i = 0; ret == 0 && i < n; i++) 1456 ret = __i915_wait_request(requests[i], true, NULL, rps); 1457 mutex_lock(&dev->struct_mutex); 1458 1459 for (i = 0; i < n; i++) { 1460 if (ret == 0) 1461 i915_gem_object_retire_request(obj, requests[i]); 1462 i915_gem_request_put(requests[i]); 1463 } 1464 1465 return ret; 1466 } 1467 1468 static struct intel_rps_client *to_rps_client(struct drm_file *file) 1469 { 1470 struct drm_i915_file_private *fpriv = file->driver_priv; 1471 return &fpriv->rps; 1472 } 1473 1474 static enum fb_op_origin 1475 write_origin(struct drm_i915_gem_object *obj, unsigned domain) 1476 { 1477 return domain == I915_GEM_DOMAIN_GTT && !obj->has_wc_mmap ? 
1478 ORIGIN_GTT : ORIGIN_CPU; 1479 } 1480 1481 /** 1482 * Called when user space prepares to use an object with the CPU, either 1483 * through the mmap ioctl's mapping or a GTT mapping. 1484 * @dev: drm device 1485 * @data: ioctl data blob 1486 * @file: drm file 1487 */ 1488 int 1489 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, 1490 struct drm_file *file) 1491 { 1492 struct drm_i915_gem_set_domain *args = data; 1493 struct drm_i915_gem_object *obj; 1494 uint32_t read_domains = args->read_domains; 1495 uint32_t write_domain = args->write_domain; 1496 int ret; 1497 1498 /* Only handle setting domains to types used by the CPU. */ 1499 if (write_domain & I915_GEM_GPU_DOMAINS) 1500 return -EINVAL; 1501 1502 if (read_domains & I915_GEM_GPU_DOMAINS) 1503 return -EINVAL; 1504 1505 /* Having something in the write domain implies it's in the read 1506 * domain, and only that read domain. Enforce that in the request. 1507 */ 1508 if (write_domain != 0 && read_domains != write_domain) 1509 return -EINVAL; 1510 1511 ret = i915_mutex_lock_interruptible(dev); 1512 if (ret) 1513 return ret; 1514 1515 obj = i915_gem_object_lookup(file, args->handle); 1516 if (!obj) { 1517 ret = -ENOENT; 1518 goto unlock; 1519 } 1520 1521 /* Try to flush the object off the GPU without holding the lock. 1522 * We will repeat the flush holding the lock in the normal manner 1523 * to catch cases where we are gazumped. 1524 */ 1525 ret = i915_gem_object_wait_rendering__nonblocking(obj, 1526 to_rps_client(file), 1527 !write_domain); 1528 if (ret) 1529 goto unref; 1530 1531 if (read_domains & I915_GEM_DOMAIN_GTT) 1532 ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0); 1533 else 1534 ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0); 1535 1536 if (write_domain != 0) 1537 intel_fb_obj_invalidate(obj, write_origin(obj, write_domain)); 1538 1539 unref: 1540 i915_gem_object_put(obj); 1541 unlock: 1542 mutex_unlock(&dev->struct_mutex); 1543 return ret; 1544 } 1545 1546 /** 1547 * Called when user space has done writes to this buffer 1548 * @dev: drm device 1549 * @data: ioctl data blob 1550 * @file: drm file 1551 */ 1552 int 1553 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, 1554 struct drm_file *file) 1555 { 1556 struct drm_i915_gem_sw_finish *args = data; 1557 struct drm_i915_gem_object *obj; 1558 int ret = 0; 1559 1560 ret = i915_mutex_lock_interruptible(dev); 1561 if (ret) 1562 return ret; 1563 1564 obj = i915_gem_object_lookup(file, args->handle); 1565 if (!obj) { 1566 ret = -ENOENT; 1567 goto unlock; 1568 } 1569 1570 /* Pinned buffers may be scanout, so flush the cache */ 1571 if (obj->pin_display) 1572 i915_gem_object_flush_cpu_write_domain(obj); 1573 1574 i915_gem_object_put(obj); 1575 unlock: 1576 mutex_unlock(&dev->struct_mutex); 1577 return ret; 1578 } 1579 1580 /** 1581 * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address 1582 * it is mapped to. 1583 * @dev: drm device 1584 * @data: ioctl data blob 1585 * @file: drm file 1586 * 1587 * While the mapping holds a reference on the contents of the object, it doesn't 1588 * imply a ref on the object itself. 1589 * 1590 * IMPORTANT: 1591 * 1592 * DRM driver writers who look a this function as an example for how to do GEM 1593 * mmap support, please don't implement mmap support like here. The modern way 1594 * to implement DRM mmap support is with an mmap offset ioctl (like 1595 * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly. 
1596 * That way debug tooling like valgrind will understand what's going on, hiding 1597 * the mmap call in a driver private ioctl will break that. The i915 driver only 1598 * does cpu mmaps this way because we didn't know better. 1599 */ 1600 int 1601 i915_gem_mmap_ioctl(struct drm_device *dev, void *data, 1602 struct drm_file *file) 1603 { 1604 struct drm_i915_gem_mmap *args = data; 1605 struct drm_i915_gem_object *obj; 1606 unsigned long addr; 1607 1608 struct proc *p = curproc; 1609 vm_map_t map = &p->p_vmspace->vm_map; 1610 vm_size_t size; 1611 int error = 0, rv; 1612 1613 if (args->flags & ~(I915_MMAP_WC)) 1614 return -EINVAL; 1615 1616 #if 0 1617 if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT)) 1618 return -ENODEV; 1619 #endif 1620 1621 obj = i915_gem_object_lookup(file, args->handle); 1622 if (!obj) 1623 return -ENOENT; 1624 1625 if (args->size == 0) 1626 goto out; 1627 1628 size = round_page(args->size); 1629 if (map->size + size > p->p_rlimit[RLIMIT_VMEM].rlim_cur) { 1630 error = -ENOMEM; 1631 goto out; 1632 } 1633 1634 /* prime objects have no backing filp to GEM mmap 1635 * pages from. 1636 */ 1637 if (!obj->base.filp) { 1638 i915_gem_object_put_unlocked(obj); 1639 return -EINVAL; 1640 } 1641 1642 /* 1643 * Call hint to ensure that NULL is not returned as a valid address 1644 * and to reduce vm_map traversals. XXX causes instability, use a 1645 * fixed low address as the start point instead to avoid the NULL 1646 * return issue. 1647 */ 1648 addr = PAGE_SIZE; 1649 1650 /* 1651 * Use 256KB alignment. It is unclear why this matters for a 1652 * virtual address but it appears to fix a number of application/X 1653 * crashes and kms console switching is much faster. 1654 */ 1655 vm_object_hold(obj->base.filp); 1656 vm_object_reference_locked(obj->base.filp); 1657 vm_object_drop(obj->base.filp); 1658 1659 /* Something gets wrong here: fails to mmap 4096 */ 1660 rv = vm_map_find(map, obj->base.filp, NULL, 1661 args->offset, &addr, args->size, 1662 256 * 1024, /* align */ 1663 TRUE, /* fitit */ 1664 VM_MAPTYPE_NORMAL, VM_SUBSYS_DRM_GEM, 1665 VM_PROT_READ | VM_PROT_WRITE, /* prot */ 1666 VM_PROT_READ | VM_PROT_WRITE, /* max */ 1667 MAP_SHARED /* cow */); 1668 if (rv != KERN_SUCCESS) { 1669 vm_object_deallocate(obj->base.filp); 1670 error = -vm_mmap_to_errno(rv); 1671 } else { 1672 args->addr_ptr = (uint64_t)addr; 1673 } 1674 out: 1675 i915_gem_object_put_unlocked(obj); 1676 return (error); 1677 } 1678 1679 /** 1680 * i915_gem_fault - fault a page into the GTT 1681 * 1682 * vm_obj is locked on entry and expected to be locked on return. 1683 * 1684 * The vm_pager has placemarked the object with an anonymous memory page 1685 * which we must replace atomically to avoid races against concurrent faults 1686 * on the same page. XXX we currently are unable to do this atomically. 1687 * 1688 * If we are to return an error we should not touch the anonymous page, 1689 * the caller will deallocate it. 1690 * 1691 * XXX Most GEM calls appear to be interruptable, but we can't hard loop 1692 * in that case. Release all resources and wait 1 tick before retrying. 1693 * This is a huge problem which needs to be fixed by getting rid of most 1694 * of the interruptability. The linux code does not retry but does appear 1695 * to have some sort of mechanism (VM_FAULT_NOPAGE ?) for the higher level 1696 * to be able to retry. 
1697 * 1698 * -- 1699 * @vma: VMA in question 1700 * @vmf: fault info 1701 * 1702 * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped 1703 * from userspace. The fault handler takes care of binding the object to 1704 * the GTT (if needed), allocating and programming a fence register (again, 1705 * only if needed based on whether the old reg is still valid or the object 1706 * is tiled) and inserting a new PTE into the faulting process. 1707 * 1708 * Note that the faulting process may involve evicting existing objects 1709 * from the GTT and/or fence registers to make room. So performance may 1710 * suffer if the GTT working set is large or there are few fence registers 1711 * left. 1712 * 1713 * vm_obj is locked on entry and expected to be locked on return. The VM 1714 * pager has placed an anonymous memory page at (obj,offset) which we have 1715 * to replace. 1716 */ 1717 int i915_gem_fault(vm_object_t vm_obj, vm_ooffset_t offset, int prot, vm_page_t *mres) 1718 { 1719 struct drm_i915_gem_object *obj = to_intel_bo(vm_obj->handle); 1720 struct drm_device *dev = obj->base.dev; 1721 struct drm_i915_private *dev_priv = to_i915(dev); 1722 struct i915_ggtt *ggtt = &dev_priv->ggtt; 1723 struct i915_ggtt_view view = i915_ggtt_view_normal; 1724 unsigned long page_offset; 1725 vm_page_t m; 1726 int ret = 0; 1727 bool write = !!(prot & VM_PROT_WRITE); 1728 1729 intel_runtime_pm_get(dev_priv); 1730 1731 /* We don't use vmf->pgoff since that has the fake offset */ 1732 page_offset = (unsigned long)offset; 1733 1734 /* 1735 * vm_fault() has supplied us with a busied page placeholding 1736 * the operation. This presents a lock order reversal issue 1737 * again i915_gem_release_mmap() for our device mutex. 1738 * 1739 * Deal with the problem by getting rid of the placeholder now, 1740 * and then dealing with the potential for a new placeholder when 1741 * we try to insert later. 1742 */ 1743 if (*mres != NULL) { 1744 m = *mres; 1745 *mres = NULL; 1746 if ((m->busy_count & PBUSY_LOCKED) == 0) 1747 kprintf("i915_gem_fault: Page was not busy\n"); 1748 else 1749 vm_page_remove(m); 1750 vm_page_free(m); 1751 } 1752 1753 m = NULL; 1754 1755 retry: 1756 ret = i915_mutex_lock_interruptible(dev); 1757 if (ret) 1758 goto out; 1759 1760 trace_i915_gem_object_fault(obj, page_offset, true, write); 1761 1762 /* Try to flush the object off the GPU first without holding the lock. 1763 * Upon reacquiring the lock, we will perform our sanity checks and then 1764 * repeat the flush holding the lock in the normal manner to catch cases 1765 * where we are gazumped. 1766 */ 1767 ret = i915_gem_object_wait_rendering__nonblocking(obj, NULL, !write); 1768 if (ret) 1769 goto unlock; 1770 1771 /* Access to snoopable pages through the GTT is incoherent. */ 1772 if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) { 1773 ret = -EFAULT; 1774 goto unlock; 1775 } 1776 1777 /* Use a partial view if the object is bigger than the aperture. 
*/ 1778 if (obj->base.size >= ggtt->mappable_end && 1779 obj->tiling_mode == I915_TILING_NONE) { 1780 #if 0 1781 static const unsigned int chunk_size = 256; // 1 MiB 1782 1783 memset(&view, 0, sizeof(view)); 1784 view.type = I915_GGTT_VIEW_PARTIAL; 1785 view.params.partial.offset = rounddown(page_offset, chunk_size); 1786 view.params.partial.size = 1787 min_t(unsigned int, 1788 chunk_size, 1789 (vma->vm_end - vma->vm_start)/PAGE_SIZE - 1790 view.params.partial.offset); 1791 #endif 1792 } 1793 1794 /* Now pin it into the GTT if needed */ 1795 ret = i915_gem_object_ggtt_pin(obj, &view, 0, PIN_MAPPABLE); 1796 if (ret) 1797 goto unlock; 1798 1799 ret = i915_gem_object_set_to_gtt_domain(obj, write); 1800 if (ret) 1801 goto unpin; 1802 1803 ret = i915_gem_object_get_fence(obj); 1804 if (ret) 1805 goto unpin; 1806 1807 /* 1808 * START FREEBSD MAGIC 1809 * 1810 * Add a pip count to avoid destruction and certain other 1811 * complex operations (such as collapses?) while unlocked. 1812 */ 1813 vm_object_pip_add(vm_obj, 1); 1814 1815 ret = 0; 1816 m = NULL; 1817 1818 /* 1819 * Since the object lock was dropped, another thread might have 1820 * faulted on the same GTT address and instantiated the mapping. 1821 * Recheck. 1822 */ 1823 m = vm_page_lookup(vm_obj, OFF_TO_IDX(offset)); 1824 if (m != NULL) { 1825 /* 1826 * Try to busy the page, retry on failure (non-zero ret). 1827 */ 1828 if (vm_page_busy_try(m, false)) { 1829 kprintf("i915_gem_fault: BUSY\n"); 1830 ret = -EINTR; 1831 goto unlock; 1832 } 1833 goto have_page; 1834 } 1835 /* END FREEBSD MAGIC */ 1836 1837 obj->fault_mappable = true; 1838 1839 /* Finally, remap it using the new GTT offset */ 1840 m = vm_phys_fictitious_to_vm_page(ggtt->mappable_base + 1841 i915_gem_obj_ggtt_offset_view(obj, &view) + offset); 1842 if (m == NULL) { 1843 ret = -EFAULT; 1844 goto unpin; 1845 } 1846 KASSERT((m->flags & PG_FICTITIOUS) != 0, ("not fictitious %p", m)); 1847 KASSERT(m->wire_count == 1, ("wire_count not 1 %p", m)); 1848 1849 /* 1850 * Try to busy the page. Fails on non-zero return. 1851 */ 1852 if (vm_page_busy_try(m, false)) { 1853 kprintf("i915_gem_fault: BUSY(2)\n"); 1854 ret = -EINTR; 1855 goto unpin; 1856 } 1857 m->valid = VM_PAGE_BITS_ALL; 1858 1859 #if 1 1860 /* 1861 * This should always work since we already checked via a lookup 1862 * above. 1863 */ 1864 if (vm_page_insert(m, vm_obj, OFF_TO_IDX(offset)) == FALSE) { 1865 kprintf("i915:gem_fault: page %p,%jd already in object\n", 1866 vm_obj, 1867 OFF_TO_IDX(offset)); 1868 vm_page_wakeup(m); 1869 ret = -EINTR; 1870 goto unpin; 1871 } 1872 #else 1873 /* NOT COMPILED ATM */ 1874 if (unlikely(view.type == I915_GGTT_VIEW_PARTIAL)) { 1875 /* Overriding existing pages in partial view does not cause 1876 * us any trouble as TLBs are still valid because the fault 1877 * is due to userspace losing part of the mapping or never 1878 * having accessed it before (at this partials' range). 
1879 */ 1880 unsigned long base = vma->vm_start + 1881 (view.params.partial.offset << PAGE_SHIFT); 1882 unsigned int i; 1883 1884 for (i = 0; i < view.params.partial.size; i++) { 1885 ret = vm_insert_pfn(vma, base + i * PAGE_SIZE, pfn + i); 1886 if (ret) 1887 break; 1888 } 1889 1890 obj->fault_mappable = true; 1891 } else { 1892 if (!obj->fault_mappable) { 1893 unsigned long size = min_t(unsigned long, 1894 vma->vm_end - vma->vm_start, 1895 obj->base.size); 1896 int i; 1897 1898 for (i = 0; i < size >> PAGE_SHIFT; i++) { 1899 ret = vm_insert_pfn(vma, 1900 (unsigned long)vma->vm_start + i * PAGE_SIZE, 1901 pfn + i); 1902 if (ret) 1903 break; 1904 } 1905 1906 obj->fault_mappable = true; 1907 } else 1908 ret = vm_insert_pfn(vma, 1909 (unsigned long)vmf->virtual_address, 1910 pfn + page_offset); 1911 } 1912 #endif 1913 1914 have_page: 1915 *mres = m; 1916 1917 i915_gem_object_ggtt_unpin_view(obj, &view); 1918 mutex_unlock(&dev->struct_mutex); 1919 ret = VM_PAGER_OK; 1920 goto done; 1921 1922 /* 1923 * ALTERNATIVE ERROR RETURN. 1924 * 1925 * OBJECT EXPECTED TO BE LOCKED. 1926 */ 1927 unpin: 1928 i915_gem_object_ggtt_unpin_view(obj, &view); 1929 unlock: 1930 mutex_unlock(&dev->struct_mutex); 1931 out: 1932 switch (ret) { 1933 case -EIO: 1934 /* 1935 * We eat errors when the gpu is terminally wedged to avoid 1936 * userspace unduly crashing (gl has no provisions for mmaps to 1937 * fail). But any other -EIO isn't ours (e.g. swap in failure) 1938 * and so needs to be reported. 1939 */ 1940 if (!i915_terminally_wedged(&dev_priv->gpu_error)) { 1941 // ret = VM_FAULT_SIGBUS; 1942 break; 1943 } 1944 case -EAGAIN: 1945 /* 1946 * EAGAIN means the gpu is hung and we'll wait for the error 1947 * handler to reset everything when re-faulting in 1948 * i915_mutex_lock_interruptible. 1949 */ 1950 case -ERESTARTSYS: 1951 case -EINTR: 1952 VM_OBJECT_UNLOCK(vm_obj); 1953 int dummy; 1954 tsleep(&dummy, 0, "delay", 1); /* XXX */ 1955 VM_OBJECT_LOCK(vm_obj); 1956 goto retry; 1957 default: 1958 WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret); 1959 ret = VM_PAGER_ERROR; 1960 break; 1961 } 1962 1963 done: 1964 vm_object_pip_wakeup(vm_obj); 1965 1966 intel_runtime_pm_put(dev_priv); 1967 return ret; 1968 } 1969 1970 /** 1971 * i915_gem_release_mmap - remove physical page mappings 1972 * @obj: obj in question 1973 * 1974 * Preserve the reservation of the mmapping with the DRM core code, but 1975 * relinquish ownership of the pages back to the system. 1976 * 1977 * It is vital that we remove the page mapping if we have mapped a tiled 1978 * object through the GTT and then lose the fence register due to 1979 * resource pressure. Similarly if the object has been moved out of the 1980 * aperture, than pages mapped into userspace must be revoked. Removing the 1981 * mapping will then trigger a page fault on the next user access, allowing 1982 * fixup by i915_gem_fault(). 1983 */ 1984 void 1985 i915_gem_release_mmap(struct drm_i915_gem_object *obj) 1986 { 1987 vm_object_t devobj; 1988 vm_page_t m; 1989 int i, page_count; 1990 1991 /* Serialisation between user GTT access and our code depends upon 1992 * revoking the CPU's PTE whilst the mutex is held. The next user 1993 * pagefault then has to wait until we release the mutex. 
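	 * (Upstream Linux revokes the PTEs with drm_vma_node_unmap(); this
	 * port instead looks up the cdev pager object below and frees the
	 * fictitious pages it holds, which likewise forces a fresh fault on
	 * the next user access.)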
1994 */ 1995 lockdep_assert_held(&obj->base.dev->struct_mutex); 1996 1997 if (!obj->fault_mappable) 1998 return; 1999 2000 devobj = cdev_pager_lookup(obj); 2001 if (devobj != NULL) { 2002 page_count = OFF_TO_IDX(obj->base.size); 2003 2004 VM_OBJECT_LOCK(devobj); 2005 for (i = 0; i < page_count; i++) { 2006 m = vm_page_lookup_busy_wait(devobj, i, TRUE, "915unm"); 2007 if (m == NULL) 2008 continue; 2009 cdev_pager_free_page(devobj, m); 2010 } 2011 VM_OBJECT_UNLOCK(devobj); 2012 vm_object_deallocate(devobj); 2013 } 2014 2015 /* Ensure that the CPU's PTEs are revoked and there are no outstanding 2016 * memory transactions from userspace before we return. The TLB 2017 * flushing implied by changing the PTEs above *should* be 2018 * sufficient; an extra barrier here just provides us with a bit 2019 * of paranoid documentation about our requirement to serialise 2020 * memory writes before touching registers / GSM. 2021 */ 2022 wmb(); 2023 2024 obj->fault_mappable = false; 2025 } 2026 2027 void 2028 i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv) 2029 { 2030 struct drm_i915_gem_object *obj; 2031 2032 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) 2033 i915_gem_release_mmap(obj); 2034 } 2035 2036 uint32_t 2037 i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode) 2038 { 2039 uint32_t gtt_size; 2040 2041 if (INTEL_INFO(dev)->gen >= 4 || 2042 tiling_mode == I915_TILING_NONE) 2043 return size; 2044 2045 /* Previous chips need a power-of-two fence region when tiling */ 2046 if (IS_GEN3(dev)) 2047 gtt_size = 1024*1024; 2048 else 2049 gtt_size = 512*1024; 2050 2051 while (gtt_size < size) 2052 gtt_size <<= 1; 2053 2054 return gtt_size; 2055 } 2056 2057 /** 2058 * i915_gem_get_gtt_alignment - return required GTT alignment for an object 2059 * @dev: drm device 2060 * @size: object size 2061 * @tiling_mode: tiling mode 2062 * @fenced: is fenced alignment required or not 2063 * 2064 * Return the required GTT alignment for an object, taking into account 2065 * potential fence register mapping. 2066 */ 2067 uint32_t 2068 i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size, 2069 int tiling_mode, bool fenced) 2070 { 2071 /* 2072 * Minimum alignment is 4k (GTT page size), but might be greater 2073 * if a fence register is needed for the object. 2074 */ 2075 if (INTEL_INFO(dev)->gen >= 4 || (!fenced && IS_G33(dev)) || 2076 tiling_mode == I915_TILING_NONE) 2077 return 4096; 2078 2079 /* 2080 * Previous chips need to be aligned to the size of the smallest 2081 * fence register that can contain the object. 2082 */ 2083 return i915_gem_get_gtt_size(dev, size, tiling_mode); 2084 } 2085 2086 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj) 2087 { 2088 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 2089 int ret; 2090 2091 dev_priv->mm.shrinker_no_lock_stealing = true; 2092 2093 ret = drm_gem_create_mmap_offset(&obj->base); 2094 if (ret != -ENOSPC) 2095 goto out; 2096 2097 /* Badly fragmented mmap space? The only way we can recover 2098 * space is by destroying unwanted objects. We can't randomly release 2099 * mmap_offsets as userspace expects them to be persistent for the 2100 * lifetime of the objects. The closest we can do is to release the 2101 * offsets on purgeable objects by truncating them and marking them purged, 2102 * which prevents userspace from ever using that object again.
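 *
 * The calls below therefore escalate: first reap only purgeable pages
 * (both bound and unbound) and retry the offset allocation, and only
 * fall back to i915_gem_shrink_all() if that still reports -ENOSPC.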
2103 */ 2104 i915_gem_shrink(dev_priv, 2105 obj->base.size >> PAGE_SHIFT, 2106 I915_SHRINK_BOUND | 2107 I915_SHRINK_UNBOUND | 2108 I915_SHRINK_PURGEABLE); 2109 ret = drm_gem_create_mmap_offset(&obj->base); 2110 if (ret != -ENOSPC) 2111 goto out; 2112 2113 i915_gem_shrink_all(dev_priv); 2114 ret = drm_gem_create_mmap_offset(&obj->base); 2115 out: 2116 dev_priv->mm.shrinker_no_lock_stealing = false; 2117 2118 return ret; 2119 } 2120 2121 #if 0 2122 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj) 2123 { 2124 drm_gem_free_mmap_offset(&obj->base); 2125 } 2126 #endif 2127 2128 int 2129 i915_gem_mmap_gtt(struct drm_file *file, 2130 struct drm_device *dev, 2131 uint32_t handle, 2132 uint64_t *offset) 2133 { 2134 struct drm_i915_gem_object *obj; 2135 int ret; 2136 2137 ret = i915_mutex_lock_interruptible(dev); 2138 if (ret) 2139 return ret; 2140 2141 obj = i915_gem_object_lookup(file, handle); 2142 if (!obj) { 2143 ret = -ENOENT; 2144 goto unlock; 2145 } 2146 2147 if (obj->madv != I915_MADV_WILLNEED) { 2148 DRM_DEBUG("Attempting to mmap a purgeable buffer\n"); 2149 ret = -EFAULT; 2150 goto out; 2151 } 2152 2153 ret = i915_gem_object_create_mmap_offset(obj); 2154 if (ret) 2155 goto out; 2156 2157 *offset = DRM_GEM_MAPPING_OFF(obj->base.map_list.key) | 2158 DRM_GEM_MAPPING_KEY; 2159 2160 out: 2161 i915_gem_object_put(obj); 2162 unlock: 2163 mutex_unlock(&dev->struct_mutex); 2164 return ret; 2165 } 2166 2167 /** 2168 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing 2169 * @dev: DRM device 2170 * @data: GTT mapping ioctl data 2171 * @file: GEM object info 2172 * 2173 * Simply returns the fake offset to userspace so it can mmap it. 2174 * The mmap call will end up in drm_gem_mmap(), which will set things 2175 * up so we can get faults in the handler above. 2176 * 2177 * The fault handler will take care of binding the object into the GTT 2178 * (since it may have been evicted to make room for something), allocating 2179 * a fence register, and mapping the appropriate aperture address into 2180 * userspace. 2181 */ 2182 int 2183 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, 2184 struct drm_file *file) 2185 { 2186 struct drm_i915_gem_mmap_gtt *args = data; 2187 2188 return i915_gem_mmap_gtt(file, dev, args->handle, (uint64_t *)&args->offset); 2189 } 2190 2191 /* Immediately discard the backing storage */ 2192 static void 2193 i915_gem_object_truncate(struct drm_i915_gem_object *obj) 2194 { 2195 vm_object_t vm_obj = obj->base.filp; 2196 2197 if (obj->base.filp == NULL) 2198 return; 2199 2200 VM_OBJECT_LOCK(vm_obj); 2201 vm_object_page_remove(vm_obj, 0, 0, false); 2202 VM_OBJECT_UNLOCK(vm_obj); 2203 2204 /* Our goal here is to return as much of the memory as 2205 * is possible back to the system as we are called from OOM. 2206 * To do this we must instruct the shmfs to drop all of its 2207 * backing pages, *now*. 
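 *
 * On this port the shmem_truncate_range() call below is compiled out;
 * the vm_object_page_remove() above has already dropped the backing
 * pages, so only the madvise bookkeeping remains.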
2208 */ 2209 #if 0 2210 shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1); 2211 #endif 2212 obj->madv = __I915_MADV_PURGED; 2213 } 2214 2215 /* Try to discard unwanted pages */ 2216 static void 2217 i915_gem_object_invalidate(struct drm_i915_gem_object *obj) 2218 { 2219 #if 0 2220 struct address_space *mapping; 2221 #endif 2222 2223 switch (obj->madv) { 2224 case I915_MADV_DONTNEED: 2225 i915_gem_object_truncate(obj); 2226 case __I915_MADV_PURGED: 2227 return; 2228 } 2229 2230 if (obj->base.filp == NULL) 2231 return; 2232 2233 #if 0 2234 mapping = file_inode(obj->base.filp)->i_mapping, 2235 #endif 2236 invalidate_mapping_pages(obj->base.filp, 0, (loff_t)-1); 2237 } 2238 2239 static void 2240 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj) 2241 { 2242 struct sgt_iter sgt_iter; 2243 struct page *page; 2244 int ret; 2245 2246 BUG_ON(obj->madv == __I915_MADV_PURGED); 2247 2248 ret = i915_gem_object_set_to_cpu_domain(obj, true); 2249 if (WARN_ON(ret)) { 2250 /* In the event of a disaster, abandon all caches and 2251 * hope for the best. 2252 */ 2253 i915_gem_clflush_object(obj, true); 2254 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU; 2255 } 2256 2257 i915_gem_gtt_finish_object(obj); 2258 2259 if (i915_gem_object_needs_bit17_swizzle(obj)) 2260 i915_gem_object_save_bit_17_swizzle(obj); 2261 2262 if (obj->madv == I915_MADV_DONTNEED) 2263 obj->dirty = 0; 2264 2265 for_each_sgt_page(page, sgt_iter, obj->pages) { 2266 if (obj->dirty) 2267 set_page_dirty(page); 2268 2269 if (obj->madv == I915_MADV_WILLNEED) 2270 mark_page_accessed(page); 2271 2272 vm_page_busy_wait((struct vm_page *)page, FALSE, "i915gem"); 2273 vm_page_unwire((struct vm_page *)page, 1); 2274 vm_page_wakeup((struct vm_page *)page); 2275 } 2276 obj->dirty = 0; 2277 2278 sg_free_table(obj->pages); 2279 kfree(obj->pages); 2280 } 2281 2282 int 2283 i915_gem_object_put_pages(struct drm_i915_gem_object *obj) 2284 { 2285 const struct drm_i915_gem_object_ops *ops = obj->ops; 2286 2287 if (obj->pages == NULL) 2288 return 0; 2289 2290 if (obj->pages_pin_count) 2291 return -EBUSY; 2292 2293 BUG_ON(i915_gem_obj_bound_any(obj)); 2294 2295 /* ->put_pages might need to allocate memory for the bit17 swizzle 2296 * array, hence protect them from being reaped by removing them from gtt 2297 * lists early. */ 2298 list_del(&obj->global_list); 2299 2300 if (obj->mapping) { 2301 if (is_vmalloc_addr(obj->mapping)) 2302 vunmap(obj->mapping); 2303 else 2304 kunmap(kmap_to_page(obj->mapping)); 2305 obj->mapping = NULL; 2306 } 2307 2308 ops->put_pages(obj); 2309 obj->pages = NULL; 2310 2311 i915_gem_object_invalidate(obj); 2312 2313 return 0; 2314 } 2315 2316 static int 2317 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) 2318 { 2319 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 2320 int page_count, i; 2321 vm_object_t vm_obj; 2322 struct sg_table *st; 2323 struct scatterlist *sg; 2324 struct sgt_iter sgt_iter; 2325 struct page *page; 2326 unsigned long last_pfn = 0; /* suppress gcc warning */ 2327 int ret; 2328 2329 /* Assert that the object is not currently in any GPU domain. 
As it 2330 * wasn't in the GTT, there shouldn't be any way it could have been in 2331 * a GPU cache 2332 */ 2333 BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS); 2334 BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS); 2335 2336 st = kmalloc(sizeof(*st), M_DRM, GFP_KERNEL); 2337 if (st == NULL) 2338 return -ENOMEM; 2339 2340 page_count = obj->base.size / PAGE_SIZE; 2341 if (sg_alloc_table(st, page_count, GFP_KERNEL)) { 2342 kfree(st); 2343 return -ENOMEM; 2344 } 2345 2346 /* Get the list of pages out of our struct file. They'll be pinned 2347 * at this point until we release them. 2348 * 2349 * Fail silently without starting the shrinker 2350 */ 2351 vm_obj = obj->base.filp; 2352 VM_OBJECT_LOCK(vm_obj); 2353 sg = st->sgl; 2354 st->nents = 0; 2355 for (i = 0; i < page_count; i++) { 2356 page = shmem_read_mapping_page(vm_obj, i); 2357 if (IS_ERR(page)) { 2358 i915_gem_shrink(dev_priv, 2359 page_count, 2360 I915_SHRINK_BOUND | 2361 I915_SHRINK_UNBOUND | 2362 I915_SHRINK_PURGEABLE); 2363 page = shmem_read_mapping_page(vm_obj, i); 2364 } 2365 if (IS_ERR(page)) { 2366 /* We've tried hard to allocate the memory by reaping 2367 * our own buffer, now let the real VM do its job and 2368 * go down in flames if truly OOM. 2369 */ 2370 i915_gem_shrink_all(dev_priv); 2371 page = shmem_read_mapping_page(vm_obj, i); 2372 if (IS_ERR(page)) { 2373 ret = PTR_ERR(page); 2374 goto err_sg; 2375 } 2376 } 2377 #ifdef CONFIG_SWIOTLB 2378 if (swiotlb_nr_tbl()) { 2379 st->nents++; 2380 sg_set_page(sg, page, PAGE_SIZE, 0); 2381 sg = sg_next(sg); 2382 continue; 2383 } 2384 #endif 2385 if (!i || page_to_pfn(page) != last_pfn + 1) { 2386 if (i) 2387 sg = sg_next(sg); 2388 st->nents++; 2389 sg_set_page(sg, page, PAGE_SIZE, 0); 2390 } else { 2391 sg->length += PAGE_SIZE; 2392 } 2393 last_pfn = page_to_pfn(page); 2394 2395 /* Check that the i965g/gm workaround works. */ 2396 } 2397 #ifdef CONFIG_SWIOTLB 2398 if (!swiotlb_nr_tbl()) 2399 #endif 2400 sg_mark_end(sg); 2401 obj->pages = st; 2402 VM_OBJECT_UNLOCK(vm_obj); 2403 2404 ret = i915_gem_gtt_prepare_object(obj); 2405 if (ret) 2406 goto err_pages; 2407 2408 if (i915_gem_object_needs_bit17_swizzle(obj)) 2409 i915_gem_object_do_bit_17_swizzle(obj); 2410 2411 if (obj->tiling_mode != I915_TILING_NONE && 2412 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) 2413 i915_gem_object_pin_pages(obj); 2414 2415 return 0; 2416 2417 err_sg: 2418 sg_mark_end(sg); 2419 err_pages: 2420 for_each_sgt_page(page, sgt_iter, st) 2421 { 2422 struct vm_page *vmp = (struct vm_page *)page; 2423 vm_page_busy_wait(vmp, FALSE, "i915gem"); 2424 vm_page_unwire(vmp, 0); 2425 vm_page_wakeup(vmp); 2426 } 2427 VM_OBJECT_UNLOCK(vm_obj); 2428 sg_free_table(st); 2429 kfree(st); 2430 2431 /* shmemfs first checks if there is enough memory to allocate the page 2432 * and reports ENOSPC should there be insufficient, along with the usual 2433 * ENOMEM for a genuine allocation failure. 2434 * 2435 * We use ENOSPC in our driver to mean that we have run out of aperture 2436 * space and so want to translate the error from shmemfs back to our 2437 * usual understanding of ENOMEM. 2438 */ 2439 if (ret == -ENOSPC) 2440 ret = -ENOMEM; 2441 2442 return ret; 2443 } 2444 2445 /* Ensure that the associated pages are gathered from the backing storage 2446 * and pinned into our object. 
i915_gem_object_get_pages() may be called 2447 * multiple times before they are released by a single call to 2448 * i915_gem_object_put_pages() - once the pages are no longer referenced 2449 * either as a result of memory pressure (reaping pages under the shrinker) 2450 * or as the object is itself released. 2451 */ 2452 int 2453 i915_gem_object_get_pages(struct drm_i915_gem_object *obj) 2454 { 2455 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 2456 const struct drm_i915_gem_object_ops *ops = obj->ops; 2457 int ret; 2458 2459 if (obj->pages) 2460 return 0; 2461 2462 if (obj->madv != I915_MADV_WILLNEED) { 2463 DRM_DEBUG("Attempting to obtain a purgeable object\n"); 2464 return -EFAULT; 2465 } 2466 2467 BUG_ON(obj->pages_pin_count); 2468 2469 ret = ops->get_pages(obj); 2470 if (ret) 2471 return ret; 2472 2473 list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list); 2474 2475 obj->get_page.sg = obj->pages->sgl; 2476 obj->get_page.last = 0; 2477 2478 return 0; 2479 } 2480 2481 /* The 'mapping' part of i915_gem_object_pin_map() below */ 2482 static void *i915_gem_object_map(const struct drm_i915_gem_object *obj) 2483 { 2484 unsigned long n_pages = obj->base.size >> PAGE_SHIFT; 2485 struct sg_table *sgt = obj->pages; 2486 struct sgt_iter sgt_iter; 2487 struct page *page; 2488 struct page *stack_pages[32]; 2489 struct page **pages = stack_pages; 2490 unsigned long i = 0; 2491 void *addr; 2492 2493 /* A single page can always be kmapped */ 2494 if (n_pages == 1) 2495 return kmap(sg_page(sgt->sgl)); 2496 2497 if (n_pages > ARRAY_SIZE(stack_pages)) { 2498 /* Too big for stack -- allocate temporary array instead */ 2499 pages = drm_malloc_gfp(n_pages, sizeof(*pages), GFP_TEMPORARY); 2500 if (!pages) 2501 return NULL; 2502 } 2503 2504 for_each_sgt_page(page, sgt_iter, sgt) 2505 pages[i++] = page; 2506 2507 /* Check that we have the expected number of pages */ 2508 GEM_BUG_ON(i != n_pages); 2509 2510 addr = vmap(pages, n_pages, 0, PAGE_KERNEL); 2511 2512 if (pages != stack_pages) 2513 drm_free_large(pages); 2514 2515 return addr; 2516 } 2517 2518 /* get, pin, and map the pages of the object into kernel space */ 2519 void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj) 2520 { 2521 int ret; 2522 2523 lockdep_assert_held(&obj->base.dev->struct_mutex); 2524 2525 ret = i915_gem_object_get_pages(obj); 2526 if (ret) 2527 return ERR_PTR(ret); 2528 2529 i915_gem_object_pin_pages(obj); 2530 2531 if (!obj->mapping) { 2532 obj->mapping = i915_gem_object_map(obj); 2533 if (!obj->mapping) { 2534 i915_gem_object_unpin_pages(obj); 2535 return ERR_PTR(-ENOMEM); 2536 } 2537 } 2538 2539 return obj->mapping; 2540 } 2541 2542 void i915_vma_move_to_active(struct i915_vma *vma, 2543 struct drm_i915_gem_request *req) 2544 { 2545 struct drm_i915_gem_object *obj = vma->obj; 2546 struct intel_engine_cs *engine; 2547 2548 engine = i915_gem_request_get_engine(req); 2549 2550 /* Add a reference if we're newly entering the active list. 
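 * obj->active is a per-engine bitmask (one bit per intel_engine_flag()),
 * so the reference is only taken on the 0 -> non-zero transition and is
 * dropped again in i915_gem_object_retire__read() once the last engine
 * clears its bit.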
*/ 2551 if (obj->active == 0) 2552 i915_gem_object_get(obj); 2553 obj->active |= intel_engine_flag(engine); 2554 2555 list_move_tail(&obj->engine_list[engine->id], &engine->active_list); 2556 i915_gem_request_assign(&obj->last_read_req[engine->id], req); 2557 2558 list_move_tail(&vma->vm_link, &vma->vm->active_list); 2559 } 2560 2561 static void 2562 i915_gem_object_retire__write(struct drm_i915_gem_object *obj) 2563 { 2564 GEM_BUG_ON(obj->last_write_req == NULL); 2565 GEM_BUG_ON(!(obj->active & intel_engine_flag(obj->last_write_req->engine))); 2566 2567 i915_gem_request_assign(&obj->last_write_req, NULL); 2568 intel_fb_obj_flush(obj, true, ORIGIN_CS); 2569 } 2570 2571 static void 2572 i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring) 2573 { 2574 struct i915_vma *vma; 2575 2576 GEM_BUG_ON(obj->last_read_req[ring] == NULL); 2577 GEM_BUG_ON(!(obj->active & (1 << ring))); 2578 2579 list_del_init(&obj->engine_list[ring]); 2580 i915_gem_request_assign(&obj->last_read_req[ring], NULL); 2581 2582 if (obj->last_write_req && obj->last_write_req->engine->id == ring) 2583 i915_gem_object_retire__write(obj); 2584 2585 obj->active &= ~(1 << ring); 2586 if (obj->active) 2587 return; 2588 2589 /* Bump our place on the bound list to keep it roughly in LRU order 2590 * so that we don't steal from recently used but inactive objects 2591 * (unless we are forced to ofc!) 2592 */ 2593 list_move_tail(&obj->global_list, 2594 &to_i915(obj->base.dev)->mm.bound_list); 2595 2596 list_for_each_entry(vma, &obj->vma_list, obj_link) { 2597 if (!list_empty(&vma->vm_link)) 2598 list_move_tail(&vma->vm_link, &vma->vm->inactive_list); 2599 } 2600 2601 i915_gem_request_assign(&obj->last_fenced_req, NULL); 2602 i915_gem_object_put(obj); 2603 } 2604 2605 static bool i915_context_is_banned(const struct i915_gem_context *ctx) 2606 { 2607 unsigned long elapsed; 2608 2609 if (ctx->hang_stats.banned) 2610 return true; 2611 2612 elapsed = get_seconds() - ctx->hang_stats.guilty_ts; 2613 if (ctx->hang_stats.ban_period_seconds && 2614 elapsed <= ctx->hang_stats.ban_period_seconds) { 2615 DRM_DEBUG("context hanging too fast, banning!\n"); 2616 return true; 2617 } 2618 2619 return false; 2620 } 2621 2622 static void i915_set_reset_status(struct i915_gem_context *ctx, 2623 const bool guilty) 2624 { 2625 struct i915_ctx_hang_stats *hs = &ctx->hang_stats; 2626 2627 if (guilty) { 2628 hs->banned = i915_context_is_banned(ctx); 2629 hs->batch_active++; 2630 hs->guilty_ts = get_seconds(); 2631 } else { 2632 hs->batch_pending++; 2633 } 2634 } 2635 2636 struct drm_i915_gem_request * 2637 i915_gem_find_active_request(struct intel_engine_cs *engine) 2638 { 2639 struct drm_i915_gem_request *request; 2640 2641 /* We are called by the error capture and reset at a random 2642 * point in time. In particular, note that neither is crucially 2643 * ordered with an interrupt. After a hang, the GPU is dead and we 2644 * assume that no more writes can happen (we waited long enough for 2645 * all writes that were in transaction to be flushed) - adding an 2646 * extra delay for a recent interrupt is pointless. Hence, we do 2647 * not need an engine->irq_seqno_barrier() before the seqno reads. 
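 *
 * The loop below simply returns the first request on the engine's list
 * that has not yet completed, which after a hang is presumed to be the
 * request that was executing when the GPU stopped.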
2648 */ 2649 list_for_each_entry(request, &engine->request_list, list) { 2650 if (i915_gem_request_completed(request)) 2651 continue; 2652 2653 return request; 2654 } 2655 2656 return NULL; 2657 } 2658 2659 static void i915_gem_reset_engine_status(struct intel_engine_cs *engine) 2660 { 2661 struct drm_i915_gem_request *request; 2662 bool ring_hung; 2663 2664 request = i915_gem_find_active_request(engine); 2665 if (request == NULL) 2666 return; 2667 2668 ring_hung = engine->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG; 2669 2670 i915_set_reset_status(request->ctx, ring_hung); 2671 list_for_each_entry_continue(request, &engine->request_list, list) 2672 i915_set_reset_status(request->ctx, false); 2673 } 2674 2675 static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine) 2676 { 2677 struct intel_ringbuffer *buffer; 2678 2679 while (!list_empty(&engine->active_list)) { 2680 struct drm_i915_gem_object *obj; 2681 2682 obj = list_first_entry(&engine->active_list, 2683 struct drm_i915_gem_object, 2684 engine_list[engine->id]); 2685 2686 i915_gem_object_retire__read(obj, engine->id); 2687 } 2688 2689 /* Mark all pending requests as complete so that any concurrent 2690 * (lockless) lookup doesn't try and wait upon the request as we 2691 * reset it. 2692 */ 2693 intel_ring_init_seqno(engine, engine->last_submitted_seqno); 2694 2695 /* 2696 * Clear the execlists queue up before freeing the requests, as those 2697 * are the ones that keep the context and ringbuffer backing objects 2698 * pinned in place. 2699 */ 2700 2701 if (i915.enable_execlists) { 2702 /* Ensure irq handler finishes or is cancelled. */ 2703 tasklet_kill(&engine->irq_tasklet); 2704 2705 intel_execlists_cancel_requests(engine); 2706 } 2707 2708 /* 2709 * We must free the requests after all the corresponding objects have 2710 * been moved off active lists. Which is the same order as the normal 2711 * retire_requests function does. This is important if object hold 2712 * implicit references on things like e.g. ppgtt address spaces through 2713 * the request. 2714 */ 2715 if (!list_empty(&engine->request_list)) { 2716 struct drm_i915_gem_request *request; 2717 2718 request = list_last_entry(&engine->request_list, 2719 struct drm_i915_gem_request, 2720 list); 2721 2722 i915_gem_request_retire_upto(request); 2723 } 2724 2725 /* Having flushed all requests from all queues, we know that all 2726 * ringbuffers must now be empty. However, since we do not reclaim 2727 * all space when retiring the request (to prevent HEADs colliding 2728 * with rapid ringbuffer wraparound) the amount of available space 2729 * upon reset is less than when we start. Do one more pass over 2730 * all the ringbuffers to reset last_retired_head. 2731 */ 2732 list_for_each_entry(buffer, &engine->buffers, link) { 2733 buffer->last_retired_head = buffer->tail; 2734 intel_ring_update_space(buffer); 2735 } 2736 2737 engine->i915->gt.active_engines &= ~intel_engine_flag(engine); 2738 } 2739 2740 void i915_gem_reset(struct drm_device *dev) 2741 { 2742 struct drm_i915_private *dev_priv = to_i915(dev); 2743 struct intel_engine_cs *engine; 2744 2745 /* 2746 * Before we free the objects from the requests, we need to inspect 2747 * them for finding the guilty party. As the requests only borrow 2748 * their reference to the objects, the inspection must be done first. 
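 *
 * Hence the two passes below: i915_gem_reset_engine_status() first
 * records which contexts were guilty or innocent, and only afterwards
 * does i915_gem_reset_engine_cleanup() retire and free the requests.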
2749 */ 2750 for_each_engine(engine, dev_priv) 2751 i915_gem_reset_engine_status(engine); 2752 2753 for_each_engine(engine, dev_priv) 2754 i915_gem_reset_engine_cleanup(engine); 2755 mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0); 2756 2757 i915_gem_context_reset(dev); 2758 2759 i915_gem_restore_fences(dev); 2760 2761 WARN_ON(i915_verify_lists(dev)); 2762 } 2763 2764 /** 2765 * This function clears the request list as sequence numbers are passed. 2766 * @engine: engine to retire requests on 2767 */ 2768 void 2769 i915_gem_retire_requests_ring(struct intel_engine_cs *engine) 2770 { 2771 WARN_ON(i915_verify_lists(engine->dev)); 2772 2773 /* Retire requests first as we use it above for the early return. 2774 * If we retire requests last, we may use a later seqno and so clear 2775 * the requests lists without clearing the active list, leading to 2776 * confusion. 2777 */ 2778 while (!list_empty(&engine->request_list)) { 2779 struct drm_i915_gem_request *request; 2780 2781 request = list_first_entry(&engine->request_list, 2782 struct drm_i915_gem_request, 2783 list); 2784 2785 if (!i915_gem_request_completed(request)) 2786 break; 2787 2788 i915_gem_request_retire_upto(request); 2789 } 2790 2791 /* Move any buffers on the active list that are no longer referenced 2792 * by the ringbuffer to the flushing/inactive lists as appropriate, 2793 * before we free the context associated with the requests. 2794 */ 2795 while (!list_empty(&engine->active_list)) { 2796 struct drm_i915_gem_object *obj; 2797 2798 obj = list_first_entry(&engine->active_list, 2799 struct drm_i915_gem_object, 2800 engine_list[engine->id]); 2801 2802 if (!list_empty(&obj->last_read_req[engine->id]->list)) 2803 break; 2804 2805 i915_gem_object_retire__read(obj, engine->id); 2806 } 2807 2808 WARN_ON(i915_verify_lists(engine->dev)); 2809 } 2810 2811 void i915_gem_retire_requests(struct drm_i915_private *dev_priv) 2812 { 2813 struct intel_engine_cs *engine; 2814 2815 lockdep_assert_held(&dev_priv->drm.struct_mutex); 2816 2817 if (dev_priv->gt.active_engines == 0) 2818 return; 2819 2820 GEM_BUG_ON(!dev_priv->gt.awake); 2821 2822 for_each_engine(engine, dev_priv) { 2823 i915_gem_retire_requests_ring(engine); 2824 if (list_empty(&engine->request_list)) 2825 dev_priv->gt.active_engines &= ~intel_engine_flag(engine); 2826 } 2827 2828 if (dev_priv->gt.active_engines == 0) 2829 queue_delayed_work(dev_priv->wq, 2830 &dev_priv->gt.idle_work, 2831 msecs_to_jiffies(100)); 2832 } 2833 2834 static void 2835 i915_gem_retire_work_handler(struct work_struct *work) 2836 { 2837 struct drm_i915_private *dev_priv = 2838 container_of(work, typeof(*dev_priv), gt.retire_work.work); 2839 struct drm_device *dev = &dev_priv->drm; 2840 2841 /* Come back later if the device is busy... */ 2842 if (mutex_trylock(&dev->struct_mutex)) { 2843 i915_gem_retire_requests(dev_priv); 2844 mutex_unlock(&dev->struct_mutex); 2845 } 2846 2847 /* Keep the retire handler running until we are finally idle. 2848 * We do not need to do this test under locking as in the worst-case 2849 * we queue the retire worker once too often. 
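 *
 * Requeueing below uses round_jiffies_up_relative(HZ), i.e. roughly a
 * one second cadence, and the hangcheck is kicked from the same handler.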
2850 */ 2851 if (READ_ONCE(dev_priv->gt.awake)) { 2852 i915_queue_hangcheck(dev_priv); 2853 queue_delayed_work(dev_priv->wq, 2854 &dev_priv->gt.retire_work, 2855 round_jiffies_up_relative(HZ)); 2856 } 2857 } 2858 2859 static void 2860 i915_gem_idle_work_handler(struct work_struct *work) 2861 { 2862 struct drm_i915_private *dev_priv = 2863 container_of(work, typeof(*dev_priv), gt.idle_work.work); 2864 struct drm_device *dev = &dev_priv->drm; 2865 struct intel_engine_cs *engine; 2866 unsigned int stuck_engines; 2867 bool rearm_hangcheck; 2868 2869 if (!READ_ONCE(dev_priv->gt.awake)) 2870 return; 2871 2872 if (READ_ONCE(dev_priv->gt.active_engines)) 2873 return; 2874 2875 rearm_hangcheck = 2876 cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work); 2877 2878 if (!mutex_trylock(&dev->struct_mutex)) { 2879 /* Currently busy, come back later */ 2880 mod_delayed_work(dev_priv->wq, 2881 &dev_priv->gt.idle_work, 2882 msecs_to_jiffies(50)); 2883 goto out_rearm; 2884 } 2885 2886 if (dev_priv->gt.active_engines) 2887 goto out_unlock; 2888 2889 for_each_engine(engine, dev_priv) 2890 i915_gem_batch_pool_fini(&engine->batch_pool); 2891 2892 GEM_BUG_ON(!dev_priv->gt.awake); 2893 dev_priv->gt.awake = false; 2894 rearm_hangcheck = false; 2895 2896 /* As we have disabled hangcheck, we need to unstick any waiters still 2897 * hanging around. However, as we may be racing against the interrupt 2898 * handler or the waiters themselves, we skip enabling the fake-irq. 2899 */ 2900 stuck_engines = intel_kick_waiters(dev_priv); 2901 if (unlikely(stuck_engines)) 2902 DRM_DEBUG_DRIVER("kicked stuck waiters (%x)...missed irq?\n", 2903 stuck_engines); 2904 2905 if (INTEL_GEN(dev_priv) >= 6) 2906 gen6_rps_idle(dev_priv); 2907 intel_runtime_pm_put(dev_priv); 2908 out_unlock: 2909 mutex_unlock(&dev->struct_mutex); 2910 2911 out_rearm: 2912 if (rearm_hangcheck) { 2913 GEM_BUG_ON(!dev_priv->gt.awake); 2914 i915_queue_hangcheck(dev_priv); 2915 } 2916 } 2917 2918 /** 2919 * Ensures that an object will eventually get non-busy by flushing any required 2920 * write domains, emitting any outstanding lazy request and retiring and 2921 * completed requests. 2922 * @obj: object to flush 2923 */ 2924 static int 2925 i915_gem_object_flush_active(struct drm_i915_gem_object *obj) 2926 { 2927 int i; 2928 2929 if (!obj->active) 2930 return 0; 2931 2932 for (i = 0; i < I915_NUM_ENGINES; i++) { 2933 struct drm_i915_gem_request *req; 2934 2935 req = obj->last_read_req[i]; 2936 if (req == NULL) 2937 continue; 2938 2939 if (i915_gem_request_completed(req)) 2940 i915_gem_object_retire__read(obj, i); 2941 } 2942 2943 return 0; 2944 } 2945 2946 /** 2947 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT 2948 * @dev: drm device pointer 2949 * @data: ioctl data blob 2950 * @file: drm file pointer 2951 * 2952 * Returns 0 if successful, else an error is returned with the remaining time in 2953 * the timeout parameter. 2954 * -ETIME: object is still busy after timeout 2955 * -ERESTARTSYS: signal interrupted the wait 2956 * -ENONENT: object doesn't exist 2957 * Also possible, but rare: 2958 * -EAGAIN: GPU wedged 2959 * -ENOMEM: damn 2960 * -ENODEV: Internal IRQ fail 2961 * -E?: The add request failed 2962 * 2963 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any 2964 * non-zero timeout parameter the wait ioctl will wait for the given number of 2965 * nanoseconds on an object becoming unbusy. 
Since the wait itself does so 2966 * without holding struct_mutex the object may become re-busied before this 2967 * function completes. A similar but shorter * race condition exists in the busy 2968 * ioctl 2969 */ 2970 int 2971 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) 2972 { 2973 struct drm_i915_gem_wait *args = data; 2974 struct drm_i915_gem_object *obj; 2975 struct drm_i915_gem_request *req[I915_NUM_ENGINES]; 2976 int i, n = 0; 2977 int ret; 2978 2979 if (args->flags != 0) 2980 return -EINVAL; 2981 2982 ret = i915_mutex_lock_interruptible(dev); 2983 if (ret) 2984 return ret; 2985 2986 obj = i915_gem_object_lookup(file, args->bo_handle); 2987 if (!obj) { 2988 mutex_unlock(&dev->struct_mutex); 2989 return -ENOENT; 2990 } 2991 2992 /* Need to make sure the object gets inactive eventually. */ 2993 ret = i915_gem_object_flush_active(obj); 2994 if (ret) 2995 goto out; 2996 2997 if (!obj->active) 2998 goto out; 2999 3000 /* Do this after OLR check to make sure we make forward progress polling 3001 * on this IOCTL with a timeout == 0 (like busy ioctl) 3002 */ 3003 if (args->timeout_ns == 0) { 3004 ret = -ETIME; 3005 goto out; 3006 } 3007 3008 i915_gem_object_put(obj); 3009 3010 for (i = 0; i < I915_NUM_ENGINES; i++) { 3011 if (obj->last_read_req[i] == NULL) 3012 continue; 3013 3014 req[n++] = i915_gem_request_get(obj->last_read_req[i]); 3015 } 3016 3017 mutex_unlock(&dev->struct_mutex); 3018 3019 for (i = 0; i < n; i++) { 3020 if (ret == 0) 3021 ret = __i915_wait_request(req[i], true, 3022 args->timeout_ns > 0 ? &args->timeout_ns : NULL, 3023 to_rps_client(file)); 3024 i915_gem_request_put(req[i]); 3025 } 3026 return ret; 3027 3028 out: 3029 i915_gem_object_put(obj); 3030 mutex_unlock(&dev->struct_mutex); 3031 return ret; 3032 } 3033 3034 static int 3035 __i915_gem_object_sync(struct drm_i915_gem_object *obj, 3036 struct intel_engine_cs *to, 3037 struct drm_i915_gem_request *from_req, 3038 struct drm_i915_gem_request **to_req) 3039 { 3040 struct intel_engine_cs *from; 3041 int ret; 3042 3043 from = i915_gem_request_get_engine(from_req); 3044 if (to == from) 3045 return 0; 3046 3047 if (i915_gem_request_completed(from_req)) 3048 return 0; 3049 3050 if (!i915.semaphores) { 3051 struct drm_i915_private *i915 = to_i915(obj->base.dev); 3052 ret = __i915_wait_request(from_req, 3053 i915->mm.interruptible, 3054 NULL, 3055 NO_WAITBOOST); 3056 if (ret) 3057 return ret; 3058 3059 i915_gem_object_retire_request(obj, from_req); 3060 } else { 3061 int idx = intel_ring_sync_index(from, to); 3062 u32 seqno = i915_gem_request_get_seqno(from_req); 3063 3064 WARN_ON(!to_req); 3065 3066 if (seqno <= from->semaphore.sync_seqno[idx]) 3067 return 0; 3068 3069 if (*to_req == NULL) { 3070 struct drm_i915_gem_request *req; 3071 3072 req = i915_gem_request_alloc(to, NULL); 3073 if (IS_ERR(req)) 3074 return PTR_ERR(req); 3075 3076 *to_req = req; 3077 } 3078 3079 trace_i915_gem_ring_sync_to(*to_req, from, from_req); 3080 ret = to->semaphore.sync_to(*to_req, from, seqno); 3081 if (ret) 3082 return ret; 3083 3084 /* We use last_read_req because sync_to() 3085 * might have just caused seqno wrap under 3086 * the radar. 3087 */ 3088 from->semaphore.sync_seqno[idx] = 3089 i915_gem_request_get_seqno(obj->last_read_req[from->id]); 3090 } 3091 3092 return 0; 3093 } 3094 3095 /** 3096 * i915_gem_object_sync - sync an object to a ring. 3097 * 3098 * @obj: object which may be in use on another ring. 3099 * @to: ring we wish to use the object on. May be NULL. 
3100 * @to_req: request we wish to use the object for. See below. 3101 * This will be allocated and returned if a request is 3102 * required but not passed in. 3103 * 3104 * This code is meant to abstract object synchronization with the GPU. 3105 * Calling with NULL implies synchronizing the object with the CPU 3106 * rather than a particular GPU ring. Conceptually we serialise writes 3107 * between engines inside the GPU. We only allow one engine to write 3108 * into a buffer at any time, but multiple readers. To ensure each has 3109 * a coherent view of memory, we must: 3110 * 3111 * - If there is an outstanding write request to the object, the new 3112 * request must wait for it to complete (either CPU or in hw, requests 3113 * on the same ring will be naturally ordered). 3114 * 3115 * - If we are a write request (pending_write_domain is set), the new 3116 * request must wait for outstanding read requests to complete. 3117 * 3118 * For CPU synchronisation (NULL to) no request is required. For syncing with 3119 * rings to_req must be non-NULL. However, a request does not have to be 3120 * pre-allocated. If *to_req is NULL and sync commands will be emitted then a 3121 * request will be allocated automatically and returned through *to_req. Note 3122 * that it is not guaranteed that commands will be emitted (because the system 3123 * might already be idle). Hence there is no need to create a request that 3124 * might never have any work submitted. Note further that if a request is 3125 * returned in *to_req, it is the responsibility of the caller to submit 3126 * that request (after potentially adding more work to it). 3127 * 3128 * Returns 0 if successful, else propagates up the lower layer error. 3129 */ 3130 int 3131 i915_gem_object_sync(struct drm_i915_gem_object *obj, 3132 struct intel_engine_cs *to, 3133 struct drm_i915_gem_request **to_req) 3134 { 3135 const bool readonly = obj->base.pending_write_domain == 0; 3136 struct drm_i915_gem_request *req[I915_NUM_ENGINES]; 3137 int ret, i, n; 3138 3139 if (!obj->active) 3140 return 0; 3141 3142 if (to == NULL) 3143 return i915_gem_object_wait_rendering(obj, readonly); 3144 3145 n = 0; 3146 if (readonly) { 3147 if (obj->last_write_req) 3148 req[n++] = obj->last_write_req; 3149 } else { 3150 for (i = 0; i < I915_NUM_ENGINES; i++) 3151 if (obj->last_read_req[i]) 3152 req[n++] = obj->last_read_req[i]; 3153 } 3154 for (i = 0; i < n; i++) { 3155 ret = __i915_gem_object_sync(obj, to, req[i], to_req); 3156 if (ret) 3157 return ret; 3158 } 3159 3160 return 0; 3161 } 3162 3163 static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj) 3164 { 3165 u32 old_write_domain, old_read_domains; 3166 3167 /* Force a pagefault for domain tracking on next user access */ 3168 i915_gem_release_mmap(obj); 3169 3170 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) 3171 return; 3172 3173 old_read_domains = obj->base.read_domains; 3174 old_write_domain = obj->base.write_domain; 3175 3176 obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT; 3177 obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT; 3178 3179 trace_i915_gem_object_change_domain(obj, 3180 old_read_domains, 3181 old_write_domain); 3182 } 3183 3184 static void __i915_vma_iounmap(struct i915_vma *vma) 3185 { 3186 GEM_BUG_ON(vma->pin_count); 3187 3188 if (vma->iomap == NULL) 3189 return; 3190 3191 io_mapping_unmap(vma->iomap); 3192 vma->iomap = NULL; 3193 } 3194 3195 static int __i915_vma_unbind(struct i915_vma *vma, bool wait) 3196 { 3197 struct drm_i915_gem_object *obj = vma->obj; 3198 struct 
drm_i915_private *dev_priv = to_i915(obj->base.dev); 3199 int ret; 3200 3201 if (list_empty(&vma->obj_link)) 3202 return 0; 3203 3204 if (!drm_mm_node_allocated(&vma->node)) { 3205 i915_gem_vma_destroy(vma); 3206 return 0; 3207 } 3208 3209 if (vma->pin_count) 3210 return -EBUSY; 3211 3212 BUG_ON(obj->pages == NULL); 3213 3214 if (wait) { 3215 ret = i915_gem_object_wait_rendering(obj, false); 3216 if (ret) 3217 return ret; 3218 } 3219 3220 if (vma->is_ggtt && vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) { 3221 i915_gem_object_finish_gtt(obj); 3222 3223 /* release the fence reg _after_ flushing */ 3224 ret = i915_gem_object_put_fence(obj); 3225 if (ret) 3226 return ret; 3227 3228 __i915_vma_iounmap(vma); 3229 } 3230 3231 trace_i915_vma_unbind(vma); 3232 3233 vma->vm->unbind_vma(vma); 3234 vma->bound = 0; 3235 3236 list_del_init(&vma->vm_link); 3237 if (vma->is_ggtt) { 3238 if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) { 3239 obj->map_and_fenceable = false; 3240 } else if (vma->ggtt_view.pages) { 3241 sg_free_table(vma->ggtt_view.pages); 3242 kfree(vma->ggtt_view.pages); 3243 } 3244 vma->ggtt_view.pages = NULL; 3245 } 3246 3247 drm_mm_remove_node(&vma->node); 3248 i915_gem_vma_destroy(vma); 3249 3250 /* Since the unbound list is global, only move to that list if 3251 * no more VMAs exist. */ 3252 if (list_empty(&obj->vma_list)) 3253 list_move_tail(&obj->global_list, &dev_priv->mm.unbound_list); 3254 3255 /* And finally now the object is completely decoupled from this vma, 3256 * we can drop its hold on the backing storage and allow it to be 3257 * reaped by the shrinker. 3258 */ 3259 i915_gem_object_unpin_pages(obj); 3260 3261 return 0; 3262 } 3263 3264 int i915_vma_unbind(struct i915_vma *vma) 3265 { 3266 return __i915_vma_unbind(vma, true); 3267 } 3268 3269 int __i915_vma_unbind_no_wait(struct i915_vma *vma) 3270 { 3271 return __i915_vma_unbind(vma, false); 3272 } 3273 3274 int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv) 3275 { 3276 struct intel_engine_cs *engine; 3277 int ret; 3278 3279 lockdep_assert_held(&dev_priv->drm.struct_mutex); 3280 3281 for_each_engine(engine, dev_priv) { 3282 if (engine->last_context == NULL) 3283 continue; 3284 3285 ret = intel_engine_idle(engine); 3286 if (ret) 3287 return ret; 3288 } 3289 3290 WARN_ON(i915_verify_lists(dev)); 3291 return 0; 3292 } 3293 3294 static bool i915_gem_valid_gtt_space(struct i915_vma *vma, 3295 unsigned long cache_level) 3296 { 3297 struct drm_mm_node *gtt_space = &vma->node; 3298 struct drm_mm_node *other; 3299 3300 /* 3301 * On some machines we have to be careful when putting differing types 3302 * of snoopable memory together to avoid the prefetcher crossing memory 3303 * domains and dying. During vm initialisation, we decide whether or not 3304 * these constraints apply and set the drm_mm.color_adjust 3305 * appropriately. 
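 *
 * When color_adjust is set, the checks below only accept placements
 * whose allocated neighbours either share the requested cache_level or
 * are separated from the node by a hole.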
3306 */ 3307 if (vma->vm->mm.color_adjust == NULL) 3308 return true; 3309 3310 if (!drm_mm_node_allocated(gtt_space)) 3311 return true; 3312 3313 if (list_empty(&gtt_space->node_list)) 3314 return true; 3315 3316 other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list); 3317 if (other->allocated && !other->hole_follows && other->color != cache_level) 3318 return false; 3319 3320 other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list); 3321 if (other->allocated && !gtt_space->hole_follows && other->color != cache_level) 3322 return false; 3323 3324 return true; 3325 } 3326 3327 /** 3328 * Finds free space in the GTT aperture and binds the object or a view of it 3329 * there. 3330 * @obj: object to bind 3331 * @vm: address space to bind into 3332 * @ggtt_view: global gtt view if applicable 3333 * @alignment: requested alignment 3334 * @flags: mask of PIN_* flags to use 3335 */ 3336 static struct i915_vma * 3337 i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, 3338 struct i915_address_space *vm, 3339 const struct i915_ggtt_view *ggtt_view, 3340 unsigned alignment, 3341 uint64_t flags) 3342 { 3343 struct drm_device *dev = obj->base.dev; 3344 struct drm_i915_private *dev_priv = to_i915(dev); 3345 struct i915_ggtt *ggtt = &dev_priv->ggtt; 3346 u32 fence_alignment, unfenced_alignment; 3347 u32 search_flag, alloc_flag; 3348 u64 start, end; 3349 u64 size, fence_size; 3350 struct i915_vma *vma; 3351 int ret; 3352 3353 if (i915_is_ggtt(vm)) { 3354 u32 view_size; 3355 3356 if (WARN_ON(!ggtt_view)) 3357 return ERR_PTR(-EINVAL); 3358 3359 view_size = i915_ggtt_view_size(obj, ggtt_view); 3360 3361 fence_size = i915_gem_get_gtt_size(dev, 3362 view_size, 3363 obj->tiling_mode); 3364 fence_alignment = i915_gem_get_gtt_alignment(dev, 3365 view_size, 3366 obj->tiling_mode, 3367 true); 3368 unfenced_alignment = i915_gem_get_gtt_alignment(dev, 3369 view_size, 3370 obj->tiling_mode, 3371 false); 3372 size = flags & PIN_MAPPABLE ? fence_size : view_size; 3373 } else { 3374 fence_size = i915_gem_get_gtt_size(dev, 3375 obj->base.size, 3376 obj->tiling_mode); 3377 fence_alignment = i915_gem_get_gtt_alignment(dev, 3378 obj->base.size, 3379 obj->tiling_mode, 3380 true); 3381 unfenced_alignment = 3382 i915_gem_get_gtt_alignment(dev, 3383 obj->base.size, 3384 obj->tiling_mode, 3385 false); 3386 size = flags & PIN_MAPPABLE ? fence_size : obj->base.size; 3387 } 3388 3389 start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0; 3390 end = vm->total; 3391 if (flags & PIN_MAPPABLE) 3392 end = min_t(u64, end, ggtt->mappable_end); 3393 if (flags & PIN_ZONE_4G) 3394 end = min_t(u64, end, (1ULL << 32) - PAGE_SIZE); 3395 3396 if (alignment == 0) 3397 alignment = flags & PIN_MAPPABLE ? fence_alignment : 3398 unfenced_alignment; 3399 if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) { 3400 DRM_DEBUG("Invalid object (view type=%u) alignment requested %u\n", 3401 ggtt_view ? ggtt_view->type : 0, 3402 alignment); 3403 return ERR_PTR(-EINVAL); 3404 } 3405 3406 /* If binding the object/GGTT view requires more space than the entire 3407 * aperture has, reject it early before evicting everything in a vain 3408 * attempt to find space. 3409 */ 3410 if (size > end) { 3411 DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: size=%llu > %s aperture=%llu\n", 3412 ggtt_view ? ggtt_view->type : 0, 3413 size, 3414 flags & PIN_MAPPABLE ?
"mappable" : "total", 3415 end); 3416 return ERR_PTR(-E2BIG); 3417 } 3418 3419 ret = i915_gem_object_get_pages(obj); 3420 if (ret) 3421 return ERR_PTR(ret); 3422 3423 i915_gem_object_pin_pages(obj); 3424 3425 vma = ggtt_view ? i915_gem_obj_lookup_or_create_ggtt_vma(obj, ggtt_view) : 3426 i915_gem_obj_lookup_or_create_vma(obj, vm); 3427 3428 if (IS_ERR(vma)) 3429 goto err_unpin; 3430 3431 if (flags & PIN_OFFSET_FIXED) { 3432 uint64_t offset = flags & PIN_OFFSET_MASK; 3433 3434 if (offset & (alignment - 1) || offset + size > end) { 3435 ret = -EINVAL; 3436 goto err_free_vma; 3437 } 3438 vma->node.start = offset; 3439 vma->node.size = size; 3440 vma->node.color = obj->cache_level; 3441 ret = drm_mm_reserve_node(&vm->mm, &vma->node); 3442 if (ret) { 3443 ret = i915_gem_evict_for_vma(vma); 3444 if (ret == 0) 3445 ret = drm_mm_reserve_node(&vm->mm, &vma->node); 3446 } 3447 if (ret) 3448 goto err_free_vma; 3449 } else { 3450 if (flags & PIN_HIGH) { 3451 search_flag = DRM_MM_SEARCH_BELOW; 3452 alloc_flag = DRM_MM_CREATE_TOP; 3453 } else { 3454 search_flag = DRM_MM_SEARCH_DEFAULT; 3455 alloc_flag = DRM_MM_CREATE_DEFAULT; 3456 } 3457 3458 search_free: 3459 ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node, 3460 size, alignment, 3461 obj->cache_level, 3462 start, end, 3463 search_flag, 3464 alloc_flag); 3465 if (ret) { 3466 ret = i915_gem_evict_something(dev, vm, size, alignment, 3467 obj->cache_level, 3468 start, end, 3469 flags); 3470 if (ret == 0) 3471 goto search_free; 3472 3473 goto err_free_vma; 3474 } 3475 } 3476 if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) { 3477 ret = -EINVAL; 3478 goto err_remove_node; 3479 } 3480 3481 trace_i915_vma_bind(vma, flags); 3482 ret = i915_vma_bind(vma, obj->cache_level, flags); 3483 if (ret) 3484 goto err_remove_node; 3485 3486 list_move_tail(&obj->global_list, &dev_priv->mm.bound_list); 3487 list_add_tail(&vma->vm_link, &vm->inactive_list); 3488 3489 return vma; 3490 3491 err_remove_node: 3492 drm_mm_remove_node(&vma->node); 3493 err_free_vma: 3494 i915_gem_vma_destroy(vma); 3495 vma = ERR_PTR(ret); 3496 err_unpin: 3497 i915_gem_object_unpin_pages(obj); 3498 return vma; 3499 } 3500 3501 bool 3502 i915_gem_clflush_object(struct drm_i915_gem_object *obj, 3503 bool force) 3504 { 3505 /* If we don't have a page list set up, then we're not pinned 3506 * to GPU, and we can ignore the cache flush because it'll happen 3507 * again at bind time. 3508 */ 3509 if (obj->pages == NULL) 3510 return false; 3511 3512 /* 3513 * Stolen memory is always coherent with the GPU as it is explicitly 3514 * marked as wc by the system, or the system is cache-coherent. 3515 */ 3516 if (obj->stolen || obj->phys_handle) 3517 return false; 3518 3519 /* If the GPU is snooping the contents of the CPU cache, 3520 * we do not need to manually clear the CPU cache lines. However, 3521 * the caches are only snooped when the render cache is 3522 * flushed/invalidated. As we always have to emit invalidations 3523 * and flushes when moving into and out of the RENDER domain, correct 3524 * snooping behaviour occurs naturally as the result of our domain 3525 * tracking. 3526 */ 3527 if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) { 3528 obj->cache_dirty = true; 3529 return false; 3530 } 3531 3532 trace_i915_gem_object_clflush(obj); 3533 drm_clflush_sg(obj->pages); 3534 obj->cache_dirty = false; 3535 3536 return true; 3537 } 3538 3539 /** Flushes the GTT write domain for the object if it's dirty. 
*/ 3540 static void 3541 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) 3542 { 3543 uint32_t old_write_domain; 3544 3545 if (obj->base.write_domain != I915_GEM_DOMAIN_GTT) 3546 return; 3547 3548 /* No actual flushing is required for the GTT write domain. Writes 3549 * to it immediately go to main memory as far as we know, so there's 3550 * no chipset flush. It also doesn't land in render cache. 3551 * 3552 * However, we do have to enforce the order so that all writes through 3553 * the GTT land before any writes to the device, such as updates to 3554 * the GATT itself. 3555 */ 3556 wmb(); 3557 3558 old_write_domain = obj->base.write_domain; 3559 obj->base.write_domain = 0; 3560 3561 intel_fb_obj_flush(obj, false, ORIGIN_GTT); 3562 3563 trace_i915_gem_object_change_domain(obj, 3564 obj->base.read_domains, 3565 old_write_domain); 3566 } 3567 3568 /** Flushes the CPU write domain for the object if it's dirty. */ 3569 static void 3570 i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) 3571 { 3572 uint32_t old_write_domain; 3573 3574 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) 3575 return; 3576 3577 if (i915_gem_clflush_object(obj, obj->pin_display)) 3578 i915_gem_chipset_flush(to_i915(obj->base.dev)); 3579 3580 old_write_domain = obj->base.write_domain; 3581 obj->base.write_domain = 0; 3582 3583 intel_fb_obj_flush(obj, false, ORIGIN_CPU); 3584 3585 trace_i915_gem_object_change_domain(obj, 3586 obj->base.read_domains, 3587 old_write_domain); 3588 } 3589 3590 /** 3591 * Moves a single object to the GTT read, and possibly write domain. 3592 * @obj: object to act on 3593 * @write: ask for write access or read only 3594 * 3595 * This function returns when the move is complete, including waiting on 3596 * flushes to occur. 3597 */ 3598 int 3599 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) 3600 { 3601 struct drm_device *dev = obj->base.dev; 3602 struct drm_i915_private *dev_priv = to_i915(dev); 3603 struct i915_ggtt *ggtt = &dev_priv->ggtt; 3604 uint32_t old_write_domain, old_read_domains; 3605 struct i915_vma *vma; 3606 int ret; 3607 3608 ret = i915_gem_object_wait_rendering(obj, !write); 3609 if (ret) 3610 return ret; 3611 3612 if (obj->base.write_domain == I915_GEM_DOMAIN_GTT) 3613 return 0; 3614 3615 /* Flush and acquire obj->pages so that we are coherent through 3616 * direct access in memory with previous cached writes through 3617 * shmemfs and that our cache domain tracking remains valid. 3618 * For example, if the obj->filp was moved to swap without us 3619 * being notified and releasing the pages, we would mistakenly 3620 * continue to assume that the obj remained out of the CPU cached 3621 * domain. 3622 */ 3623 ret = i915_gem_object_get_pages(obj); 3624 if (ret) 3625 return ret; 3626 3627 i915_gem_object_flush_cpu_write_domain(obj); 3628 3629 /* Serialise direct access to this object with the barriers for 3630 * coherent writes from the GPU, by effectively invalidating the 3631 * GTT domain upon first access. 3632 */ 3633 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) 3634 mb(); 3635 3636 old_write_domain = obj->base.write_domain; 3637 old_read_domains = obj->base.read_domains; 3638 3639 /* It should now be out of any other write domains, and we can update 3640 * the domain values for our changes. 
3641 */ 3642 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0); 3643 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 3644 if (write) { 3645 obj->base.read_domains = I915_GEM_DOMAIN_GTT; 3646 obj->base.write_domain = I915_GEM_DOMAIN_GTT; 3647 obj->dirty = 1; 3648 } 3649 3650 trace_i915_gem_object_change_domain(obj, 3651 old_read_domains, 3652 old_write_domain); 3653 3654 /* And bump the LRU for this access */ 3655 vma = i915_gem_obj_to_ggtt(obj); 3656 if (vma && drm_mm_node_allocated(&vma->node) && !obj->active) 3657 list_move_tail(&vma->vm_link, 3658 &ggtt->base.inactive_list); 3659 3660 return 0; 3661 } 3662 3663 /** 3664 * Changes the cache-level of an object across all VMA. 3665 * @obj: object to act on 3666 * @cache_level: new cache level to set for the object 3667 * 3668 * After this function returns, the object will be in the new cache-level 3669 * across all GTT and the contents of the backing storage will be coherent, 3670 * with respect to the new cache-level. In order to keep the backing storage 3671 * coherent for all users, we only allow a single cache level to be set 3672 * globally on the object and prevent it from being changed whilst the 3673 * hardware is reading from the object. That is if the object is currently 3674 * on the scanout it will be set to uncached (or equivalent display 3675 * cache coherency) and all non-MOCS GPU access will also be uncached so 3676 * that all direct access to the scanout remains coherent. 3677 */ 3678 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, 3679 enum i915_cache_level cache_level) 3680 { 3681 struct drm_device *dev = obj->base.dev; 3682 struct i915_vma *vma, *next; 3683 bool bound = false; 3684 int ret = 0; 3685 3686 if (obj->cache_level == cache_level) 3687 goto out; 3688 3689 /* Inspect the list of currently bound VMA and unbind any that would 3690 * be invalid given the new cache-level. This is principally to 3691 * catch the issue of the CS prefetch crossing page boundaries and 3692 * reading an invalid PTE on older architectures. 3693 */ 3694 list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) { 3695 if (!drm_mm_node_allocated(&vma->node)) 3696 continue; 3697 3698 if (vma->pin_count) { 3699 DRM_DEBUG("can not change the cache level of pinned objects\n"); 3700 return -EBUSY; 3701 } 3702 3703 if (!i915_gem_valid_gtt_space(vma, cache_level)) { 3704 ret = i915_vma_unbind(vma); 3705 if (ret) 3706 return ret; 3707 } else 3708 bound = true; 3709 } 3710 3711 /* We can reuse the existing drm_mm nodes but need to change the 3712 * cache-level on the PTE. We could simply unbind them all and 3713 * rebind with the correct cache-level on next use. However since 3714 * we already have a valid slot, dma mapping, pages etc, we may as 3715 * rewrite the PTE in the belief that doing so tramples upon less 3716 * state and so involves less work. 3717 */ 3718 if (bound) { 3719 /* Before we change the PTE, the GPU must not be accessing it. 3720 * If we wait upon the object, we know that all the bound 3721 * VMA are no longer active. 3722 */ 3723 ret = i915_gem_object_wait_rendering(obj, false); 3724 if (ret) 3725 return ret; 3726 3727 if (!HAS_LLC(dev) && cache_level != I915_CACHE_NONE) { 3728 /* Access to snoopable pages through the GTT is 3729 * incoherent and on some machines causes a hard 3730 * lockup. Relinquish the CPU mmaping to force 3731 * userspace to refault in the pages and we can 3732 * then double check if the GTT mapping is still 3733 * valid for that pointer access. 
3734 */ 3735 i915_gem_release_mmap(obj); 3736 3737 /* As we no longer need a fence for GTT access, 3738 * we can relinquish it now (and so prevent having 3739 * to steal a fence from someone else on the next 3740 * fence request). Note GPU activity would have 3741 * dropped the fence as all snoopable access is 3742 * supposed to be linear. 3743 */ 3744 ret = i915_gem_object_put_fence(obj); 3745 if (ret) 3746 return ret; 3747 } else { 3748 /* We either have incoherent backing store and 3749 * so no GTT access or the architecture is fully 3750 * coherent. In such cases, existing GTT mmaps 3751 * ignore the cache bit in the PTE and we can 3752 * rewrite it without confusing the GPU or having 3753 * to force userspace to fault back in its mmaps. 3754 */ 3755 } 3756 3757 list_for_each_entry(vma, &obj->vma_list, obj_link) { 3758 if (!drm_mm_node_allocated(&vma->node)) 3759 continue; 3760 3761 ret = i915_vma_bind(vma, cache_level, PIN_UPDATE); 3762 if (ret) 3763 return ret; 3764 } 3765 } 3766 3767 list_for_each_entry(vma, &obj->vma_list, obj_link) 3768 vma->node.color = cache_level; 3769 obj->cache_level = cache_level; 3770 3771 out: 3772 /* Flush the dirty CPU caches to the backing storage so that the 3773 * object is now coherent at its new cache level (with respect 3774 * to the access domain). 3775 */ 3776 if (obj->cache_dirty && cpu_write_needs_clflush(obj)) { 3777 if (i915_gem_clflush_object(obj, true)) 3778 i915_gem_chipset_flush(to_i915(obj->base.dev)); 3779 } 3780 3781 return 0; 3782 } 3783 3784 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, 3785 struct drm_file *file) 3786 { 3787 struct drm_i915_gem_caching *args = data; 3788 struct drm_i915_gem_object *obj; 3789 3790 obj = i915_gem_object_lookup(file, args->handle); 3791 if (!obj) 3792 return -ENOENT; 3793 3794 switch (obj->cache_level) { 3795 case I915_CACHE_LLC: 3796 case I915_CACHE_L3_LLC: 3797 args->caching = I915_CACHING_CACHED; 3798 break; 3799 3800 case I915_CACHE_WT: 3801 args->caching = I915_CACHING_DISPLAY; 3802 break; 3803 3804 default: 3805 args->caching = I915_CACHING_NONE; 3806 break; 3807 } 3808 3809 i915_gem_object_put_unlocked(obj); 3810 return 0; 3811 } 3812 3813 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, 3814 struct drm_file *file) 3815 { 3816 struct drm_i915_private *dev_priv = to_i915(dev); 3817 struct drm_i915_gem_caching *args = data; 3818 struct drm_i915_gem_object *obj; 3819 enum i915_cache_level level; 3820 int ret; 3821 3822 switch (args->caching) { 3823 case I915_CACHING_NONE: 3824 level = I915_CACHE_NONE; 3825 break; 3826 case I915_CACHING_CACHED: 3827 /* 3828 * Due to a HW issue on BXT A stepping, GPU stores via a 3829 * snooped mapping may leave stale data in a corresponding CPU 3830 * cacheline, whereas normally such cachelines would get 3831 * invalidated. 3832 */ 3833 if (!HAS_LLC(dev) && !HAS_SNOOP(dev)) 3834 return -ENODEV; 3835 3836 level = I915_CACHE_LLC; 3837 break; 3838 case I915_CACHING_DISPLAY: 3839 level = HAS_WT(dev) ? 
I915_CACHE_WT : I915_CACHE_NONE; 3840 break; 3841 default: 3842 return -EINVAL; 3843 } 3844 3845 intel_runtime_pm_get(dev_priv); 3846 3847 ret = i915_mutex_lock_interruptible(dev); 3848 if (ret) 3849 goto rpm_put; 3850 3851 obj = i915_gem_object_lookup(file, args->handle); 3852 if (!obj) { 3853 ret = -ENOENT; 3854 goto unlock; 3855 } 3856 3857 ret = i915_gem_object_set_cache_level(obj, level); 3858 3859 i915_gem_object_put(obj); 3860 unlock: 3861 mutex_unlock(&dev->struct_mutex); 3862 rpm_put: 3863 intel_runtime_pm_put(dev_priv); 3864 3865 return ret; 3866 } 3867 3868 /* 3869 * Prepare buffer for display plane (scanout, cursors, etc). 3870 * Can be called from an uninterruptible phase (modesetting) and allows 3871 * any flushes to be pipelined (for pageflips). 3872 */ 3873 int 3874 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, 3875 u32 alignment, 3876 const struct i915_ggtt_view *view) 3877 { 3878 u32 old_read_domains, old_write_domain; 3879 int ret; 3880 3881 /* Mark the pin_display early so that we account for the 3882 * display coherency whilst setting up the cache domains. 3883 */ 3884 obj->pin_display++; 3885 3886 /* The display engine is not coherent with the LLC cache on gen6. As 3887 * a result, we make sure that the pinning that is about to occur is 3888 * done with uncached PTEs. This is lowest common denominator for all 3889 * chipsets. 3890 * 3891 * However for gen6+, we could do better by using the GFDT bit instead 3892 * of uncaching, which would allow us to flush all the LLC-cached data 3893 * with that bit in the PTE to main memory with just one PIPE_CONTROL. 3894 */ 3895 ret = i915_gem_object_set_cache_level(obj, 3896 HAS_WT(obj->base.dev) ? I915_CACHE_WT : I915_CACHE_NONE); 3897 if (ret) 3898 goto err_unpin_display; 3899 3900 /* As the user may map the buffer once pinned in the display plane 3901 * (e.g. libkms for the bootup splash), we have to ensure that we 3902 * always use map_and_fenceable for all scanout buffers. 3903 */ 3904 ret = i915_gem_object_ggtt_pin(obj, view, alignment, 3905 view->type == I915_GGTT_VIEW_NORMAL ? 3906 PIN_MAPPABLE : 0); 3907 if (ret) 3908 goto err_unpin_display; 3909 3910 i915_gem_object_flush_cpu_write_domain(obj); 3911 3912 old_write_domain = obj->base.write_domain; 3913 old_read_domains = obj->base.read_domains; 3914 3915 /* It should now be out of any other write domains, and we can update 3916 * the domain values for our changes. 3917 */ 3918 obj->base.write_domain = 0; 3919 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 3920 3921 trace_i915_gem_object_change_domain(obj, 3922 old_read_domains, 3923 old_write_domain); 3924 3925 return 0; 3926 3927 err_unpin_display: 3928 obj->pin_display--; 3929 return ret; 3930 } 3931 3932 void 3933 i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj, 3934 const struct i915_ggtt_view *view) 3935 { 3936 if (WARN_ON(obj->pin_display == 0)) 3937 return; 3938 3939 i915_gem_object_ggtt_unpin_view(obj, view); 3940 3941 obj->pin_display--; 3942 } 3943 3944 /** 3945 * Moves a single object to the CPU read, and possibly write domain. 3946 * @obj: object to act on 3947 * @write: requesting write or read-only access 3948 * 3949 * This function returns when the move is complete, including waiting on 3950 * flushes to occur. 
3951 */ 3952 int 3953 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) 3954 { 3955 uint32_t old_write_domain, old_read_domains; 3956 int ret; 3957 3958 ret = i915_gem_object_wait_rendering(obj, !write); 3959 if (ret) 3960 return ret; 3961 3962 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) 3963 return 0; 3964 3965 i915_gem_object_flush_gtt_write_domain(obj); 3966 3967 old_write_domain = obj->base.write_domain; 3968 old_read_domains = obj->base.read_domains; 3969 3970 /* Flush the CPU cache if it's still invalid. */ 3971 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) { 3972 i915_gem_clflush_object(obj, false); 3973 3974 obj->base.read_domains |= I915_GEM_DOMAIN_CPU; 3975 } 3976 3977 /* It should now be out of any other write domains, and we can update 3978 * the domain values for our changes. 3979 */ 3980 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0); 3981 3982 /* If we're writing through the CPU, then the GPU read domains will 3983 * need to be invalidated at next use. 3984 */ 3985 if (write) { 3986 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 3987 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 3988 } 3989 3990 trace_i915_gem_object_change_domain(obj, 3991 old_read_domains, 3992 old_write_domain); 3993 3994 return 0; 3995 } 3996 3997 /* Throttle our rendering by waiting until the ring has completed our requests 3998 * emitted over 20 msec ago. 3999 * 4000 * Note that if we were to use the current jiffies each time around the loop, 4001 * we wouldn't escape the function with any frames outstanding if the time to 4002 * render a frame was over 20ms. 4003 * 4004 * This should get us reasonable parallelism between CPU and GPU but also 4005 * relatively low latency when blocking on a particular request to finish. 4006 */ 4007 static int 4008 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) 4009 { 4010 struct drm_i915_private *dev_priv = to_i915(dev); 4011 struct drm_i915_file_private *file_priv = file->driver_priv; 4012 unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES; 4013 struct drm_i915_gem_request *request, *target = NULL; 4014 int ret; 4015 4016 ret = i915_gem_wait_for_error(&dev_priv->gpu_error); 4017 if (ret) 4018 return ret; 4019 4020 /* ABI: return -EIO if already wedged */ 4021 if (i915_terminally_wedged(&dev_priv->gpu_error)) 4022 return -EIO; 4023 4024 lockmgr(&file_priv->mm.lock, LK_EXCLUSIVE); 4025 list_for_each_entry(request, &file_priv->mm.request_list, client_list) { 4026 if (time_after_eq(request->emitted_jiffies, recent_enough)) 4027 break; 4028 4029 /* 4030 * Note that the request might not have been submitted yet. 4031 * In which case emitted_jiffies will be zero. 
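* Such not-yet-submitted requests are simply skipped below, so they are never chosen as the throttle target.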
4032 */ 4033 if (!request->emitted_jiffies) 4034 continue; 4035 4036 target = request; 4037 } 4038 if (target) 4039 i915_gem_request_get(target); 4040 lockmgr(&file_priv->mm.lock, LK_RELEASE); 4041 4042 if (target == NULL) 4043 return 0; 4044 4045 ret = __i915_wait_request(target, true, NULL, NULL); 4046 i915_gem_request_put(target); 4047 4048 return ret; 4049 } 4050 4051 static bool 4052 i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags) 4053 { 4054 struct drm_i915_gem_object *obj = vma->obj; 4055 4056 if (alignment && 4057 vma->node.start & (alignment - 1)) 4058 return true; 4059 4060 if (flags & PIN_MAPPABLE && !obj->map_and_fenceable) 4061 return true; 4062 4063 if (flags & PIN_OFFSET_BIAS && 4064 vma->node.start < (flags & PIN_OFFSET_MASK)) 4065 return true; 4066 4067 if (flags & PIN_OFFSET_FIXED && 4068 vma->node.start != (flags & PIN_OFFSET_MASK)) 4069 return true; 4070 4071 return false; 4072 } 4073 4074 void __i915_vma_set_map_and_fenceable(struct i915_vma *vma) 4075 { 4076 struct drm_i915_gem_object *obj = vma->obj; 4077 bool mappable, fenceable; 4078 u32 fence_size, fence_alignment; 4079 4080 fence_size = i915_gem_get_gtt_size(obj->base.dev, 4081 obj->base.size, 4082 obj->tiling_mode); 4083 fence_alignment = i915_gem_get_gtt_alignment(obj->base.dev, 4084 obj->base.size, 4085 obj->tiling_mode, 4086 true); 4087 4088 fenceable = (vma->node.size == fence_size && 4089 (vma->node.start & (fence_alignment - 1)) == 0); 4090 4091 mappable = (vma->node.start + fence_size <= 4092 to_i915(obj->base.dev)->ggtt.mappable_end); 4093 4094 obj->map_and_fenceable = mappable && fenceable; 4095 } 4096 4097 static int 4098 i915_gem_object_do_pin(struct drm_i915_gem_object *obj, 4099 struct i915_address_space *vm, 4100 const struct i915_ggtt_view *ggtt_view, 4101 uint32_t alignment, 4102 uint64_t flags) 4103 { 4104 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 4105 struct i915_vma *vma; 4106 unsigned bound; 4107 int ret; 4108 4109 if (WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base)) 4110 return -ENODEV; 4111 4112 if (WARN_ON(flags & (PIN_GLOBAL | PIN_MAPPABLE) && !i915_is_ggtt(vm))) 4113 return -EINVAL; 4114 4115 if (WARN_ON((flags & (PIN_MAPPABLE | PIN_GLOBAL)) == PIN_MAPPABLE)) 4116 return -EINVAL; 4117 4118 if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view)) 4119 return -EINVAL; 4120 4121 vma = ggtt_view ? i915_gem_obj_to_ggtt_view(obj, ggtt_view) : 4122 i915_gem_obj_to_vma(obj, vm); 4123 4124 if (vma) { 4125 if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT)) 4126 return -EBUSY; 4127 4128 if (i915_vma_misplaced(vma, alignment, flags)) { 4129 WARN(vma->pin_count, 4130 "bo is already pinned in %s with incorrect alignment:" 4131 " offset=%08x %08x, req.alignment=%x, req.map_and_fenceable=%d," 4132 " obj->map_and_fenceable=%d\n", 4133 ggtt_view ? "ggtt" : "ppgtt", 4134 upper_32_bits(vma->node.start), 4135 lower_32_bits(vma->node.start), 4136 alignment, 4137 !!(flags & PIN_MAPPABLE), 4138 obj->map_and_fenceable); 4139 ret = i915_vma_unbind(vma); 4140 if (ret) 4141 return ret; 4142 4143 vma = NULL; 4144 } 4145 } 4146 4147 bound = vma ? 
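/* remember the previous binding flags so a fresh GGTT bind can be detected below */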
vma->bound : 0; 4148 if (vma == NULL || !drm_mm_node_allocated(&vma->node)) { 4149 vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view, alignment, 4150 flags); 4151 if (IS_ERR(vma)) 4152 return PTR_ERR(vma); 4153 } else { 4154 ret = i915_vma_bind(vma, obj->cache_level, flags); 4155 if (ret) 4156 return ret; 4157 } 4158 4159 if (ggtt_view && ggtt_view->type == I915_GGTT_VIEW_NORMAL && 4160 (bound ^ vma->bound) & GLOBAL_BIND) { 4161 __i915_vma_set_map_and_fenceable(vma); 4162 WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable); 4163 } 4164 4165 vma->pin_count++; 4166 return 0; 4167 } 4168 4169 int 4170 i915_gem_object_pin(struct drm_i915_gem_object *obj, 4171 struct i915_address_space *vm, 4172 uint32_t alignment, 4173 uint64_t flags) 4174 { 4175 return i915_gem_object_do_pin(obj, vm, 4176 i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL, 4177 alignment, flags); 4178 } 4179 4180 int 4181 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, 4182 const struct i915_ggtt_view *view, 4183 uint32_t alignment, 4184 uint64_t flags) 4185 { 4186 struct drm_device *dev = obj->base.dev; 4187 struct drm_i915_private *dev_priv = to_i915(dev); 4188 struct i915_ggtt *ggtt = &dev_priv->ggtt; 4189 4190 BUG_ON(!view); 4191 4192 return i915_gem_object_do_pin(obj, &ggtt->base, view, 4193 alignment, flags | PIN_GLOBAL); 4194 } 4195 4196 void 4197 i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj, 4198 const struct i915_ggtt_view *view) 4199 { 4200 struct i915_vma *vma = i915_gem_obj_to_ggtt_view(obj, view); 4201 4202 WARN_ON(vma->pin_count == 0); 4203 WARN_ON(!i915_gem_obj_ggtt_bound_view(obj, view)); 4204 4205 --vma->pin_count; 4206 } 4207 4208 int 4209 i915_gem_busy_ioctl(struct drm_device *dev, void *data, 4210 struct drm_file *file) 4211 { 4212 struct drm_i915_gem_busy *args = data; 4213 struct drm_i915_gem_object *obj; 4214 int ret; 4215 4216 ret = i915_mutex_lock_interruptible(dev); 4217 if (ret) 4218 return ret; 4219 4220 obj = i915_gem_object_lookup(file, args->handle); 4221 if (!obj) { 4222 ret = -ENOENT; 4223 goto unlock; 4224 } 4225 4226 /* Count all active objects as busy, even if they are currently not used 4227 * by the gpu. Users of this interface expect objects to eventually 4228 * become non-busy without any further actions, therefore emit any 4229 * necessary flushes here. 
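* The busy value reported below carries the exec_id of the engine last writing to the object in its low 16 bits, and one bit per engine with an outstanding read in its upper 16 bits.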
4230 */ 4231 ret = i915_gem_object_flush_active(obj); 4232 if (ret) 4233 goto unref; 4234 4235 args->busy = 0; 4236 if (obj->active) { 4237 int i; 4238 4239 for (i = 0; i < I915_NUM_ENGINES; i++) { 4240 struct drm_i915_gem_request *req; 4241 4242 req = obj->last_read_req[i]; 4243 if (req) 4244 args->busy |= 1 << (16 + req->engine->exec_id); 4245 } 4246 if (obj->last_write_req) 4247 args->busy |= obj->last_write_req->engine->exec_id; 4248 } 4249 4250 unref: 4251 i915_gem_object_put(obj); 4252 unlock: 4253 mutex_unlock(&dev->struct_mutex); 4254 return ret; 4255 } 4256 4257 int 4258 i915_gem_throttle_ioctl(struct drm_device *dev, void *data, 4259 struct drm_file *file_priv) 4260 { 4261 return i915_gem_ring_throttle(dev, file_priv); 4262 } 4263 4264 int 4265 i915_gem_madvise_ioctl(struct drm_device *dev, void *data, 4266 struct drm_file *file_priv) 4267 { 4268 struct drm_i915_private *dev_priv = to_i915(dev); 4269 struct drm_i915_gem_madvise *args = data; 4270 struct drm_i915_gem_object *obj; 4271 int ret; 4272 4273 switch (args->madv) { 4274 case I915_MADV_DONTNEED: 4275 case I915_MADV_WILLNEED: 4276 break; 4277 default: 4278 return -EINVAL; 4279 } 4280 4281 ret = i915_mutex_lock_interruptible(dev); 4282 if (ret) 4283 return ret; 4284 4285 obj = i915_gem_object_lookup(file_priv, args->handle); 4286 if (!obj) { 4287 ret = -ENOENT; 4288 goto unlock; 4289 } 4290 4291 if (i915_gem_obj_is_pinned(obj)) { 4292 ret = -EINVAL; 4293 goto out; 4294 } 4295 4296 if (obj->pages && 4297 obj->tiling_mode != I915_TILING_NONE && 4298 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) { 4299 if (obj->madv == I915_MADV_WILLNEED) 4300 i915_gem_object_unpin_pages(obj); 4301 if (args->madv == I915_MADV_WILLNEED) 4302 i915_gem_object_pin_pages(obj); 4303 } 4304 4305 if (obj->madv != __I915_MADV_PURGED) 4306 obj->madv = args->madv; 4307 4308 /* if the object is no longer attached, discard its backing storage */ 4309 if (obj->madv == I915_MADV_DONTNEED && obj->pages == NULL) 4310 i915_gem_object_truncate(obj); 4311 4312 args->retained = obj->madv != __I915_MADV_PURGED; 4313 4314 out: 4315 i915_gem_object_put(obj); 4316 unlock: 4317 mutex_unlock(&dev->struct_mutex); 4318 return ret; 4319 } 4320 4321 void i915_gem_object_init(struct drm_i915_gem_object *obj, 4322 const struct drm_i915_gem_object_ops *ops) 4323 { 4324 int i; 4325 4326 INIT_LIST_HEAD(&obj->global_list); 4327 for (i = 0; i < I915_NUM_ENGINES; i++) 4328 INIT_LIST_HEAD(&obj->engine_list[i]); 4329 INIT_LIST_HEAD(&obj->obj_exec_link); 4330 INIT_LIST_HEAD(&obj->vma_list); 4331 INIT_LIST_HEAD(&obj->batch_pool_link); 4332 4333 obj->ops = ops; 4334 4335 obj->fence_reg = I915_FENCE_REG_NONE; 4336 obj->madv = I915_MADV_WILLNEED; 4337 4338 i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size); 4339 } 4340 4341 static const struct drm_i915_gem_object_ops i915_gem_object_ops = { 4342 .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE, 4343 .get_pages = i915_gem_object_get_pages_gtt, 4344 .put_pages = i915_gem_object_put_pages_gtt, 4345 }; 4346 4347 struct drm_i915_gem_object *i915_gem_object_create(struct drm_device *dev, 4348 size_t size) 4349 { 4350 struct drm_i915_gem_object *obj; 4351 #if 0 4352 struct address_space *mapping; 4353 gfp_t mask; 4354 #endif 4355 int ret; 4356 4357 obj = i915_gem_object_alloc(dev); 4358 if (obj == NULL) 4359 return ERR_PTR(-ENOMEM); 4360 4361 ret = drm_gem_object_init(dev, &obj->base, size); 4362 if (ret) 4363 goto fail; 4364 4365 #if 0 4366 mask = GFP_HIGHUSER | __GFP_RECLAIMABLE; 4367 if (IS_CRESTLINE(dev) || IS_BROADWATER(dev)) { 4368 /* 
965gm cannot relocate objects above 4GiB. */ 4369 mask &= ~__GFP_HIGHMEM; 4370 mask |= __GFP_DMA32; 4371 } 4372 4373 mapping = file_inode(obj->base.filp)->i_mapping; 4374 mapping_set_gfp_mask(mapping, mask); 4375 #endif 4376 4377 i915_gem_object_init(obj, &i915_gem_object_ops); 4378 4379 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 4380 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 4381 4382 if (HAS_LLC(dev)) { 4383 /* On some devices, we can have the GPU use the LLC (the CPU 4384 * cache) for about a 10% performance improvement 4385 * compared to uncached. Graphics requests other than 4386 * display scanout are coherent with the CPU in 4387 * accessing this cache. This means in this mode we 4388 * don't need to clflush on the CPU side, and on the 4389 * GPU side we only need to flush internal caches to 4390 * get data visible to the CPU. 4391 * 4392 * However, we maintain the display planes as UC, and so 4393 * need to rebind when first used as such. 4394 */ 4395 obj->cache_level = I915_CACHE_LLC; 4396 } else 4397 obj->cache_level = I915_CACHE_NONE; 4398 4399 trace_i915_gem_object_create(obj); 4400 4401 return obj; 4402 4403 fail: 4404 i915_gem_object_free(obj); 4405 4406 return ERR_PTR(ret); 4407 } 4408 4409 static bool discard_backing_storage(struct drm_i915_gem_object *obj) 4410 { 4411 /* If we are the last user of the backing storage (be it shmemfs 4412 * pages or stolen etc), we know that the pages are going to be 4413 * immediately released. In this case, we can then skip copying 4414 * back the contents from the GPU. 4415 */ 4416 4417 if (obj->madv != I915_MADV_WILLNEED) 4418 return false; 4419 4420 if (obj->base.filp == NULL) 4421 return true; 4422 4423 /* At first glance, this looks racy, but then again so would be 4424 * userspace racing mmap against close. However, the first external 4425 * reference to the filp can only be obtained through the 4426 * i915_gem_mmap_ioctl() which safeguards us against the user 4427 * acquiring such a reference whilst we are in the middle of 4428 * freeing the object. 4429 */ 4430 #if 0 4431 return atomic_long_read(&obj->base.filp->f_count) == 1; 4432 #else 4433 return false; 4434 #endif 4435 } 4436 4437 void i915_gem_free_object(struct drm_gem_object *gem_obj) 4438 { 4439 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); 4440 struct drm_device *dev = obj->base.dev; 4441 struct drm_i915_private *dev_priv = to_i915(dev); 4442 struct i915_vma *vma, *next; 4443 4444 intel_runtime_pm_get(dev_priv); 4445 4446 trace_i915_gem_object_destroy(obj); 4447 4448 list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) { 4449 int ret; 4450 4451 vma->pin_count = 0; 4452 ret = __i915_vma_unbind_no_wait(vma); 4453 if (WARN_ON(ret == -ERESTARTSYS)) { 4454 bool was_interruptible; 4455 4456 was_interruptible = dev_priv->mm.interruptible; 4457 dev_priv->mm.interruptible = false; 4458 4459 WARN_ON(i915_vma_unbind(vma)); 4460 4461 dev_priv->mm.interruptible = was_interruptible; 4462 } 4463 } 4464 4465 /* Stolen objects don't hold a ref, but do hold pin count. Fix that up 4466 * before progressing. 
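* (i.e. drop the pages pin that stolen objects carry, so that the put_pages call further down can actually release them).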
*/ 4467 if (obj->stolen) 4468 i915_gem_object_unpin_pages(obj); 4469 4470 WARN_ON(obj->frontbuffer_bits); 4471 4472 if (obj->pages && obj->madv == I915_MADV_WILLNEED && 4473 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES && 4474 obj->tiling_mode != I915_TILING_NONE) 4475 i915_gem_object_unpin_pages(obj); 4476 4477 if (WARN_ON(obj->pages_pin_count)) 4478 obj->pages_pin_count = 0; 4479 if (discard_backing_storage(obj)) 4480 obj->madv = I915_MADV_DONTNEED; 4481 i915_gem_object_put_pages(obj); 4482 4483 BUG_ON(obj->pages); 4484 4485 #if 0 4486 if (obj->base.import_attach) 4487 drm_prime_gem_destroy(&obj->base, NULL); 4488 #endif 4489 4490 if (obj->ops->release) 4491 obj->ops->release(obj); 4492 4493 drm_gem_object_release(&obj->base); 4494 i915_gem_info_remove_obj(dev_priv, obj->base.size); 4495 4496 kfree(obj->bit_17); 4497 i915_gem_object_free(obj); 4498 4499 intel_runtime_pm_put(dev_priv); 4500 } 4501 4502 struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj, 4503 struct i915_address_space *vm) 4504 { 4505 struct i915_vma *vma; 4506 list_for_each_entry(vma, &obj->vma_list, obj_link) { 4507 if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL && 4508 vma->vm == vm) 4509 return vma; 4510 } 4511 return NULL; 4512 } 4513 4514 struct i915_vma *i915_gem_obj_to_ggtt_view(struct drm_i915_gem_object *obj, 4515 const struct i915_ggtt_view *view) 4516 { 4517 struct i915_vma *vma; 4518 4519 GEM_BUG_ON(!view); 4520 4521 list_for_each_entry(vma, &obj->vma_list, obj_link) 4522 if (vma->is_ggtt && i915_ggtt_view_equal(&vma->ggtt_view, view)) 4523 return vma; 4524 return NULL; 4525 } 4526 4527 void i915_gem_vma_destroy(struct i915_vma *vma) 4528 { 4529 WARN_ON(vma->node.allocated); 4530 4531 /* Keep the vma as a placeholder in the execbuffer reservation lists */ 4532 if (!list_empty(&vma->exec_list)) 4533 return; 4534 4535 if (!vma->is_ggtt) 4536 i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm)); 4537 4538 list_del(&vma->obj_link); 4539 4540 kfree(vma); 4541 } 4542 4543 static void 4544 i915_gem_stop_engines(struct drm_device *dev) 4545 { 4546 struct drm_i915_private *dev_priv = to_i915(dev); 4547 struct intel_engine_cs *engine; 4548 4549 for_each_engine(engine, dev_priv) 4550 dev_priv->gt.stop_engine(engine); 4551 } 4552 4553 int 4554 i915_gem_suspend(struct drm_device *dev) 4555 { 4556 struct drm_i915_private *dev_priv = to_i915(dev); 4557 int ret = 0; 4558 4559 intel_suspend_gt_powersave(dev_priv); 4560 4561 mutex_lock(&dev->struct_mutex); 4562 4563 /* We have to flush all the executing contexts to main memory so 4564 * that they can be saved in the hibernation image. To ensure the last 4565 * context image is coherent, we have to switch away from it. That 4566 * leaves the dev_priv->kernel_context still active when 4567 * we actually suspend, and its image in memory may not match the GPU 4568 * state. Fortunately, the kernel_context is disposable and we do 4569 * not rely on its state. 4570 */ 4571 ret = i915_gem_switch_to_kernel_context(dev_priv); 4572 if (ret) 4573 goto err; 4574 4575 ret = i915_gem_wait_for_idle(dev_priv); 4576 if (ret) 4577 goto err; 4578 4579 i915_gem_retire_requests(dev_priv); 4580 4581 /* Note that rather than stopping the engines, all we have to do 4582 * is assert that every RING_HEAD == RING_TAIL (all execution complete) 4583 * and similar for all logical context images (to ensure they are 4584 * all ready for hibernation). 
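* For safety we still stop the engines explicitly just below.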
4585 */ 4586 i915_gem_stop_engines(dev); 4587 i915_gem_context_lost(dev_priv); 4588 mutex_unlock(&dev->struct_mutex); 4589 4590 cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work); 4591 cancel_delayed_work_sync(&dev_priv->gt.retire_work); 4592 flush_delayed_work(&dev_priv->gt.idle_work); 4593 4594 /* Assert that we successfully flushed all the work and 4595 * reset the GPU back to its idle, low power state. 4596 */ 4597 WARN_ON(dev_priv->gt.awake); 4598 4599 return 0; 4600 4601 err: 4602 mutex_unlock(&dev->struct_mutex); 4603 return ret; 4604 } 4605 4606 void i915_gem_resume(struct drm_device *dev) 4607 { 4608 struct drm_i915_private *dev_priv = to_i915(dev); 4609 4610 mutex_lock(&dev->struct_mutex); 4611 i915_gem_restore_gtt_mappings(dev); 4612 4613 /* As we didn't flush the kernel context before suspend, we cannot 4614 * guarantee that the context image is complete. So let's just reset 4615 * it and start again. 4616 */ 4617 if (i915.enable_execlists) 4618 intel_lr_context_reset(dev_priv, dev_priv->kernel_context); 4619 4620 mutex_unlock(&dev->struct_mutex); 4621 } 4622 4623 void i915_gem_init_swizzling(struct drm_device *dev) 4624 { 4625 struct drm_i915_private *dev_priv = to_i915(dev); 4626 4627 if (INTEL_INFO(dev)->gen < 5 || 4628 dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE) 4629 return; 4630 4631 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) | 4632 DISP_TILE_SURFACE_SWIZZLING); 4633 4634 if (IS_GEN5(dev)) 4635 return; 4636 4637 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL); 4638 if (IS_GEN6(dev)) 4639 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB)); 4640 else if (IS_GEN7(dev)) 4641 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB)); 4642 else if (IS_GEN8(dev)) 4643 I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW)); 4644 else 4645 BUG(); 4646 } 4647 4648 static void init_unused_ring(struct drm_device *dev, u32 base) 4649 { 4650 struct drm_i915_private *dev_priv = to_i915(dev); 4651 4652 I915_WRITE(RING_CTL(base), 0); 4653 I915_WRITE(RING_HEAD(base), 0); 4654 I915_WRITE(RING_TAIL(base), 0); 4655 I915_WRITE(RING_START(base), 0); 4656 } 4657 4658 static void init_unused_rings(struct drm_device *dev) 4659 { 4660 if (IS_I830(dev)) { 4661 init_unused_ring(dev, PRB1_BASE); 4662 init_unused_ring(dev, SRB0_BASE); 4663 init_unused_ring(dev, SRB1_BASE); 4664 init_unused_ring(dev, SRB2_BASE); 4665 init_unused_ring(dev, SRB3_BASE); 4666 } else if (IS_GEN2(dev)) { 4667 init_unused_ring(dev, SRB0_BASE); 4668 init_unused_ring(dev, SRB1_BASE); 4669 } else if (IS_GEN3(dev)) { 4670 init_unused_ring(dev, PRB1_BASE); 4671 init_unused_ring(dev, PRB2_BASE); 4672 } 4673 } 4674 4675 int 4676 i915_gem_init_hw(struct drm_device *dev) 4677 { 4678 struct drm_i915_private *dev_priv = to_i915(dev); 4679 struct intel_engine_cs *engine; 4680 int ret; 4681 4682 /* Double layer security blanket, see i915_gem_init() */ 4683 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 4684 4685 if (HAS_EDRAM(dev) && INTEL_GEN(dev_priv) < 9) 4686 I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf)); 4687 4688 if (IS_HASWELL(dev)) 4689 I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev) ? 
4690 LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED); 4691 4692 if (HAS_PCH_NOP(dev)) { 4693 if (IS_IVYBRIDGE(dev)) { 4694 u32 temp = I915_READ(GEN7_MSG_CTL); 4695 temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK); 4696 I915_WRITE(GEN7_MSG_CTL, temp); 4697 } else if (INTEL_INFO(dev)->gen >= 7) { 4698 u32 temp = I915_READ(HSW_NDE_RSTWRN_OPT); 4699 temp &= ~RESET_PCH_HANDSHAKE_ENABLE; 4700 I915_WRITE(HSW_NDE_RSTWRN_OPT, temp); 4701 } 4702 } 4703 4704 i915_gem_init_swizzling(dev); 4705 4706 /* 4707 * At least 830 can leave some of the unused rings 4708 * "active" (ie. head != tail) after resume which 4709 * will prevent c3 entry. Make sure all unused rings 4710 * are totally idle. 4711 */ 4712 init_unused_rings(dev); 4713 4714 BUG_ON(!dev_priv->kernel_context); 4715 4716 ret = i915_ppgtt_init_hw(dev); 4717 if (ret) { 4718 DRM_ERROR("PPGTT enable HW failed %d\n", ret); 4719 goto out; 4720 } 4721 4722 /* Need to do basic initialisation of all rings first: */ 4723 for_each_engine(engine, dev_priv) { 4724 ret = engine->init_hw(engine); 4725 if (ret) 4726 goto out; 4727 } 4728 4729 intel_mocs_init_l3cc_table(dev); 4730 4731 /* We can't enable contexts until all firmware is loaded */ 4732 ret = intel_guc_setup(dev); 4733 if (ret) 4734 goto out; 4735 4736 out: 4737 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 4738 return ret; 4739 } 4740 4741 bool intel_sanitize_semaphores(struct drm_i915_private *dev_priv, int value) 4742 { 4743 if (INTEL_INFO(dev_priv)->gen < 6) 4744 return false; 4745 4746 /* TODO: make semaphores and Execlists play nicely together */ 4747 if (i915.enable_execlists) 4748 return false; 4749 4750 if (value >= 0) 4751 return value; 4752 4753 #ifdef CONFIG_INTEL_IOMMU 4754 /* Enable semaphores on SNB when IO remapping is off */ 4755 if (INTEL_INFO(dev_priv)->gen == 6 && intel_iommu_gfx_mapped) 4756 return false; 4757 #endif 4758 4759 return true; 4760 } 4761 4762 int i915_gem_init(struct drm_device *dev) 4763 { 4764 struct drm_i915_private *dev_priv = to_i915(dev); 4765 int ret; 4766 4767 mutex_lock(&dev->struct_mutex); 4768 4769 if (!i915.enable_execlists) { 4770 dev_priv->gt.execbuf_submit = i915_gem_ringbuffer_submission; 4771 dev_priv->gt.cleanup_engine = intel_cleanup_engine; 4772 dev_priv->gt.stop_engine = intel_stop_engine; 4773 } else { 4774 dev_priv->gt.execbuf_submit = intel_execlists_submission; 4775 dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup; 4776 dev_priv->gt.stop_engine = intel_logical_ring_stop; 4777 } 4778 4779 /* This is just a security blanket to placate dragons. 4780 * On some systems, we very sporadically observe that the first TLBs 4781 * used by the CS may be stale, despite us poking the TLB reset. If 4782 * we hold the forcewake during initialisation these problems 4783 * just magically go away. 4784 */ 4785 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 4786 4787 i915_gem_init_userptr(dev_priv); 4788 i915_gem_init_ggtt(dev); 4789 4790 ret = i915_gem_context_init(dev); 4791 if (ret) 4792 goto out_unlock; 4793 4794 ret = intel_engines_init(dev); 4795 if (ret) 4796 goto out_unlock; 4797 4798 ret = i915_gem_init_hw(dev); 4799 if (ret == -EIO) { 4800 /* Allow ring initialisation to fail by marking the GPU as 4801 * wedged. But we only want to do this where the GPU is angry, 4802 * for all other failures, such as an allocation failure, bail. 
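* Once wedged, subsequent GEM execution ioctls fail with -EIO instead of hanging.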
4803 */ 4804 DRM_ERROR("Failed to initialize GPU, declaring it wedged\n"); 4805 atomic_or(I915_WEDGED, &dev_priv->gpu_error.reset_counter); 4806 ret = 0; 4807 } 4808 4809 out_unlock: 4810 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 4811 mutex_unlock(&dev->struct_mutex); 4812 4813 return ret; 4814 } 4815 4816 void 4817 i915_gem_cleanup_engines(struct drm_device *dev) 4818 { 4819 struct drm_i915_private *dev_priv = to_i915(dev); 4820 struct intel_engine_cs *engine; 4821 4822 for_each_engine(engine, dev_priv) 4823 dev_priv->gt.cleanup_engine(engine); 4824 } 4825 4826 static void 4827 init_engine_lists(struct intel_engine_cs *engine) 4828 { 4829 INIT_LIST_HEAD(&engine->active_list); 4830 INIT_LIST_HEAD(&engine->request_list); 4831 } 4832 4833 void 4834 i915_gem_load_init_fences(struct drm_i915_private *dev_priv) 4835 { 4836 struct drm_device *dev = &dev_priv->drm; 4837 4838 if (INTEL_INFO(dev_priv)->gen >= 7 && !IS_VALLEYVIEW(dev_priv) && 4839 !IS_CHERRYVIEW(dev_priv)) 4840 dev_priv->num_fence_regs = 32; 4841 else if (INTEL_INFO(dev_priv)->gen >= 4 || IS_I945G(dev_priv) || 4842 IS_I945GM(dev_priv) || IS_G33(dev_priv)) 4843 dev_priv->num_fence_regs = 16; 4844 else 4845 dev_priv->num_fence_regs = 8; 4846 4847 if (intel_vgpu_active(dev_priv)) 4848 dev_priv->num_fence_regs = 4849 I915_READ(vgtif_reg(avail_rs.fence_num)); 4850 4851 /* Initialize fence registers to zero */ 4852 i915_gem_restore_fences(dev); 4853 4854 i915_gem_detect_bit_6_swizzle(dev); 4855 } 4856 4857 void 4858 i915_gem_load_init(struct drm_device *dev) 4859 { 4860 struct drm_i915_private *dev_priv = to_i915(dev); 4861 int i; 4862 4863 #if 0 4864 dev_priv->objects = 4865 kmem_cache_create("i915_gem_object", 4866 sizeof(struct drm_i915_gem_object), 0, 4867 SLAB_HWCACHE_ALIGN, 4868 NULL); 4869 dev_priv->vmas = 4870 kmem_cache_create("i915_gem_vma", 4871 sizeof(struct i915_vma), 0, 4872 SLAB_HWCACHE_ALIGN, 4873 NULL); 4874 dev_priv->requests = 4875 kmem_cache_create("i915_gem_request", 4876 sizeof(struct drm_i915_gem_request), 0, 4877 SLAB_HWCACHE_ALIGN, 4878 NULL); 4879 #endif 4880 4881 INIT_LIST_HEAD(&dev_priv->vm_list); 4882 INIT_LIST_HEAD(&dev_priv->context_list); 4883 INIT_LIST_HEAD(&dev_priv->mm.unbound_list); 4884 INIT_LIST_HEAD(&dev_priv->mm.bound_list); 4885 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 4886 for (i = 0; i < I915_NUM_ENGINES; i++) 4887 init_engine_lists(&dev_priv->engine[i]); 4888 for (i = 0; i < I915_MAX_NUM_FENCES; i++) 4889 INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list); 4890 INIT_DELAYED_WORK(&dev_priv->gt.retire_work, 4891 i915_gem_retire_work_handler); 4892 INIT_DELAYED_WORK(&dev_priv->gt.idle_work, 4893 i915_gem_idle_work_handler); 4894 init_waitqueue_head(&dev_priv->gpu_error.wait_queue); 4895 init_waitqueue_head(&dev_priv->gpu_error.reset_queue); 4896 4897 dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL; 4898 4899 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 4900 4901 init_waitqueue_head(&dev_priv->pending_flip_queue); 4902 4903 dev_priv->mm.interruptible = true; 4904 4905 lockinit(&dev_priv->fb_tracking.lock, "drmftl", 0, LK_CANRECURSE); 4906 } 4907 4908 void i915_gem_load_cleanup(struct drm_device *dev) 4909 { 4910 #if 0 4911 struct drm_i915_private *dev_priv = to_i915(dev); 4912 4913 kmem_cache_destroy(dev_priv->requests); 4914 kmem_cache_destroy(dev_priv->vmas); 4915 kmem_cache_destroy(dev_priv->objects); 4916 #endif 4917 } 4918 4919 int i915_gem_freeze_late(struct drm_i915_private *dev_priv) 4920 { 4921 struct drm_i915_gem_object *obj; 4922 4923 /* Called just before we 
write the hibernation image. 4924 * 4925 * We need to update the domain tracking to reflect that the CPU 4926 * will be accessing all the pages to create and restore from the 4927 * hibernation, and so upon restoration those pages will be in the 4928 * CPU domain. 4929 * 4930 * To make sure the hibernation image contains the latest state, 4931 * we update that state just before writing out the image. 4932 */ 4933 4934 list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list) { 4935 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 4936 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 4937 } 4938 4939 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { 4940 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 4941 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 4942 } 4943 4944 return 0; 4945 } 4946 4947 void i915_gem_release(struct drm_device *dev, struct drm_file *file) 4948 { 4949 struct drm_i915_file_private *file_priv = file->driver_priv; 4950 4951 /* Clean up our request list when the client is going away, so that 4952 * later retire_requests won't dereference our soon-to-be-gone 4953 * file_priv. 4954 */ 4955 lockmgr(&file_priv->mm.lock, LK_EXCLUSIVE); 4956 while (!list_empty(&file_priv->mm.request_list)) { 4957 struct drm_i915_gem_request *request; 4958 4959 request = list_first_entry(&file_priv->mm.request_list, 4960 struct drm_i915_gem_request, 4961 client_list); 4962 list_del(&request->client_list); 4963 request->file_priv = NULL; 4964 } 4965 lockmgr(&file_priv->mm.lock, LK_RELEASE); 4966 4967 if (!list_empty(&file_priv->rps.link)) { 4968 lockmgr(&to_i915(dev)->rps.client_lock, LK_EXCLUSIVE); 4969 list_del(&file_priv->rps.link); 4970 lockmgr(&to_i915(dev)->rps.client_lock, LK_RELEASE); 4971 } 4972 } 4973 4974 int 4975 i915_gem_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot, 4976 vm_ooffset_t foff, struct ucred *cred, u_short *color) 4977 { 4978 *color = 0; /* XXXKIB */ 4979 return (0); 4980 } 4981 4982 void 4983 i915_gem_pager_dtor(void *handle) 4984 { 4985 struct drm_gem_object *obj; 4986 struct drm_device *dev; 4987 4988 obj = handle; 4989 dev = obj->dev; 4990 4991 mutex_lock(&dev->struct_mutex); 4992 drm_gem_free_mmap_offset(obj); 4993 i915_gem_release_mmap(to_intel_bo(obj)); 4994 drm_gem_object_unreference(obj); 4995 mutex_unlock(&dev->struct_mutex); 4996 } 4997 4998 int i915_gem_open(struct drm_device *dev, struct drm_file *file) 4999 { 5000 struct drm_i915_file_private *file_priv; 5001 int ret; 5002 5003 DRM_DEBUG_DRIVER("\n"); 5004 5005 file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL); 5006 if (!file_priv) 5007 return -ENOMEM; 5008 5009 file->driver_priv = file_priv; 5010 file_priv->dev_priv = to_i915(dev); 5011 file_priv->file = file; 5012 INIT_LIST_HEAD(&file_priv->rps.link); 5013 5014 lockinit(&file_priv->mm.lock, "i915_priv", 0, 0); 5015 INIT_LIST_HEAD(&file_priv->mm.request_list); 5016 5017 file_priv->bsd_ring = -1; 5018 5019 ret = i915_gem_context_open(dev, file); 5020 if (ret) 5021 kfree(file_priv); 5022 5023 return ret; 5024 } 5025 5026 /** 5027 * i915_gem_track_fb - update frontbuffer tracking 5028 * @old: current GEM buffer for the frontbuffer slots 5029 * @new: new GEM buffer for the frontbuffer slots 5030 * @frontbuffer_bits: bitmask of frontbuffer slots 5031 * 5032 * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them 5033 * from @old and setting them in @new. Both @old and @new can be NULL. 
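* Illustrative use only (old_fb, new_fb and pipe are example names): when a plane flips from old_fb to new_fb one might call i915_gem_track_fb(intel_fb_obj(old_fb), intel_fb_obj(new_fb), INTEL_FRONTBUFFER_PRIMARY(pipe));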
5034 */ 5035 void i915_gem_track_fb(struct drm_i915_gem_object *old, 5036 struct drm_i915_gem_object *new, 5037 unsigned frontbuffer_bits) 5038 { 5039 if (old) { 5040 WARN_ON(!mutex_is_locked(&old->base.dev->struct_mutex)); 5041 WARN_ON(!(old->frontbuffer_bits & frontbuffer_bits)); 5042 old->frontbuffer_bits &= ~frontbuffer_bits; 5043 } 5044 5045 if (new) { 5046 WARN_ON(!mutex_is_locked(&new->base.dev->struct_mutex)); 5047 WARN_ON(new->frontbuffer_bits & frontbuffer_bits); 5048 new->frontbuffer_bits |= frontbuffer_bits; 5049 } 5050 } 5051 5052 /* All the new VM stuff */ 5053 u64 i915_gem_obj_offset(struct drm_i915_gem_object *o, 5054 struct i915_address_space *vm) 5055 { 5056 struct drm_i915_private *dev_priv = to_i915(o->base.dev); 5057 struct i915_vma *vma; 5058 5059 WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base); 5060 5061 list_for_each_entry(vma, &o->vma_list, obj_link) { 5062 if (vma->is_ggtt && 5063 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) 5064 continue; 5065 if (vma->vm == vm) 5066 return vma->node.start; 5067 } 5068 5069 WARN(1, "%s vma for this object not found.\n", 5070 i915_is_ggtt(vm) ? "global" : "ppgtt"); 5071 return -1; 5072 } 5073 5074 u64 i915_gem_obj_ggtt_offset_view(struct drm_i915_gem_object *o, 5075 const struct i915_ggtt_view *view) 5076 { 5077 struct i915_vma *vma; 5078 5079 list_for_each_entry(vma, &o->vma_list, obj_link) 5080 if (vma->is_ggtt && i915_ggtt_view_equal(&vma->ggtt_view, view)) 5081 return vma->node.start; 5082 5083 WARN(1, "global vma for this object not found. (view=%u)\n", view->type); 5084 return -1; 5085 } 5086 5087 bool i915_gem_obj_bound(struct drm_i915_gem_object *o, 5088 struct i915_address_space *vm) 5089 { 5090 struct i915_vma *vma; 5091 5092 list_for_each_entry(vma, &o->vma_list, obj_link) { 5093 if (vma->is_ggtt && 5094 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) 5095 continue; 5096 if (vma->vm == vm && drm_mm_node_allocated(&vma->node)) 5097 return true; 5098 } 5099 5100 return false; 5101 } 5102 5103 bool i915_gem_obj_ggtt_bound_view(struct drm_i915_gem_object *o, 5104 const struct i915_ggtt_view *view) 5105 { 5106 struct i915_vma *vma; 5107 5108 list_for_each_entry(vma, &o->vma_list, obj_link) 5109 if (vma->is_ggtt && 5110 i915_ggtt_view_equal(&vma->ggtt_view, view) && 5111 drm_mm_node_allocated(&vma->node)) 5112 return true; 5113 5114 return false; 5115 } 5116 5117 bool i915_gem_obj_bound_any(struct drm_i915_gem_object *o) 5118 { 5119 struct i915_vma *vma; 5120 5121 list_for_each_entry(vma, &o->vma_list, obj_link) 5122 if (drm_mm_node_allocated(&vma->node)) 5123 return true; 5124 5125 return false; 5126 } 5127 5128 unsigned long i915_gem_obj_ggtt_size(struct drm_i915_gem_object *o) 5129 { 5130 struct i915_vma *vma; 5131 5132 GEM_BUG_ON(list_empty(&o->vma_list)); 5133 5134 list_for_each_entry(vma, &o->vma_list, obj_link) { 5135 if (vma->is_ggtt && 5136 vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) 5137 return vma->node.size; 5138 } 5139 5140 return 0; 5141 } 5142 5143 bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj) 5144 { 5145 struct i915_vma *vma; 5146 list_for_each_entry(vma, &obj->vma_list, obj_link) 5147 if (vma->pin_count > 0) 5148 return true; 5149 5150 return false; 5151 } 5152 5153 /* Like i915_gem_object_get_page(), but mark the returned page dirty */ 5154 struct page * 5155 i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, int n) 5156 { 5157 struct page *page; 5158 5159 /* Only default objects have per-page dirty tracking */ 5160 if (WARN_ON(!i915_gem_object_has_struct_page(obj))) 5161 
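/* e.g. stolen or dma-buf backed objects have no shmem pages to mark dirty */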
return NULL; 5162 5163 page = i915_gem_object_get_page(obj, n); 5164 set_page_dirty(page); 5165 return page; 5166 } 5167 5168 /* Allocate a new GEM object and fill it with the supplied data */ 5169 struct drm_i915_gem_object * 5170 i915_gem_object_create_from_data(struct drm_device *dev, 5171 const void *data, size_t size) 5172 { 5173 struct drm_i915_gem_object *obj; 5174 struct sg_table *sg; 5175 size_t bytes; 5176 int ret; 5177 5178 obj = i915_gem_object_create(dev, round_up(size, PAGE_SIZE)); 5179 if (IS_ERR(obj)) 5180 return obj; 5181 5182 ret = i915_gem_object_set_to_cpu_domain(obj, true); 5183 if (ret) 5184 goto fail; 5185 5186 ret = i915_gem_object_get_pages(obj); 5187 if (ret) 5188 goto fail; 5189 5190 i915_gem_object_pin_pages(obj); 5191 sg = obj->pages; 5192 bytes = sg_copy_from_buffer(sg->sgl, sg->nents, (void *)data, size); 5193 obj->dirty = 1; /* Backing store is now out of date */ 5194 i915_gem_object_unpin_pages(obj); 5195 5196 if (WARN_ON(bytes != size)) { 5197 DRM_ERROR("Incomplete copy, wrote %zu of %zu", bytes, size); 5198 ret = -EFAULT; 5199 goto fail; 5200 } 5201 5202 return obj; 5203 5204 fail: 5205 i915_gem_object_put(obj); 5206 return ERR_PTR(ret); 5207 } 5208