1 /* 2 * Copyright © 2008 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 * Authors: 24 * Eric Anholt <eric@anholt.net> 25 * 26 * Copyright (c) 2011 The FreeBSD Foundation 27 * All rights reserved. 28 * 29 * This software was developed by Konstantin Belousov under sponsorship from 30 * the FreeBSD Foundation. 31 * 32 * Redistribution and use in source and binary forms, with or without 33 * modification, are permitted provided that the following conditions 34 * are met: 35 * 1. Redistributions of source code must retain the above copyright 36 * notice, this list of conditions and the following disclaimer. 37 * 2. Redistributions in binary form must reproduce the above copyright 38 * notice, this list of conditions and the following disclaimer in the 39 * documentation and/or other materials provided with the distribution. 40 * 41 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 51 * SUCH DAMAGE. 52 * 53 */ 54 55 #include <machine/md_var.h> 56 57 #include <drm/drmP.h> 58 #include <drm/drm_vma_manager.h> 59 #include <drm/i915_drm.h> 60 #include "i915_drv.h" 61 #include "i915_trace.h" 62 #include "intel_drv.h" 63 #include <linux/shmem_fs.h> 64 #include <linux/slab.h> 65 #include <linux/swap.h> 66 #include <linux/pci.h> 67 68 static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj); 69 static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj); 70 static __must_check int 71 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, 72 bool readonly); 73 static void 74 i915_gem_object_retire(struct drm_i915_gem_object *obj); 75 76 static void i915_gem_write_fence(struct drm_device *dev, int reg, 77 struct drm_i915_gem_object *obj); 78 static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj, 79 struct drm_i915_fence_reg *fence, 80 bool enable); 81 82 static unsigned long i915_gem_shrink_all(struct drm_i915_private *dev_priv); 83 84 static bool cpu_cache_is_coherent(struct drm_device *dev, 85 enum i915_cache_level level) 86 { 87 return HAS_LLC(dev) || level != I915_CACHE_NONE; 88 } 89 90 static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) 91 { 92 if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) 93 return true; 94 95 return obj->pin_display; 96 } 97 98 static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj) 99 { 100 if (obj->tiling_mode) 101 i915_gem_release_mmap(obj); 102 103 /* As we do not have an associated fence register, we will force 104 * a tiling change if we ever need to acquire one. 105 */ 106 obj->fence_dirty = false; 107 obj->fence_reg = I915_FENCE_REG_NONE; 108 } 109 110 /* some bookkeeping */ 111 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv, 112 size_t size) 113 { 114 spin_lock(&dev_priv->mm.object_stat_lock); 115 dev_priv->mm.object_count++; 116 dev_priv->mm.object_memory += size; 117 spin_unlock(&dev_priv->mm.object_stat_lock); 118 } 119 120 static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv, 121 size_t size) 122 { 123 spin_lock(&dev_priv->mm.object_stat_lock); 124 dev_priv->mm.object_count--; 125 dev_priv->mm.object_memory -= size; 126 spin_unlock(&dev_priv->mm.object_stat_lock); 127 } 128 129 static int 130 i915_gem_wait_for_error(struct i915_gpu_error *error) 131 { 132 int ret; 133 134 #define EXIT_COND (!i915_reset_in_progress(error) || \ 135 i915_terminally_wedged(error)) 136 if (EXIT_COND) 137 return 0; 138 139 /* 140 * Only wait 10 seconds for the gpu reset to complete to avoid hanging 141 * userspace. If it takes that long something really bad is going on and 142 * we should simply try to bail out and fail as gracefully as possible. 143 */ 144 ret = wait_event_interruptible_timeout(error->reset_queue, 145 EXIT_COND, 146 10*HZ); 147 if (ret == 0) { 148 DRM_ERROR("Timed out waiting for the gpu reset to complete\n"); 149 return -EIO; 150 } else if (ret < 0) { 151 return ret; 152 } 153 #undef EXIT_COND 154 155 return 0; 156 } 157 158 int i915_mutex_lock_interruptible(struct drm_device *dev) 159 { 160 struct drm_i915_private *dev_priv = dev->dev_private; 161 int ret; 162 163 ret = i915_gem_wait_for_error(&dev_priv->gpu_error); 164 if (ret) 165 return ret; 166 167 ret = mutex_lock_interruptible(&dev->struct_mutex); 168 if (ret) 169 return ret; 170 171 WARN_ON(i915_verify_lists(dev)); 172 return 0; 173 } 174 175 int 176 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, 177 struct drm_file *file) 178 { 179 struct drm_i915_private *dev_priv = dev->dev_private; 180 struct drm_i915_gem_get_aperture *args = data; 181 struct drm_i915_gem_object *obj; 182 size_t pinned; 183 184 pinned = 0; 185 mutex_lock(&dev->struct_mutex); 186 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) 187 if (i915_gem_obj_is_pinned(obj)) 188 pinned += i915_gem_obj_ggtt_size(obj); 189 mutex_unlock(&dev->struct_mutex); 190 191 args->aper_size = dev_priv->gtt.base.total; 192 args->aper_available_size = args->aper_size - pinned; 193 194 return 0; 195 } 196 197 #if 0 198 static int 199 i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) 200 { 201 struct address_space *mapping = file_inode(obj->base.filp)->i_mapping; 202 char *vaddr = obj->phys_handle->vaddr; 203 struct sg_table *st; 204 struct scatterlist *sg; 205 int i; 206 207 if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj))) 208 return -EINVAL; 209 210 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) { 211 struct vm_page *page; 212 char *src; 213 214 page = shmem_read_mapping_page(mapping, i); 215 if (IS_ERR(page)) 216 return PTR_ERR(page); 217 218 src = kmap_atomic(page); 219 memcpy(vaddr, src, PAGE_SIZE); 220 drm_clflush_virt_range(vaddr, PAGE_SIZE); 221 kunmap_atomic(src); 222 223 page_cache_release(page); 224 vaddr += PAGE_SIZE; 225 } 226 227 i915_gem_chipset_flush(obj->base.dev); 228 229 st = kmalloc(sizeof(*st), GFP_KERNEL); 230 if (st == NULL) 231 return -ENOMEM; 232 233 if (sg_alloc_table(st, 1, GFP_KERNEL)) { 234 kfree(st); 235 return -ENOMEM; 236 } 237 238 sg = st->sgl; 239 sg->offset = 0; 240 sg->length = obj->base.size; 241 242 sg_dma_address(sg) = obj->phys_handle->busaddr; 243 sg_dma_len(sg) = obj->base.size; 244 245 obj->pages = st; 246 obj->has_dma_mapping = true; 247 return 0; 248 } 249 250 static void 251 i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj) 252 { 253 int ret; 254 255 BUG_ON(obj->madv == __I915_MADV_PURGED); 256 257 ret = i915_gem_object_set_to_cpu_domain(obj, true); 258 if (ret) { 259 /* In the event of a disaster, abandon all caches and 260 * hope for the best. 261 */ 262 WARN_ON(ret != -EIO); 263 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU; 264 } 265 266 if (obj->madv == I915_MADV_DONTNEED) 267 obj->dirty = 0; 268 269 if (obj->dirty) { 270 struct address_space *mapping = file_inode(obj->base.filp)->i_mapping; 271 char *vaddr = obj->phys_handle->vaddr; 272 int i; 273 274 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) { 275 struct page *page; 276 char *dst; 277 278 page = shmem_read_mapping_page(mapping, i); 279 if (IS_ERR(page)) 280 continue; 281 282 dst = kmap_atomic(page); 283 drm_clflush_virt_range(vaddr, PAGE_SIZE); 284 memcpy(dst, vaddr, PAGE_SIZE); 285 kunmap_atomic(dst); 286 287 set_page_dirty(page); 288 if (obj->madv == I915_MADV_WILLNEED) 289 mark_page_accessed(page); 290 page_cache_release(page); 291 vaddr += PAGE_SIZE; 292 } 293 obj->dirty = 0; 294 } 295 296 sg_free_table(obj->pages); 297 kfree(obj->pages); 298 299 obj->has_dma_mapping = false; 300 } 301 302 static void 303 i915_gem_object_release_phys(struct drm_i915_gem_object *obj) 304 { 305 drm_pci_free(obj->base.dev, obj->phys_handle); 306 } 307 308 static const struct drm_i915_gem_object_ops i915_gem_phys_ops = { 309 .get_pages = i915_gem_object_get_pages_phys, 310 .put_pages = i915_gem_object_put_pages_phys, 311 .release = i915_gem_object_release_phys, 312 }; 313 #endif 314 315 static int 316 drop_pages(struct drm_i915_gem_object *obj) 317 { 318 struct i915_vma *vma, *next; 319 int ret; 320 321 drm_gem_object_reference(&obj->base); 322 list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) 323 if (i915_vma_unbind(vma)) 324 break; 325 326 ret = i915_gem_object_put_pages(obj); 327 drm_gem_object_unreference(&obj->base); 328 329 return ret; 330 } 331 332 int 333 i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, 334 int align) 335 { 336 drm_dma_handle_t *phys; 337 int ret; 338 339 if (obj->phys_handle) { 340 if ((unsigned long)obj->phys_handle->vaddr & (align -1)) 341 return -EBUSY; 342 343 return 0; 344 } 345 346 if (obj->madv != I915_MADV_WILLNEED) 347 return -EFAULT; 348 349 #if 0 350 if (obj->base.filp == NULL) 351 return -EINVAL; 352 #endif 353 354 ret = drop_pages(obj); 355 if (ret) 356 return ret; 357 358 /* create a new object */ 359 phys = drm_pci_alloc(obj->base.dev, obj->base.size, align); 360 if (!phys) 361 return -ENOMEM; 362 363 obj->phys_handle = phys; 364 #if 0 365 obj->ops = &i915_gem_phys_ops; 366 #endif 367 368 return i915_gem_object_get_pages(obj); 369 } 370 371 static int 372 i915_gem_phys_pwrite(struct drm_i915_gem_object *obj, 373 struct drm_i915_gem_pwrite *args, 374 struct drm_file *file_priv) 375 { 376 struct drm_device *dev = obj->base.dev; 377 void *vaddr = (char *)obj->phys_handle->vaddr + args->offset; 378 char __user *user_data = to_user_ptr(args->data_ptr); 379 int ret; 380 381 /* We manually control the domain here and pretend that it 382 * remains coherent i.e. in the GTT domain, like shmem_pwrite. 383 */ 384 ret = i915_gem_object_wait_rendering(obj, false); 385 if (ret) 386 return ret; 387 388 if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) { 389 unsigned long unwritten; 390 391 /* The physical object once assigned is fixed for the lifetime 392 * of the obj, so we can safely drop the lock and continue 393 * to access vaddr. 394 */ 395 mutex_unlock(&dev->struct_mutex); 396 unwritten = copy_from_user(vaddr, user_data, args->size); 397 mutex_lock(&dev->struct_mutex); 398 if (unwritten) 399 return -EFAULT; 400 } 401 402 drm_clflush_virt_range(vaddr, args->size); 403 i915_gem_chipset_flush(dev); 404 return 0; 405 } 406 407 void *i915_gem_object_alloc(struct drm_device *dev) 408 { 409 return kmalloc(sizeof(struct drm_i915_gem_object), 410 M_DRM, M_WAITOK | M_ZERO); 411 } 412 413 void i915_gem_object_free(struct drm_i915_gem_object *obj) 414 { 415 kfree(obj); 416 } 417 418 static int 419 i915_gem_create(struct drm_file *file, 420 struct drm_device *dev, 421 uint64_t size, 422 uint32_t *handle_p) 423 { 424 struct drm_i915_gem_object *obj; 425 int ret; 426 u32 handle; 427 428 size = roundup(size, PAGE_SIZE); 429 if (size == 0) 430 return -EINVAL; 431 432 /* Allocate the new object */ 433 obj = i915_gem_alloc_object(dev, size); 434 if (obj == NULL) 435 return -ENOMEM; 436 437 ret = drm_gem_handle_create(file, &obj->base, &handle); 438 /* drop reference from allocate - handle holds it now */ 439 drm_gem_object_unreference_unlocked(&obj->base); 440 if (ret) 441 return ret; 442 443 *handle_p = handle; 444 return 0; 445 } 446 447 int 448 i915_gem_dumb_create(struct drm_file *file, 449 struct drm_device *dev, 450 struct drm_mode_create_dumb *args) 451 { 452 /* have to work out size/pitch and return them */ 453 args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64); 454 args->size = args->pitch * args->height; 455 return i915_gem_create(file, dev, 456 args->size, &args->handle); 457 } 458 459 /** 460 * Creates a new mm object and returns a handle to it. 461 */ 462 int 463 i915_gem_create_ioctl(struct drm_device *dev, void *data, 464 struct drm_file *file) 465 { 466 struct drm_i915_gem_create *args = data; 467 468 return i915_gem_create(file, dev, 469 args->size, &args->handle); 470 } 471 472 static inline int 473 __copy_to_user_swizzled(char __user *cpu_vaddr, 474 const char *gpu_vaddr, int gpu_offset, 475 int length) 476 { 477 int ret, cpu_offset = 0; 478 479 while (length > 0) { 480 int cacheline_end = ALIGN(gpu_offset + 1, 64); 481 int this_length = min(cacheline_end - gpu_offset, length); 482 int swizzled_gpu_offset = gpu_offset ^ 64; 483 484 ret = __copy_to_user(cpu_vaddr + cpu_offset, 485 gpu_vaddr + swizzled_gpu_offset, 486 this_length); 487 if (ret) 488 return ret + length; 489 490 cpu_offset += this_length; 491 gpu_offset += this_length; 492 length -= this_length; 493 } 494 495 return 0; 496 } 497 498 static inline int 499 __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset, 500 const char __user *cpu_vaddr, 501 int length) 502 { 503 int ret, cpu_offset = 0; 504 505 while (length > 0) { 506 int cacheline_end = ALIGN(gpu_offset + 1, 64); 507 int this_length = min(cacheline_end - gpu_offset, length); 508 int swizzled_gpu_offset = gpu_offset ^ 64; 509 510 ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset, 511 cpu_vaddr + cpu_offset, 512 this_length); 513 if (ret) 514 return ret + length; 515 516 cpu_offset += this_length; 517 gpu_offset += this_length; 518 length -= this_length; 519 } 520 521 return 0; 522 } 523 524 /* 525 * Pins the specified object's pages and synchronizes the object with 526 * GPU accesses. Sets needs_clflush to non-zero if the caller should 527 * flush the object from the CPU cache. 528 */ 529 int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, 530 int *needs_clflush) 531 { 532 int ret; 533 534 *needs_clflush = 0; 535 536 #if 0 537 if (!obj->base.filp) 538 return -EINVAL; 539 #endif 540 541 if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) { 542 /* If we're not in the cpu read domain, set ourself into the gtt 543 * read domain and manually flush cachelines (if required). This 544 * optimizes for the case when the gpu will dirty the data 545 * anyway again before the next pread happens. */ 546 *needs_clflush = !cpu_cache_is_coherent(obj->base.dev, 547 obj->cache_level); 548 ret = i915_gem_object_wait_rendering(obj, true); 549 if (ret) 550 return ret; 551 552 i915_gem_object_retire(obj); 553 } 554 555 ret = i915_gem_object_get_pages(obj); 556 if (ret) 557 return ret; 558 559 i915_gem_object_pin_pages(obj); 560 561 return ret; 562 } 563 564 /* Per-page copy function for the shmem pread fastpath. 565 * Flushes invalid cachelines before reading the target if 566 * needs_clflush is set. */ 567 static int 568 shmem_pread_fast(struct vm_page *page, int shmem_page_offset, int page_length, 569 char __user *user_data, 570 bool page_do_bit17_swizzling, bool needs_clflush) 571 { 572 char *vaddr; 573 int ret; 574 575 if (unlikely(page_do_bit17_swizzling)) 576 return -EINVAL; 577 578 vaddr = kmap_atomic(page); 579 if (needs_clflush) 580 drm_clflush_virt_range(vaddr + shmem_page_offset, 581 page_length); 582 ret = __copy_to_user_inatomic(user_data, 583 vaddr + shmem_page_offset, 584 page_length); 585 kunmap_atomic(vaddr); 586 587 return ret ? -EFAULT : 0; 588 } 589 590 static void 591 shmem_clflush_swizzled_range(char *addr, unsigned long length, 592 bool swizzled) 593 { 594 if (unlikely(swizzled)) { 595 unsigned long start = (unsigned long) addr; 596 unsigned long end = (unsigned long) addr + length; 597 598 /* For swizzling simply ensure that we always flush both 599 * channels. Lame, but simple and it works. Swizzled 600 * pwrite/pread is far from a hotpath - current userspace 601 * doesn't use it at all. */ 602 start = round_down(start, 128); 603 end = round_up(end, 128); 604 605 drm_clflush_virt_range((void *)start, end - start); 606 } else { 607 drm_clflush_virt_range(addr, length); 608 } 609 610 } 611 612 /* Only difference to the fast-path function is that this can handle bit17 613 * and uses non-atomic copy and kmap functions. */ 614 static int 615 shmem_pread_slow(struct vm_page *page, int shmem_page_offset, int page_length, 616 char __user *user_data, 617 bool page_do_bit17_swizzling, bool needs_clflush) 618 { 619 char *vaddr; 620 int ret; 621 622 vaddr = kmap(page); 623 if (needs_clflush) 624 shmem_clflush_swizzled_range(vaddr + shmem_page_offset, 625 page_length, 626 page_do_bit17_swizzling); 627 628 if (page_do_bit17_swizzling) 629 ret = __copy_to_user_swizzled(user_data, 630 vaddr, shmem_page_offset, 631 page_length); 632 else 633 ret = __copy_to_user(user_data, 634 vaddr + shmem_page_offset, 635 page_length); 636 kunmap(page); 637 638 return ret ? - EFAULT : 0; 639 } 640 641 static int 642 i915_gem_shmem_pread(struct drm_device *dev, 643 struct drm_i915_gem_object *obj, 644 struct drm_i915_gem_pread *args, 645 struct drm_file *file) 646 { 647 char __user *user_data; 648 ssize_t remain; 649 loff_t offset; 650 int shmem_page_offset, page_length, ret = 0; 651 int obj_do_bit17_swizzling, page_do_bit17_swizzling; 652 int prefaulted = 0; 653 int needs_clflush = 0; 654 int i; 655 656 user_data = to_user_ptr(args->data_ptr); 657 remain = args->size; 658 659 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); 660 661 ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush); 662 if (ret) 663 return ret; 664 665 offset = args->offset; 666 667 for (i = 0; i < (obj->base.size >> PAGE_SHIFT); i++) { 668 struct vm_page *page = obj->pages[i]; 669 670 if (remain <= 0) 671 break; 672 673 /* Operation in this page 674 * 675 * shmem_page_offset = offset within page in shmem file 676 * page_length = bytes to copy for this page 677 */ 678 shmem_page_offset = offset_in_page(offset); 679 page_length = remain; 680 if ((shmem_page_offset + page_length) > PAGE_SIZE) 681 page_length = PAGE_SIZE - shmem_page_offset; 682 683 page_do_bit17_swizzling = obj_do_bit17_swizzling && 684 (page_to_phys(page) & (1 << 17)) != 0; 685 686 ret = shmem_pread_fast(page, shmem_page_offset, page_length, 687 user_data, page_do_bit17_swizzling, 688 needs_clflush); 689 if (ret == 0) 690 goto next_page; 691 692 mutex_unlock(&dev->struct_mutex); 693 694 if (likely(!i915.prefault_disable) && !prefaulted) { 695 ret = fault_in_multipages_writeable(user_data, remain); 696 /* Userspace is tricking us, but we've already clobbered 697 * its pages with the prefault and promised to write the 698 * data up to the first fault. Hence ignore any errors 699 * and just continue. */ 700 (void)ret; 701 prefaulted = 1; 702 } 703 704 ret = shmem_pread_slow(page, shmem_page_offset, page_length, 705 user_data, page_do_bit17_swizzling, 706 needs_clflush); 707 708 mutex_lock(&dev->struct_mutex); 709 710 if (ret) 711 goto out; 712 713 next_page: 714 remain -= page_length; 715 user_data += page_length; 716 offset += page_length; 717 } 718 719 out: 720 i915_gem_object_unpin_pages(obj); 721 722 return ret; 723 } 724 725 /** 726 * Reads data from the object referenced by handle. 727 * 728 * On error, the contents of *data are undefined. 729 */ 730 int 731 i915_gem_pread_ioctl(struct drm_device *dev, void *data, 732 struct drm_file *file) 733 { 734 struct drm_i915_gem_pread *args = data; 735 struct drm_i915_gem_object *obj; 736 int ret = 0; 737 738 if (args->size == 0) 739 return 0; 740 741 ret = i915_mutex_lock_interruptible(dev); 742 if (ret) 743 return ret; 744 745 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 746 if (&obj->base == NULL) { 747 ret = -ENOENT; 748 goto unlock; 749 } 750 751 /* Bounds check source. */ 752 if (args->offset > obj->base.size || 753 args->size > obj->base.size - args->offset) { 754 ret = -EINVAL; 755 goto out; 756 } 757 758 trace_i915_gem_object_pread(obj, args->offset, args->size); 759 760 ret = i915_gem_shmem_pread(dev, obj, args, file); 761 762 out: 763 drm_gem_object_unreference(&obj->base); 764 unlock: 765 mutex_unlock(&dev->struct_mutex); 766 return ret; 767 } 768 769 /* This is the fast write path which cannot handle 770 * page faults in the source data 771 */ 772 773 static inline int 774 fast_user_write(struct io_mapping *mapping, 775 loff_t page_base, int page_offset, 776 char __user *user_data, 777 int length) 778 { 779 void __iomem *vaddr_atomic; 780 void *vaddr; 781 unsigned long unwritten; 782 783 vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base); 784 /* We can use the cpu mem copy function because this is X86. */ 785 vaddr = (char __force*)vaddr_atomic + page_offset; 786 unwritten = __copy_from_user_inatomic_nocache(vaddr, 787 user_data, length); 788 io_mapping_unmap_atomic(vaddr_atomic); 789 return unwritten; 790 } 791 792 /** 793 * This is the fast pwrite path, where we copy the data directly from the 794 * user into the GTT, uncached. 795 */ 796 static int 797 i915_gem_gtt_pwrite_fast(struct drm_device *dev, 798 struct drm_i915_gem_object *obj, 799 struct drm_i915_gem_pwrite *args, 800 struct drm_file *file) 801 { 802 struct drm_i915_private *dev_priv = dev->dev_private; 803 ssize_t remain; 804 loff_t offset, page_base; 805 char __user *user_data; 806 int page_offset, page_length, ret; 807 808 ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK); 809 if (ret) 810 goto out; 811 812 ret = i915_gem_object_set_to_gtt_domain(obj, true); 813 if (ret) 814 goto out_unpin; 815 816 ret = i915_gem_object_put_fence(obj); 817 if (ret) 818 goto out_unpin; 819 820 user_data = to_user_ptr(args->data_ptr); 821 remain = args->size; 822 823 offset = i915_gem_obj_ggtt_offset(obj) + args->offset; 824 825 while (remain > 0) { 826 /* Operation in this page 827 * 828 * page_base = page offset within aperture 829 * page_offset = offset within page 830 * page_length = bytes to copy for this page 831 */ 832 page_base = offset & ~PAGE_MASK; 833 page_offset = offset_in_page(offset); 834 page_length = remain; 835 if ((page_offset + remain) > PAGE_SIZE) 836 page_length = PAGE_SIZE - page_offset; 837 838 /* If we get a fault while copying data, then (presumably) our 839 * source page isn't available. Return the error and we'll 840 * retry in the slow path. 841 */ 842 if (fast_user_write(dev_priv->gtt.mappable, page_base, 843 page_offset, user_data, page_length)) { 844 ret = -EFAULT; 845 goto out_unpin; 846 } 847 848 remain -= page_length; 849 user_data += page_length; 850 offset += page_length; 851 } 852 853 out_unpin: 854 i915_gem_object_ggtt_unpin(obj); 855 out: 856 return ret; 857 } 858 859 /* Per-page copy function for the shmem pwrite fastpath. 860 * Flushes invalid cachelines before writing to the target if 861 * needs_clflush_before is set and flushes out any written cachelines after 862 * writing if needs_clflush is set. */ 863 static int 864 shmem_pwrite_fast(struct vm_page *page, int shmem_page_offset, int page_length, 865 char __user *user_data, 866 bool page_do_bit17_swizzling, 867 bool needs_clflush_before, 868 bool needs_clflush_after) 869 { 870 char *vaddr; 871 int ret; 872 873 if (unlikely(page_do_bit17_swizzling)) 874 return -EINVAL; 875 876 vaddr = kmap_atomic(page); 877 if (needs_clflush_before) 878 drm_clflush_virt_range(vaddr + shmem_page_offset, 879 page_length); 880 ret = __copy_from_user_inatomic(vaddr + shmem_page_offset, 881 user_data, page_length); 882 if (needs_clflush_after) 883 drm_clflush_virt_range(vaddr + shmem_page_offset, 884 page_length); 885 kunmap_atomic(vaddr); 886 887 return ret ? -EFAULT : 0; 888 } 889 890 /* Only difference to the fast-path function is that this can handle bit17 891 * and uses non-atomic copy and kmap functions. */ 892 static int 893 shmem_pwrite_slow(struct vm_page *page, int shmem_page_offset, int page_length, 894 char __user *user_data, 895 bool page_do_bit17_swizzling, 896 bool needs_clflush_before, 897 bool needs_clflush_after) 898 { 899 char *vaddr; 900 int ret; 901 902 vaddr = kmap(page); 903 if (unlikely(needs_clflush_before || page_do_bit17_swizzling)) 904 shmem_clflush_swizzled_range(vaddr + shmem_page_offset, 905 page_length, 906 page_do_bit17_swizzling); 907 if (page_do_bit17_swizzling) 908 ret = __copy_from_user_swizzled(vaddr, shmem_page_offset, 909 user_data, 910 page_length); 911 else 912 ret = __copy_from_user(vaddr + shmem_page_offset, 913 user_data, 914 page_length); 915 if (needs_clflush_after) 916 shmem_clflush_swizzled_range(vaddr + shmem_page_offset, 917 page_length, 918 page_do_bit17_swizzling); 919 kunmap(page); 920 921 return ret ? -EFAULT : 0; 922 } 923 924 static int 925 i915_gem_shmem_pwrite(struct drm_device *dev, 926 struct drm_i915_gem_object *obj, 927 struct drm_i915_gem_pwrite *args, 928 struct drm_file *file) 929 { 930 ssize_t remain; 931 loff_t offset; 932 char __user *user_data; 933 int shmem_page_offset, page_length, ret = 0; 934 int obj_do_bit17_swizzling, page_do_bit17_swizzling; 935 int hit_slowpath = 0; 936 int needs_clflush_after = 0; 937 int needs_clflush_before = 0; 938 int i; 939 940 user_data = to_user_ptr(args->data_ptr); 941 remain = args->size; 942 943 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); 944 945 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) { 946 /* If we're not in the cpu write domain, set ourself into the gtt 947 * write domain and manually flush cachelines (if required). This 948 * optimizes for the case when the gpu will use the data 949 * right away and we therefore have to clflush anyway. */ 950 needs_clflush_after = cpu_write_needs_clflush(obj); 951 ret = i915_gem_object_wait_rendering(obj, false); 952 if (ret) 953 return ret; 954 955 i915_gem_object_retire(obj); 956 } 957 /* Same trick applies to invalidate partially written cachelines read 958 * before writing. */ 959 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) 960 needs_clflush_before = 961 !cpu_cache_is_coherent(dev, obj->cache_level); 962 963 ret = i915_gem_object_get_pages(obj); 964 if (ret) 965 return ret; 966 967 i915_gem_object_pin_pages(obj); 968 969 offset = args->offset; 970 obj->dirty = 1; 971 972 VM_OBJECT_LOCK(obj->base.vm_obj); 973 vm_object_pip_add(obj->base.vm_obj, 1); 974 for (i = 0; i < (obj->base.size >> PAGE_SHIFT); i++) { 975 struct vm_page *page = obj->pages[i]; 976 int partial_cacheline_write; 977 978 if (i < offset >> PAGE_SHIFT) 979 continue; 980 981 if (remain <= 0) 982 break; 983 984 /* Operation in this page 985 * 986 * shmem_page_offset = offset within page in shmem file 987 * page_length = bytes to copy for this page 988 */ 989 shmem_page_offset = offset_in_page(offset); 990 991 page_length = remain; 992 if ((shmem_page_offset + page_length) > PAGE_SIZE) 993 page_length = PAGE_SIZE - shmem_page_offset; 994 995 /* If we don't overwrite a cacheline completely we need to be 996 * careful to have up-to-date data by first clflushing. Don't 997 * overcomplicate things and flush the entire patch. */ 998 partial_cacheline_write = needs_clflush_before && 999 ((shmem_page_offset | page_length) 1000 & (cpu_clflush_line_size - 1)); 1001 1002 page_do_bit17_swizzling = obj_do_bit17_swizzling && 1003 (page_to_phys(page) & (1 << 17)) != 0; 1004 1005 ret = shmem_pwrite_fast(page, shmem_page_offset, page_length, 1006 user_data, page_do_bit17_swizzling, 1007 partial_cacheline_write, 1008 needs_clflush_after); 1009 if (ret == 0) 1010 goto next_page; 1011 1012 hit_slowpath = 1; 1013 mutex_unlock(&dev->struct_mutex); 1014 ret = shmem_pwrite_slow(page, shmem_page_offset, page_length, 1015 user_data, page_do_bit17_swizzling, 1016 partial_cacheline_write, 1017 needs_clflush_after); 1018 1019 mutex_lock(&dev->struct_mutex); 1020 1021 if (ret) 1022 goto out; 1023 1024 next_page: 1025 remain -= page_length; 1026 user_data += page_length; 1027 offset += page_length; 1028 } 1029 vm_object_pip_wakeup(obj->base.vm_obj); 1030 VM_OBJECT_UNLOCK(obj->base.vm_obj); 1031 1032 out: 1033 i915_gem_object_unpin_pages(obj); 1034 1035 if (hit_slowpath) { 1036 /* 1037 * Fixup: Flush cpu caches in case we didn't flush the dirty 1038 * cachelines in-line while writing and the object moved 1039 * out of the cpu write domain while we've dropped the lock. 1040 */ 1041 if (!needs_clflush_after && 1042 obj->base.write_domain != I915_GEM_DOMAIN_CPU) { 1043 if (i915_gem_clflush_object(obj, obj->pin_display)) 1044 i915_gem_chipset_flush(dev); 1045 } 1046 } 1047 1048 if (needs_clflush_after) 1049 i915_gem_chipset_flush(dev); 1050 1051 return ret; 1052 } 1053 1054 /** 1055 * Writes data to the object referenced by handle. 1056 * 1057 * On error, the contents of the buffer that were to be modified are undefined. 1058 */ 1059 int 1060 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, 1061 struct drm_file *file) 1062 { 1063 struct drm_i915_private *dev_priv = dev->dev_private; 1064 struct drm_i915_gem_pwrite *args = data; 1065 struct drm_i915_gem_object *obj; 1066 int ret; 1067 1068 if (args->size == 0) 1069 return 0; 1070 1071 if (likely(!i915.prefault_disable)) { 1072 ret = fault_in_multipages_readable(to_user_ptr(args->data_ptr), 1073 args->size); 1074 if (ret) 1075 return -EFAULT; 1076 } 1077 1078 intel_runtime_pm_get(dev_priv); 1079 1080 ret = i915_mutex_lock_interruptible(dev); 1081 if (ret) 1082 goto put_rpm; 1083 1084 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 1085 if (&obj->base == NULL) { 1086 ret = -ENOENT; 1087 goto unlock; 1088 } 1089 1090 /* Bounds check destination. */ 1091 if (args->offset > obj->base.size || 1092 args->size > obj->base.size - args->offset) { 1093 ret = -EINVAL; 1094 goto out; 1095 } 1096 1097 trace_i915_gem_object_pwrite(obj, args->offset, args->size); 1098 1099 ret = -EFAULT; 1100 /* We can only do the GTT pwrite on untiled buffers, as otherwise 1101 * it would end up going through the fenced access, and we'll get 1102 * different detiling behavior between reading and writing. 1103 * pread/pwrite currently are reading and writing from the CPU 1104 * perspective, requiring manual detiling by the client. 1105 */ 1106 1107 if (obj->tiling_mode == I915_TILING_NONE && 1108 obj->base.write_domain != I915_GEM_DOMAIN_CPU && 1109 cpu_write_needs_clflush(obj)) { 1110 ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file); 1111 /* Note that the gtt paths might fail with non-page-backed user 1112 * pointers (e.g. gtt mappings when moving data between 1113 * textures). Fallback to the shmem path in that case. */ 1114 } 1115 1116 if (ret == -EFAULT || ret == -ENOSPC) { 1117 if (obj->phys_handle) 1118 ret = i915_gem_phys_pwrite(obj, args, file); 1119 else 1120 ret = i915_gem_shmem_pwrite(dev, obj, args, file); 1121 } 1122 1123 out: 1124 drm_gem_object_unreference(&obj->base); 1125 unlock: 1126 mutex_unlock(&dev->struct_mutex); 1127 put_rpm: 1128 intel_runtime_pm_put(dev_priv); 1129 1130 return ret; 1131 } 1132 1133 int 1134 i915_gem_check_wedge(struct i915_gpu_error *error, 1135 bool interruptible) 1136 { 1137 if (i915_reset_in_progress(error)) { 1138 /* Non-interruptible callers can't handle -EAGAIN, hence return 1139 * -EIO unconditionally for these. */ 1140 if (!interruptible) 1141 return -EIO; 1142 1143 /* Recovery complete, but the reset failed ... */ 1144 if (i915_terminally_wedged(error)) 1145 return -EIO; 1146 1147 /* 1148 * Check if GPU Reset is in progress - we need intel_ring_begin 1149 * to work properly to reinit the hw state while the gpu is 1150 * still marked as reset-in-progress. Handle this with a flag. 1151 */ 1152 if (!error->reload_in_reset) 1153 return -EAGAIN; 1154 } 1155 1156 return 0; 1157 } 1158 1159 /* 1160 * Compare arbitrary request against outstanding lazy request. Emit on match. 1161 */ 1162 int 1163 i915_gem_check_olr(struct drm_i915_gem_request *req) 1164 { 1165 int ret; 1166 1167 WARN_ON(!mutex_is_locked(&req->ring->dev->struct_mutex)); 1168 1169 ret = 0; 1170 if (req == req->ring->outstanding_lazy_request) 1171 ret = i915_add_request(req->ring); 1172 1173 return ret; 1174 } 1175 1176 #if 0 1177 static void fake_irq(unsigned long data) 1178 { 1179 wake_up_process((struct task_struct *)data); 1180 } 1181 1182 static bool missed_irq(struct drm_i915_private *dev_priv, 1183 struct intel_engine_cs *ring) 1184 { 1185 return test_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings); 1186 } 1187 #endif 1188 1189 static bool can_wait_boost(struct drm_i915_file_private *file_priv) 1190 { 1191 if (file_priv == NULL) 1192 return true; 1193 1194 return !atomic_xchg(&file_priv->rps_wait_boost, true); 1195 } 1196 1197 /** 1198 * __i915_wait_request - wait until execution of request has finished 1199 * @req: duh! 1200 * @reset_counter: reset sequence associated with the given request 1201 * @interruptible: do an interruptible wait (normally yes) 1202 * @timeout: in - how long to wait (NULL forever); out - how much time remaining 1203 * 1204 * Note: It is of utmost importance that the passed in seqno and reset_counter 1205 * values have been read by the caller in an smp safe manner. Where read-side 1206 * locks are involved, it is sufficient to read the reset_counter before 1207 * unlocking the lock that protects the seqno. For lockless tricks, the 1208 * reset_counter _must_ be read before, and an appropriate smp_rmb must be 1209 * inserted. 1210 * 1211 * Returns 0 if the request was found within the alloted time. Else returns the 1212 * errno with remaining time filled in timeout argument. 1213 */ 1214 int __i915_wait_request(struct drm_i915_gem_request *req, 1215 unsigned reset_counter, 1216 bool interruptible, 1217 s64 *timeout, 1218 struct drm_i915_file_private *file_priv) 1219 { 1220 struct intel_engine_cs *ring = i915_gem_request_get_ring(req); 1221 struct drm_device *dev = ring->dev; 1222 struct drm_i915_private *dev_priv = dev->dev_private; 1223 const bool irq_test_in_progress = 1224 ACCESS_ONCE(dev_priv->gpu_error.test_irq_rings) & intel_ring_flag(ring); 1225 unsigned long timeout_expire; 1226 long end; 1227 bool wait_forever = true; 1228 s64 before, now; 1229 int ret; 1230 1231 WARN(!intel_irqs_enabled(dev_priv), "IRQs disabled"); 1232 1233 if (i915_gem_request_completed(req, true)) 1234 return 0; 1235 1236 if (timeout != NULL) 1237 wait_forever = false; 1238 1239 timeout_expire = timeout ? 1240 jiffies + nsecs_to_jiffies_timeout((u64)*timeout) : 0; 1241 1242 if (INTEL_INFO(dev)->gen >= 6 && ring->id == RCS && can_wait_boost(file_priv)) { 1243 gen6_rps_boost(dev_priv); 1244 if (file_priv) 1245 mod_delayed_work(dev_priv->wq, 1246 &file_priv->mm.idle_work, 1247 msecs_to_jiffies(100)); 1248 } 1249 1250 if (!irq_test_in_progress && WARN_ON(!ring->irq_get(ring))) 1251 return -ENODEV; 1252 1253 /* Record current time in case interrupted by signal, or wedged */ 1254 trace_i915_gem_request_wait_begin(req); 1255 before = ktime_get_raw_ns(); 1256 1257 #define EXIT_COND \ 1258 (i915_seqno_passed(ring->get_seqno(ring, false), i915_gem_request_get_seqno(req)) || \ 1259 i915_reset_in_progress(&dev_priv->gpu_error) || \ 1260 reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter)) 1261 do { 1262 if (interruptible) 1263 end = wait_event_interruptible_timeout(ring->irq_queue, 1264 EXIT_COND, 1265 timeout_expire); 1266 else 1267 end = wait_event_timeout(ring->irq_queue, EXIT_COND, 1268 timeout_expire); 1269 1270 /* We need to check whether any gpu reset happened in between 1271 * the caller grabbing the seqno and now ... */ 1272 if (reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter)) 1273 end = -EAGAIN; 1274 1275 /* ... but upgrade the -EGAIN to an -EIO if the gpu is truely 1276 * gone. */ 1277 ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible); 1278 if (ret) 1279 end = ret; 1280 } while (end == 0 && wait_forever); 1281 1282 now = ktime_get_raw_ns(); 1283 trace_i915_gem_request_wait_end(req); 1284 1285 ring->irq_put(ring); 1286 #undef EXIT_COND 1287 1288 if (timeout) { 1289 s64 tres = *timeout - (now - before); 1290 1291 *timeout = tres < 0 ? 0 : tres; 1292 } 1293 1294 switch (end) { 1295 case -EIO: 1296 case -EAGAIN: /* Wedged */ 1297 case -ERESTARTSYS: /* Signal */ 1298 return (int)end; 1299 case 0: /* Timeout */ 1300 return -ETIMEDOUT; /* -ETIME on Linux */ 1301 default: /* Completed */ 1302 WARN_ON(end < 0); /* We're not aware of other errors */ 1303 return 0; 1304 } 1305 } 1306 1307 /** 1308 * Waits for a request to be signaled, and cleans up the 1309 * request and object lists appropriately for that event. 1310 */ 1311 int 1312 i915_wait_request(struct drm_i915_gem_request *req) 1313 { 1314 struct drm_device *dev; 1315 struct drm_i915_private *dev_priv; 1316 bool interruptible; 1317 unsigned reset_counter; 1318 int ret; 1319 1320 BUG_ON(req == NULL); 1321 1322 dev = req->ring->dev; 1323 dev_priv = dev->dev_private; 1324 interruptible = dev_priv->mm.interruptible; 1325 1326 BUG_ON(!mutex_is_locked(&dev->struct_mutex)); 1327 1328 ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible); 1329 if (ret) 1330 return ret; 1331 1332 ret = i915_gem_check_olr(req); 1333 if (ret) 1334 return ret; 1335 1336 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter); 1337 i915_gem_request_reference(req); 1338 ret = __i915_wait_request(req, reset_counter, 1339 interruptible, NULL, NULL); 1340 i915_gem_request_unreference(req); 1341 return ret; 1342 } 1343 1344 static int 1345 i915_gem_object_wait_rendering__tail(struct drm_i915_gem_object *obj) 1346 { 1347 if (!obj->active) 1348 return 0; 1349 1350 /* Manually manage the write flush as we may have not yet 1351 * retired the buffer. 1352 * 1353 * Note that the last_write_req is always the earlier of 1354 * the two (read/write) requests, so if we haved successfully waited, 1355 * we know we have passed the last write. 1356 */ 1357 i915_gem_request_assign(&obj->last_write_req, NULL); 1358 1359 return 0; 1360 } 1361 1362 /** 1363 * Ensures that all rendering to the object has completed and the object is 1364 * safe to unbind from the GTT or access from the CPU. 1365 */ 1366 static __must_check int 1367 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, 1368 bool readonly) 1369 { 1370 struct drm_i915_gem_request *req; 1371 int ret; 1372 1373 req = readonly ? obj->last_write_req : obj->last_read_req; 1374 if (!req) 1375 return 0; 1376 1377 ret = i915_wait_request(req); 1378 if (ret) 1379 return ret; 1380 1381 return i915_gem_object_wait_rendering__tail(obj); 1382 } 1383 1384 /* A nonblocking variant of the above wait. This is a highly dangerous routine 1385 * as the object state may change during this call. 1386 */ 1387 static __must_check int 1388 i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj, 1389 struct drm_i915_file_private *file_priv, 1390 bool readonly) 1391 { 1392 struct drm_i915_gem_request *req; 1393 struct drm_device *dev = obj->base.dev; 1394 struct drm_i915_private *dev_priv = dev->dev_private; 1395 unsigned reset_counter; 1396 int ret; 1397 1398 BUG_ON(!mutex_is_locked(&dev->struct_mutex)); 1399 BUG_ON(!dev_priv->mm.interruptible); 1400 1401 req = readonly ? obj->last_write_req : obj->last_read_req; 1402 if (!req) 1403 return 0; 1404 1405 ret = i915_gem_check_wedge(&dev_priv->gpu_error, true); 1406 if (ret) 1407 return ret; 1408 1409 ret = i915_gem_check_olr(req); 1410 if (ret) 1411 return ret; 1412 1413 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter); 1414 i915_gem_request_reference(req); 1415 mutex_unlock(&dev->struct_mutex); 1416 ret = __i915_wait_request(req, reset_counter, true, NULL, file_priv); 1417 mutex_lock(&dev->struct_mutex); 1418 i915_gem_request_unreference(req); 1419 if (ret) 1420 return ret; 1421 1422 return i915_gem_object_wait_rendering__tail(obj); 1423 } 1424 1425 /** 1426 * Called when user space prepares to use an object with the CPU, either 1427 * through the mmap ioctl's mapping or a GTT mapping. 1428 */ 1429 int 1430 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, 1431 struct drm_file *file) 1432 { 1433 struct drm_i915_gem_set_domain *args = data; 1434 struct drm_i915_gem_object *obj; 1435 uint32_t read_domains = args->read_domains; 1436 uint32_t write_domain = args->write_domain; 1437 int ret; 1438 1439 /* Only handle setting domains to types used by the CPU. */ 1440 if (write_domain & I915_GEM_GPU_DOMAINS) 1441 return -EINVAL; 1442 1443 if (read_domains & I915_GEM_GPU_DOMAINS) 1444 return -EINVAL; 1445 1446 /* Having something in the write domain implies it's in the read 1447 * domain, and only that read domain. Enforce that in the request. 1448 */ 1449 if (write_domain != 0 && read_domains != write_domain) 1450 return -EINVAL; 1451 1452 ret = i915_mutex_lock_interruptible(dev); 1453 if (ret) 1454 return ret; 1455 1456 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 1457 if (&obj->base == NULL) { 1458 ret = -ENOENT; 1459 goto unlock; 1460 } 1461 1462 /* Try to flush the object off the GPU without holding the lock. 1463 * We will repeat the flush holding the lock in the normal manner 1464 * to catch cases where we are gazumped. 1465 */ 1466 ret = i915_gem_object_wait_rendering__nonblocking(obj, 1467 file->driver_priv, 1468 !write_domain); 1469 if (ret) 1470 goto unref; 1471 1472 if (read_domains & I915_GEM_DOMAIN_GTT) 1473 ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0); 1474 else 1475 ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0); 1476 1477 unref: 1478 drm_gem_object_unreference(&obj->base); 1479 unlock: 1480 mutex_unlock(&dev->struct_mutex); 1481 return ret; 1482 } 1483 1484 /** 1485 * Called when user space has done writes to this buffer 1486 */ 1487 int 1488 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, 1489 struct drm_file *file) 1490 { 1491 struct drm_i915_gem_sw_finish *args = data; 1492 struct drm_i915_gem_object *obj; 1493 int ret = 0; 1494 1495 ret = i915_mutex_lock_interruptible(dev); 1496 if (ret) 1497 return ret; 1498 1499 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 1500 if (&obj->base == NULL) { 1501 ret = -ENOENT; 1502 goto unlock; 1503 } 1504 1505 /* Pinned buffers may be scanout, so flush the cache */ 1506 if (obj->pin_display) 1507 i915_gem_object_flush_cpu_write_domain(obj); 1508 1509 drm_gem_object_unreference(&obj->base); 1510 unlock: 1511 mutex_unlock(&dev->struct_mutex); 1512 return ret; 1513 } 1514 1515 /** 1516 * Maps the contents of an object, returning the address it is mapped 1517 * into. 1518 * 1519 * While the mapping holds a reference on the contents of the object, it doesn't 1520 * imply a ref on the object itself. 1521 * 1522 * IMPORTANT: 1523 * 1524 * DRM driver writers who look a this function as an example for how to do GEM 1525 * mmap support, please don't implement mmap support like here. The modern way 1526 * to implement DRM mmap support is with an mmap offset ioctl (like 1527 * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly. 1528 * That way debug tooling like valgrind will understand what's going on, hiding 1529 * the mmap call in a driver private ioctl will break that. The i915 driver only 1530 * does cpu mmaps this way because we didn't know better. 1531 */ 1532 int 1533 i915_gem_mmap_ioctl(struct drm_device *dev, void *data, 1534 struct drm_file *file) 1535 { 1536 struct drm_i915_gem_mmap *args = data; 1537 struct drm_gem_object *obj; 1538 unsigned long addr; 1539 struct proc *p = curproc; 1540 vm_map_t map = &p->p_vmspace->vm_map; 1541 vm_size_t size; 1542 int error = 0, rv; 1543 1544 obj = drm_gem_object_lookup(dev, file, args->handle); 1545 if (obj == NULL) 1546 return -ENOENT; 1547 1548 if (args->size == 0) 1549 goto out; 1550 1551 size = round_page(args->size); 1552 if (map->size + size > p->p_rlimit[RLIMIT_VMEM].rlim_cur) { 1553 error = -ENOMEM; 1554 goto out; 1555 } 1556 1557 /* 1558 * Call hint to ensure that NULL is not returned as a valid address 1559 * and to reduce vm_map traversals. XXX causes instability, use a 1560 * fixed low address as the start point instead to avoid the NULL 1561 * return issue. 1562 */ 1563 addr = PAGE_SIZE; 1564 1565 /* 1566 * Use 256KB alignment. It is unclear why this matters for a 1567 * virtual address but it appears to fix a number of application/X 1568 * crashes and kms console switching is much faster. 1569 */ 1570 vm_object_hold(obj->vm_obj); 1571 vm_object_reference_locked(obj->vm_obj); 1572 vm_object_drop(obj->vm_obj); 1573 1574 rv = vm_map_find(map, obj->vm_obj, NULL, 1575 args->offset, &addr, args->size, 1576 256 * 1024, /* align */ 1577 TRUE, /* fitit */ 1578 VM_MAPTYPE_NORMAL, /* maptype */ 1579 VM_PROT_READ | VM_PROT_WRITE, /* prot */ 1580 VM_PROT_READ | VM_PROT_WRITE, /* max */ 1581 MAP_SHARED /* cow */); 1582 if (rv != KERN_SUCCESS) { 1583 vm_object_deallocate(obj->vm_obj); 1584 error = -vm_mmap_to_errno(rv); 1585 } else { 1586 args->addr_ptr = (uint64_t)addr; 1587 } 1588 out: 1589 drm_gem_object_unreference(obj); 1590 return (error); 1591 } 1592 1593 /** 1594 * i915_gem_fault - fault a page into the GTT 1595 * 1596 * vm_obj is locked on entry and expected to be locked on return. 1597 * 1598 * The vm_pager has placemarked the object with an anonymous memory page 1599 * which we must replace atomically to avoid races against concurrent faults 1600 * on the same page. XXX we currently are unable to do this atomically. 1601 * 1602 * If we are to return an error we should not touch the anonymous page, 1603 * the caller will deallocate it. 1604 * 1605 * XXX Most GEM calls appear to be interruptable, but we can't hard loop 1606 * in that case. Release all resources and wait 1 tick before retrying. 1607 * This is a huge problem which needs to be fixed by getting rid of most 1608 * of the interruptability. The linux code does not retry but does appear 1609 * to have some sort of mechanism (VM_FAULT_NOPAGE ?) for the higher level 1610 * to be able to retry. 1611 * 1612 * -- 1613 * 1614 * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped 1615 * from userspace. The fault handler takes care of binding the object to 1616 * the GTT (if needed), allocating and programming a fence register (again, 1617 * only if needed based on whether the old reg is still valid or the object 1618 * is tiled) and inserting a new PTE into the faulting process. 1619 * 1620 * Note that the faulting process may involve evicting existing objects 1621 * from the GTT and/or fence registers to make room. So performance may 1622 * suffer if the GTT working set is large or there are few fence registers 1623 * left. 1624 * 1625 * vm_obj is locked on entry and expected to be locked on return. The VM 1626 * pager has placed an anonymous memory page at (obj,offset) which we have 1627 * to replace. 1628 */ 1629 int i915_gem_fault(vm_object_t vm_obj, vm_ooffset_t offset, int prot, vm_page_t *mres) 1630 { 1631 struct drm_i915_gem_object *obj = to_intel_bo(vm_obj->handle); 1632 struct drm_device *dev = obj->base.dev; 1633 struct drm_i915_private *dev_priv = dev->dev_private; 1634 unsigned long page_offset; 1635 vm_page_t m, oldm = NULL; 1636 int ret = 0; 1637 int didpip = 0; 1638 bool write = !!(prot & VM_PROT_WRITE); 1639 1640 intel_runtime_pm_get(dev_priv); 1641 1642 /* We don't use vmf->pgoff since that has the fake offset */ 1643 page_offset = (unsigned long)offset; 1644 1645 retry: 1646 ret = i915_mutex_lock_interruptible(dev); 1647 if (ret) 1648 goto out; 1649 1650 trace_i915_gem_object_fault(obj, page_offset, true, write); 1651 1652 /* Try to flush the object off the GPU first without holding the lock. 1653 * Upon reacquiring the lock, we will perform our sanity checks and then 1654 * repeat the flush holding the lock in the normal manner to catch cases 1655 * where we are gazumped. 1656 */ 1657 ret = i915_gem_object_wait_rendering__nonblocking(obj, NULL, !write); 1658 if (ret) 1659 goto unlock; 1660 1661 /* Access to snoopable pages through the GTT is incoherent. */ 1662 if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) { 1663 ret = -EFAULT; 1664 goto unlock; 1665 } 1666 1667 /* Now bind it into the GTT if needed */ 1668 ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE); 1669 if (ret) 1670 goto unlock; 1671 1672 ret = i915_gem_object_set_to_gtt_domain(obj, write); 1673 if (ret) 1674 goto unpin; 1675 1676 ret = i915_gem_object_get_fence(obj); 1677 if (ret) 1678 goto unpin; 1679 1680 /* 1681 * START FREEBSD MAGIC 1682 * 1683 * Add a pip count to avoid destruction and certain other 1684 * complex operations (such as collapses?) while unlocked. 1685 */ 1686 if (didpip == 0) { 1687 vm_object_pip_add(vm_obj, 1); 1688 didpip = 1; 1689 } 1690 1691 /* 1692 * XXX We must currently remove the placeholder page now to avoid 1693 * a deadlock against a concurrent i915_gem_release_mmap(). 1694 * Otherwise concurrent operation will block on the busy page 1695 * while holding locks which we need to obtain. 1696 */ 1697 if (*mres != NULL) { 1698 oldm = *mres; 1699 if ((oldm->flags & PG_BUSY) == 0) 1700 kprintf("i915_gem_fault: Page was not busy\n"); 1701 else 1702 vm_page_remove(oldm); 1703 *mres = NULL; 1704 } else { 1705 oldm = NULL; 1706 } 1707 1708 ret = 0; 1709 m = NULL; 1710 1711 /* 1712 * Since the object lock was dropped, another thread might have 1713 * faulted on the same GTT address and instantiated the mapping. 1714 * Recheck. 1715 */ 1716 m = vm_page_lookup(vm_obj, OFF_TO_IDX(offset)); 1717 if (m != NULL) { 1718 /* 1719 * Try to busy the page, retry on failure (non-zero ret). 1720 */ 1721 if (vm_page_busy_try(m, false)) { 1722 kprintf("i915_gem_fault: PG_BUSY\n"); 1723 ret = -EINTR; 1724 goto unlock; 1725 } 1726 goto have_page; 1727 } 1728 /* 1729 * END FREEBSD MAGIC 1730 */ 1731 1732 obj->fault_mappable = true; 1733 1734 m = vm_phys_fictitious_to_vm_page(dev_priv->gtt.mappable_base + 1735 i915_gem_obj_ggtt_offset(obj) + 1736 offset); 1737 if (m == NULL) { 1738 ret = -EFAULT; 1739 goto unpin; 1740 } 1741 KASSERT((m->flags & PG_FICTITIOUS) != 0, ("not fictitious %p", m)); 1742 KASSERT(m->wire_count == 1, ("wire_count not 1 %p", m)); 1743 1744 /* 1745 * Try to busy the page. Fails on non-zero return. 1746 */ 1747 if (vm_page_busy_try(m, false)) { 1748 kprintf("i915_gem_fault: PG_BUSY(2)\n"); 1749 ret = -EINTR; 1750 goto unpin; 1751 } 1752 m->valid = VM_PAGE_BITS_ALL; 1753 1754 /* 1755 * Finally, remap it using the new GTT offset. 1756 * 1757 * (object expected to be in a locked state) 1758 */ 1759 vm_page_insert(m, vm_obj, OFF_TO_IDX(offset)); 1760 have_page: 1761 *mres = m; 1762 1763 i915_gem_object_ggtt_unpin(obj); 1764 mutex_unlock(&dev->struct_mutex); 1765 ret = VM_PAGER_OK; 1766 goto done; 1767 1768 /* 1769 * ALTERNATIVE ERROR RETURN. 1770 * 1771 * OBJECT EXPECTED TO BE LOCKED. 1772 */ 1773 unpin: 1774 i915_gem_object_ggtt_unpin(obj); 1775 unlock: 1776 mutex_unlock(&dev->struct_mutex); 1777 out: 1778 switch (ret) { 1779 case -EIO: 1780 /* 1781 * We eat errors when the gpu is terminally wedged to avoid 1782 * userspace unduly crashing (gl has no provisions for mmaps to 1783 * fail). But any other -EIO isn't ours (e.g. swap in failure) 1784 * and so needs to be reported. 1785 */ 1786 if (!i915_terminally_wedged(&dev_priv->gpu_error)) { 1787 // ret = VM_FAULT_SIGBUS; 1788 break; 1789 } 1790 /* fall through */ 1791 case -EAGAIN: 1792 /* 1793 * EAGAIN means the gpu is hung and we'll wait for the error 1794 * handler to reset everything when re-faulting in 1795 * i915_mutex_lock_interruptible. 1796 */ 1797 /* fall through */ 1798 case -ERESTARTSYS: 1799 case -EINTR: 1800 VM_OBJECT_UNLOCK(vm_obj); 1801 int dummy; 1802 tsleep(&dummy, 0, "delay", 1); /* XXX */ 1803 VM_OBJECT_LOCK(vm_obj); 1804 goto retry; 1805 default: 1806 WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret); 1807 ret = VM_PAGER_ERROR; 1808 break; 1809 } 1810 1811 done: 1812 if (oldm != NULL) 1813 vm_page_free(oldm); 1814 if (didpip) 1815 vm_object_pip_wakeup(vm_obj); 1816 1817 intel_runtime_pm_put(dev_priv); 1818 return ret; 1819 } 1820 1821 /** 1822 * i915_gem_release_mmap - remove physical page mappings 1823 * @obj: obj in question 1824 * 1825 * Preserve the reservation of the mmapping with the DRM core code, but 1826 * relinquish ownership of the pages back to the system. 1827 * 1828 * It is vital that we remove the page mapping if we have mapped a tiled 1829 * object through the GTT and then lose the fence register due to 1830 * resource pressure. Similarly if the object has been moved out of the 1831 * aperture, than pages mapped into userspace must be revoked. Removing the 1832 * mapping will then trigger a page fault on the next user access, allowing 1833 * fixup by i915_gem_fault(). 1834 */ 1835 void 1836 i915_gem_release_mmap(struct drm_i915_gem_object *obj) 1837 { 1838 vm_object_t devobj; 1839 vm_page_t m; 1840 int i, page_count; 1841 1842 if (!obj->fault_mappable) 1843 return; 1844 1845 devobj = cdev_pager_lookup(obj); 1846 if (devobj != NULL) { 1847 page_count = OFF_TO_IDX(obj->base.size); 1848 1849 VM_OBJECT_LOCK(devobj); 1850 for (i = 0; i < page_count; i++) { 1851 m = vm_page_lookup_busy_wait(devobj, i, TRUE, "915unm"); 1852 if (m == NULL) 1853 continue; 1854 cdev_pager_free_page(devobj, m); 1855 } 1856 VM_OBJECT_UNLOCK(devobj); 1857 vm_object_deallocate(devobj); 1858 } 1859 1860 obj->fault_mappable = false; 1861 } 1862 1863 void 1864 i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv) 1865 { 1866 struct drm_i915_gem_object *obj; 1867 1868 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) 1869 i915_gem_release_mmap(obj); 1870 } 1871 1872 uint32_t 1873 i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode) 1874 { 1875 uint32_t gtt_size; 1876 1877 if (INTEL_INFO(dev)->gen >= 4 || 1878 tiling_mode == I915_TILING_NONE) 1879 return size; 1880 1881 /* Previous chips need a power-of-two fence region when tiling */ 1882 if (INTEL_INFO(dev)->gen == 3) 1883 gtt_size = 1024*1024; 1884 else 1885 gtt_size = 512*1024; 1886 1887 while (gtt_size < size) 1888 gtt_size <<= 1; 1889 1890 return gtt_size; 1891 } 1892 1893 /** 1894 * i915_gem_get_gtt_alignment - return required GTT alignment for an object 1895 * @obj: object to check 1896 * 1897 * Return the required GTT alignment for an object, taking into account 1898 * potential fence register mapping. 1899 */ 1900 uint32_t 1901 i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size, 1902 int tiling_mode, bool fenced) 1903 { 1904 /* 1905 * Minimum alignment is 4k (GTT page size), but might be greater 1906 * if a fence register is needed for the object. 1907 */ 1908 if (INTEL_INFO(dev)->gen >= 4 || (!fenced && IS_G33(dev)) || 1909 tiling_mode == I915_TILING_NONE) 1910 return 4096; 1911 1912 /* 1913 * Previous chips need to be aligned to the size of the smallest 1914 * fence register that can contain the object. 1915 */ 1916 return i915_gem_get_gtt_size(dev, size, tiling_mode); 1917 } 1918 1919 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj) 1920 { 1921 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 1922 int ret; 1923 1924 #if 0 1925 if (drm_vma_node_has_offset(&obj->base.vma_node)) 1926 return 0; 1927 #endif 1928 1929 dev_priv->mm.shrinker_no_lock_stealing = true; 1930 1931 ret = drm_gem_create_mmap_offset(&obj->base); 1932 if (ret != -ENOSPC) 1933 goto out; 1934 1935 /* Badly fragmented mmap space? The only way we can recover 1936 * space is by destroying unwanted objects. We can't randomly release 1937 * mmap_offsets as userspace expects them to be persistent for the 1938 * lifetime of the objects. The closest we can is to release the 1939 * offsets on purgeable objects by truncating it and marking it purged, 1940 * which prevents userspace from ever using that object again. 1941 */ 1942 i915_gem_shrink(dev_priv, 1943 obj->base.size >> PAGE_SHIFT, 1944 I915_SHRINK_BOUND | 1945 I915_SHRINK_UNBOUND | 1946 I915_SHRINK_PURGEABLE); 1947 ret = drm_gem_create_mmap_offset(&obj->base); 1948 if (ret != -ENOSPC) 1949 goto out; 1950 1951 i915_gem_shrink_all(dev_priv); 1952 ret = drm_gem_create_mmap_offset(&obj->base); 1953 out: 1954 dev_priv->mm.shrinker_no_lock_stealing = false; 1955 1956 return ret; 1957 } 1958 1959 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj) 1960 { 1961 drm_gem_free_mmap_offset(&obj->base); 1962 } 1963 1964 int 1965 i915_gem_mmap_gtt(struct drm_file *file, 1966 struct drm_device *dev, 1967 uint32_t handle, 1968 uint64_t *offset) 1969 { 1970 struct drm_i915_private *dev_priv = dev->dev_private; 1971 struct drm_i915_gem_object *obj; 1972 int ret; 1973 1974 ret = i915_mutex_lock_interruptible(dev); 1975 if (ret) 1976 return ret; 1977 1978 obj = to_intel_bo(drm_gem_object_lookup(dev, file, handle)); 1979 if (&obj->base == NULL) { 1980 ret = -ENOENT; 1981 goto unlock; 1982 } 1983 1984 if (obj->base.size > dev_priv->gtt.mappable_end) { 1985 ret = -E2BIG; 1986 goto out; 1987 } 1988 1989 if (obj->madv != I915_MADV_WILLNEED) { 1990 DRM_DEBUG("Attempting to mmap a purgeable buffer\n"); 1991 ret = -EFAULT; 1992 goto out; 1993 } 1994 1995 ret = i915_gem_object_create_mmap_offset(obj); 1996 if (ret) 1997 goto out; 1998 1999 *offset = DRM_GEM_MAPPING_OFF(obj->base.map_list.key) | 2000 DRM_GEM_MAPPING_KEY; 2001 2002 out: 2003 drm_gem_object_unreference(&obj->base); 2004 unlock: 2005 mutex_unlock(&dev->struct_mutex); 2006 return ret; 2007 } 2008 2009 /** 2010 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing 2011 * @dev: DRM device 2012 * @data: GTT mapping ioctl data 2013 * @file: GEM object info 2014 * 2015 * Simply returns the fake offset to userspace so it can mmap it. 2016 * The mmap call will end up in drm_gem_mmap(), which will set things 2017 * up so we can get faults in the handler above. 2018 * 2019 * The fault handler will take care of binding the object into the GTT 2020 * (since it may have been evicted to make room for something), allocating 2021 * a fence register, and mapping the appropriate aperture address into 2022 * userspace. 2023 */ 2024 int 2025 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, 2026 struct drm_file *file) 2027 { 2028 struct drm_i915_gem_mmap_gtt *args = data; 2029 2030 return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset); 2031 } 2032 2033 static inline int 2034 i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj) 2035 { 2036 return obj->madv == I915_MADV_DONTNEED; 2037 } 2038 2039 /* Immediately discard the backing storage */ 2040 static void 2041 i915_gem_object_truncate(struct drm_i915_gem_object *obj) 2042 { 2043 vm_object_t vm_obj; 2044 2045 vm_obj = obj->base.vm_obj; 2046 VM_OBJECT_LOCK(vm_obj); 2047 vm_object_page_remove(vm_obj, 0, 0, false); 2048 VM_OBJECT_UNLOCK(vm_obj); 2049 2050 obj->madv = __I915_MADV_PURGED; 2051 } 2052 2053 /* Try to discard unwanted pages */ 2054 static void 2055 i915_gem_object_invalidate(struct drm_i915_gem_object *obj) 2056 { 2057 #if 0 2058 struct address_space *mapping; 2059 #endif 2060 2061 switch (obj->madv) { 2062 case I915_MADV_DONTNEED: 2063 i915_gem_object_truncate(obj); 2064 case __I915_MADV_PURGED: 2065 return; 2066 } 2067 2068 #if 0 2069 if (obj->base.filp == NULL) 2070 return; 2071 2072 mapping = file_inode(obj->base.filp)->i_mapping, 2073 invalidate_mapping_pages(mapping, 0, (loff_t)-1); 2074 #endif 2075 } 2076 2077 static void 2078 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj) 2079 { 2080 int page_count = obj->base.size / PAGE_SIZE; 2081 int i, ret; 2082 2083 if (!obj->pages) 2084 return; 2085 2086 BUG_ON(obj->madv == __I915_MADV_PURGED); 2087 2088 ret = i915_gem_object_set_to_cpu_domain(obj, true); 2089 if (ret) { 2090 /* In the event of a disaster, abandon all caches and 2091 * hope for the best. 2092 */ 2093 WARN_ON(ret != -EIO); 2094 i915_gem_clflush_object(obj, true); 2095 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU; 2096 } 2097 2098 if (i915_gem_object_needs_bit17_swizzle(obj)) 2099 i915_gem_object_save_bit_17_swizzle(obj); 2100 2101 if (obj->madv == I915_MADV_DONTNEED) 2102 obj->dirty = 0; 2103 2104 for (i = 0; i < page_count; i++) { 2105 struct vm_page *page = obj->pages[i]; 2106 2107 if (obj->dirty) 2108 set_page_dirty(page); 2109 2110 if (obj->madv == I915_MADV_WILLNEED) 2111 mark_page_accessed(page); 2112 2113 vm_page_busy_wait(obj->pages[i], FALSE, "i915gem"); 2114 vm_page_unwire(obj->pages[i], 1); 2115 vm_page_wakeup(obj->pages[i]); 2116 } 2117 obj->dirty = 0; 2118 2119 kfree(obj->pages); 2120 obj->pages = NULL; 2121 } 2122 2123 int 2124 i915_gem_object_put_pages(struct drm_i915_gem_object *obj) 2125 { 2126 const struct drm_i915_gem_object_ops *ops = obj->ops; 2127 2128 if (obj->pages == NULL) 2129 return 0; 2130 2131 if (obj->pages_pin_count) 2132 return -EBUSY; 2133 2134 BUG_ON(i915_gem_obj_bound_any(obj)); 2135 2136 /* ->put_pages might need to allocate memory for the bit17 swizzle 2137 * array, hence protect them from being reaped by removing them from gtt 2138 * lists early. */ 2139 list_del(&obj->global_list); 2140 2141 ops->put_pages(obj); 2142 obj->pages = NULL; 2143 2144 i915_gem_object_invalidate(obj); 2145 2146 return 0; 2147 } 2148 2149 unsigned long 2150 i915_gem_shrink(struct drm_i915_private *dev_priv, 2151 long target, unsigned flags) 2152 { 2153 const struct { 2154 struct list_head *list; 2155 unsigned int bit; 2156 } phases[] = { 2157 { &dev_priv->mm.unbound_list, I915_SHRINK_UNBOUND }, 2158 { &dev_priv->mm.bound_list, I915_SHRINK_BOUND }, 2159 { NULL, 0 }, 2160 }, *phase; 2161 unsigned long count = 0; 2162 2163 /* 2164 * As we may completely rewrite the (un)bound list whilst unbinding 2165 * (due to retiring requests) we have to strictly process only 2166 * one element of the list at the time, and recheck the list 2167 * on every iteration. 2168 * 2169 * In particular, we must hold a reference whilst removing the 2170 * object as we may end up waiting for and/or retiring the objects. 2171 * This might release the final reference (held by the active list) 2172 * and result in the object being freed from under us. This is 2173 * similar to the precautions the eviction code must take whilst 2174 * removing objects. 2175 * 2176 * Also note that although these lists do not hold a reference to 2177 * the object we can safely grab one here: The final object 2178 * unreferencing and the bound_list are both protected by the 2179 * dev->struct_mutex and so we won't ever be able to observe an 2180 * object on the bound_list with a reference count equals 0. 2181 */ 2182 for (phase = phases; phase->list; phase++) { 2183 struct list_head still_in_list; 2184 2185 if ((flags & phase->bit) == 0) 2186 continue; 2187 2188 INIT_LIST_HEAD(&still_in_list); 2189 while (count < target && !list_empty(phase->list)) { 2190 struct drm_i915_gem_object *obj; 2191 struct i915_vma *vma, *v; 2192 2193 obj = list_first_entry(phase->list, 2194 typeof(*obj), global_list); 2195 list_move_tail(&obj->global_list, &still_in_list); 2196 2197 if (flags & I915_SHRINK_PURGEABLE && 2198 !i915_gem_object_is_purgeable(obj)) 2199 continue; 2200 2201 drm_gem_object_reference(&obj->base); 2202 2203 /* For the unbound phase, this should be a no-op! */ 2204 list_for_each_entry_safe(vma, v, 2205 &obj->vma_list, vma_link) 2206 if (i915_vma_unbind(vma)) 2207 break; 2208 2209 if (i915_gem_object_put_pages(obj) == 0) 2210 count += obj->base.size >> PAGE_SHIFT; 2211 2212 drm_gem_object_unreference(&obj->base); 2213 } 2214 list_splice(&still_in_list, phase->list); 2215 } 2216 2217 return count; 2218 } 2219 2220 static unsigned long 2221 i915_gem_shrink_all(struct drm_i915_private *dev_priv) 2222 { 2223 i915_gem_evict_everything(dev_priv->dev); 2224 return i915_gem_shrink(dev_priv, LONG_MAX, 2225 I915_SHRINK_BOUND | I915_SHRINK_UNBOUND); 2226 } 2227 2228 static int 2229 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) 2230 { 2231 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2232 int page_count, i, j; 2233 vm_object_t vm_obj; 2234 struct vm_page *page; 2235 2236 /* Assert that the object is not currently in any GPU domain. As it 2237 * wasn't in the GTT, there shouldn't be any way it could have been in 2238 * a GPU cache 2239 */ 2240 BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS); 2241 BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS); 2242 2243 page_count = obj->base.size / PAGE_SIZE; 2244 obj->pages = kmalloc(page_count * sizeof(vm_page_t), M_DRM, 2245 M_WAITOK); 2246 2247 /* Get the list of pages out of our struct file. They'll be pinned 2248 * at this point until we release them. 2249 * 2250 * Fail silently without starting the shrinker 2251 */ 2252 vm_obj = obj->base.vm_obj; 2253 VM_OBJECT_LOCK(vm_obj); 2254 for (i = 0; i < page_count; i++) { 2255 page = shmem_read_mapping_page(vm_obj, i); 2256 if (IS_ERR(page)) { 2257 i915_gem_shrink(dev_priv, 2258 page_count, 2259 I915_SHRINK_BOUND | 2260 I915_SHRINK_UNBOUND | 2261 I915_SHRINK_PURGEABLE); 2262 page = shmem_read_mapping_page(vm_obj, i); 2263 } 2264 if (IS_ERR(page)) { 2265 /* We've tried hard to allocate the memory by reaping 2266 * our own buffer, now let the real VM do its job and 2267 * go down in flames if truly OOM. 2268 */ 2269 2270 i915_gem_shrink_all(dev_priv); 2271 page = shmem_read_mapping_page(vm_obj, i); 2272 if (IS_ERR(page)) 2273 goto err_pages; 2274 } 2275 #ifdef CONFIG_SWIOTLB 2276 if (swiotlb_nr_tbl()) { 2277 st->nents++; 2278 sg_set_page(sg, page, PAGE_SIZE, 0); 2279 sg = sg_next(sg); 2280 continue; 2281 } 2282 #endif 2283 obj->pages[i] = page; 2284 } 2285 #ifdef CONFIG_SWIOTLB 2286 if (!swiotlb_nr_tbl()) 2287 #endif 2288 VM_OBJECT_UNLOCK(vm_obj); 2289 2290 if (i915_gem_object_needs_bit17_swizzle(obj)) 2291 i915_gem_object_do_bit_17_swizzle(obj); 2292 2293 if (obj->tiling_mode != I915_TILING_NONE && 2294 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) 2295 i915_gem_object_pin_pages(obj); 2296 2297 return 0; 2298 2299 err_pages: 2300 for (j = 0; j < i; j++) { 2301 page = obj->pages[j]; 2302 vm_page_busy_wait(page, FALSE, "i915gem"); 2303 vm_page_unwire(page, 0); 2304 vm_page_wakeup(page); 2305 } 2306 VM_OBJECT_UNLOCK(vm_obj); 2307 kfree(obj->pages); 2308 obj->pages = NULL; 2309 return (-EIO); 2310 } 2311 2312 /* Ensure that the associated pages are gathered from the backing storage 2313 * and pinned into our object. i915_gem_object_get_pages() may be called 2314 * multiple times before they are released by a single call to 2315 * i915_gem_object_put_pages() - once the pages are no longer referenced 2316 * either as a result of memory pressure (reaping pages under the shrinker) 2317 * or as the object is itself released. 2318 */ 2319 int 2320 i915_gem_object_get_pages(struct drm_i915_gem_object *obj) 2321 { 2322 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2323 const struct drm_i915_gem_object_ops *ops = obj->ops; 2324 int ret; 2325 2326 if (obj->pages) 2327 return 0; 2328 2329 if (obj->madv != I915_MADV_WILLNEED) { 2330 DRM_DEBUG("Attempting to obtain a purgeable object\n"); 2331 return -EFAULT; 2332 } 2333 2334 BUG_ON(obj->pages_pin_count); 2335 2336 ret = ops->get_pages(obj); 2337 if (ret) 2338 return ret; 2339 2340 list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list); 2341 return 0; 2342 } 2343 2344 static void 2345 i915_gem_object_move_to_active(struct drm_i915_gem_object *obj, 2346 struct intel_engine_cs *ring) 2347 { 2348 struct drm_i915_gem_request *req; 2349 struct intel_engine_cs *old_ring; 2350 2351 BUG_ON(ring == NULL); 2352 2353 req = intel_ring_get_request(ring); 2354 old_ring = i915_gem_request_get_ring(obj->last_read_req); 2355 2356 if (old_ring != ring && obj->last_write_req) { 2357 /* Keep the request relative to the current ring */ 2358 i915_gem_request_assign(&obj->last_write_req, req); 2359 } 2360 2361 /* Add a reference if we're newly entering the active list. */ 2362 if (!obj->active) { 2363 drm_gem_object_reference(&obj->base); 2364 obj->active = 1; 2365 } 2366 2367 list_move_tail(&obj->ring_list, &ring->active_list); 2368 2369 i915_gem_request_assign(&obj->last_read_req, req); 2370 } 2371 2372 void i915_vma_move_to_active(struct i915_vma *vma, 2373 struct intel_engine_cs *ring) 2374 { 2375 list_move_tail(&vma->mm_list, &vma->vm->active_list); 2376 return i915_gem_object_move_to_active(vma->obj, ring); 2377 } 2378 2379 static void 2380 i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj) 2381 { 2382 struct i915_vma *vma; 2383 2384 BUG_ON(obj->base.write_domain & ~I915_GEM_GPU_DOMAINS); 2385 BUG_ON(!obj->active); 2386 2387 list_for_each_entry(vma, &obj->vma_list, vma_link) { 2388 if (!list_empty(&vma->mm_list)) 2389 list_move_tail(&vma->mm_list, &vma->vm->inactive_list); 2390 } 2391 2392 intel_fb_obj_flush(obj, true); 2393 2394 list_del_init(&obj->ring_list); 2395 2396 i915_gem_request_assign(&obj->last_read_req, NULL); 2397 i915_gem_request_assign(&obj->last_write_req, NULL); 2398 obj->base.write_domain = 0; 2399 2400 i915_gem_request_assign(&obj->last_fenced_req, NULL); 2401 2402 obj->active = 0; 2403 drm_gem_object_unreference(&obj->base); 2404 2405 WARN_ON(i915_verify_lists(dev)); 2406 } 2407 2408 static void 2409 i915_gem_object_retire(struct drm_i915_gem_object *obj) 2410 { 2411 if (obj->last_read_req == NULL) 2412 return; 2413 2414 if (i915_gem_request_completed(obj->last_read_req, true)) 2415 i915_gem_object_move_to_inactive(obj); 2416 } 2417 2418 static int 2419 i915_gem_init_seqno(struct drm_device *dev, u32 seqno) 2420 { 2421 struct drm_i915_private *dev_priv = dev->dev_private; 2422 struct intel_engine_cs *ring; 2423 int ret, i, j; 2424 2425 /* Carefully retire all requests without writing to the rings */ 2426 for_each_ring(ring, dev_priv, i) { 2427 ret = intel_ring_idle(ring); 2428 if (ret) 2429 return ret; 2430 } 2431 i915_gem_retire_requests(dev); 2432 2433 /* Finally reset hw state */ 2434 for_each_ring(ring, dev_priv, i) { 2435 intel_ring_init_seqno(ring, seqno); 2436 2437 for (j = 0; j < ARRAY_SIZE(ring->semaphore.sync_seqno); j++) 2438 ring->semaphore.sync_seqno[j] = 0; 2439 } 2440 2441 return 0; 2442 } 2443 2444 int i915_gem_set_seqno(struct drm_device *dev, u32 seqno) 2445 { 2446 struct drm_i915_private *dev_priv = dev->dev_private; 2447 int ret; 2448 2449 if (seqno == 0) 2450 return -EINVAL; 2451 2452 /* HWS page needs to be set less than what we 2453 * will inject to ring 2454 */ 2455 ret = i915_gem_init_seqno(dev, seqno - 1); 2456 if (ret) 2457 return ret; 2458 2459 /* Carefully set the last_seqno value so that wrap 2460 * detection still works 2461 */ 2462 dev_priv->next_seqno = seqno; 2463 dev_priv->last_seqno = seqno - 1; 2464 if (dev_priv->last_seqno == 0) 2465 dev_priv->last_seqno--; 2466 2467 return 0; 2468 } 2469 2470 int 2471 i915_gem_get_seqno(struct drm_device *dev, u32 *seqno) 2472 { 2473 struct drm_i915_private *dev_priv = dev->dev_private; 2474 2475 /* reserve 0 for non-seqno */ 2476 if (dev_priv->next_seqno == 0) { 2477 int ret = i915_gem_init_seqno(dev, 0); 2478 if (ret) 2479 return ret; 2480 2481 dev_priv->next_seqno = 1; 2482 } 2483 2484 *seqno = dev_priv->last_seqno = dev_priv->next_seqno++; 2485 return 0; 2486 } 2487 2488 int __i915_add_request(struct intel_engine_cs *ring, 2489 struct drm_file *file, 2490 struct drm_i915_gem_object *obj) 2491 { 2492 struct drm_i915_private *dev_priv = ring->dev->dev_private; 2493 struct drm_i915_gem_request *request; 2494 struct intel_ringbuffer *ringbuf; 2495 u32 request_start; 2496 int ret; 2497 2498 request = ring->outstanding_lazy_request; 2499 if (WARN_ON(request == NULL)) 2500 return -ENOMEM; 2501 2502 if (i915.enable_execlists) { 2503 ringbuf = request->ctx->engine[ring->id].ringbuf; 2504 } else 2505 ringbuf = ring->buffer; 2506 2507 request_start = intel_ring_get_tail(ringbuf); 2508 /* 2509 * Emit any outstanding flushes - execbuf can fail to emit the flush 2510 * after having emitted the batchbuffer command. Hence we need to fix 2511 * things up similar to emitting the lazy request. The difference here 2512 * is that the flush _must_ happen before the next request, no matter 2513 * what. 2514 */ 2515 if (i915.enable_execlists) { 2516 ret = logical_ring_flush_all_caches(ringbuf, request->ctx); 2517 if (ret) 2518 return ret; 2519 } else { 2520 ret = intel_ring_flush_all_caches(ring); 2521 if (ret) 2522 return ret; 2523 } 2524 2525 /* Record the position of the start of the request so that 2526 * should we detect the updated seqno part-way through the 2527 * GPU processing the request, we never over-estimate the 2528 * position of the head. 2529 */ 2530 request->postfix = intel_ring_get_tail(ringbuf); 2531 2532 if (i915.enable_execlists) { 2533 ret = ring->emit_request(ringbuf, request); 2534 if (ret) 2535 return ret; 2536 } else { 2537 ret = ring->add_request(ring); 2538 if (ret) 2539 return ret; 2540 } 2541 2542 request->head = request_start; 2543 request->tail = intel_ring_get_tail(ringbuf); 2544 2545 /* Whilst this request exists, batch_obj will be on the 2546 * active_list, and so will hold the active reference. Only when this 2547 * request is retired will the the batch_obj be moved onto the 2548 * inactive_list and lose its active reference. Hence we do not need 2549 * to explicitly hold another reference here. 2550 */ 2551 request->batch_obj = obj; 2552 2553 if (!i915.enable_execlists) { 2554 /* Hold a reference to the current context so that we can inspect 2555 * it later in case a hangcheck error event fires. 2556 */ 2557 request->ctx = ring->last_context; 2558 if (request->ctx) 2559 i915_gem_context_reference(request->ctx); 2560 } 2561 2562 request->emitted_jiffies = jiffies; 2563 list_add_tail(&request->list, &ring->request_list); 2564 request->file_priv = NULL; 2565 2566 if (file) { 2567 struct drm_i915_file_private *file_priv = file->driver_priv; 2568 2569 spin_lock(&file_priv->mm.lock); 2570 request->file_priv = file_priv; 2571 list_add_tail(&request->client_list, 2572 &file_priv->mm.request_list); 2573 spin_unlock(&file_priv->mm.lock); 2574 } 2575 2576 trace_i915_gem_request_add(request); 2577 ring->outstanding_lazy_request = NULL; 2578 2579 i915_queue_hangcheck(ring->dev); 2580 2581 cancel_delayed_work_sync(&dev_priv->mm.idle_work); 2582 queue_delayed_work(dev_priv->wq, 2583 &dev_priv->mm.retire_work, 2584 round_jiffies_up_relative(HZ)); 2585 intel_mark_busy(dev_priv->dev); 2586 2587 return 0; 2588 } 2589 2590 static inline void 2591 i915_gem_request_remove_from_client(struct drm_i915_gem_request *request) 2592 { 2593 struct drm_i915_file_private *file_priv = request->file_priv; 2594 2595 if (!file_priv) 2596 return; 2597 2598 spin_lock(&file_priv->mm.lock); 2599 list_del(&request->client_list); 2600 request->file_priv = NULL; 2601 spin_unlock(&file_priv->mm.lock); 2602 } 2603 2604 static bool i915_context_is_banned(struct drm_i915_private *dev_priv, 2605 const struct intel_context *ctx) 2606 { 2607 unsigned long elapsed; 2608 2609 elapsed = get_seconds() - ctx->hang_stats.guilty_ts; 2610 2611 if (ctx->hang_stats.banned) 2612 return true; 2613 2614 if (ctx->hang_stats.ban_period_seconds && 2615 elapsed <= ctx->hang_stats.ban_period_seconds) { 2616 if (!i915_gem_context_is_default(ctx)) { 2617 DRM_DEBUG("context hanging too fast, banning!\n"); 2618 return true; 2619 } else if (i915_stop_ring_allow_ban(dev_priv)) { 2620 if (i915_stop_ring_allow_warn(dev_priv)) 2621 DRM_ERROR("gpu hanging too fast, banning!\n"); 2622 return true; 2623 } 2624 } 2625 2626 return false; 2627 } 2628 2629 static void i915_set_reset_status(struct drm_i915_private *dev_priv, 2630 struct intel_context *ctx, 2631 const bool guilty) 2632 { 2633 struct i915_ctx_hang_stats *hs; 2634 2635 if (WARN_ON(!ctx)) 2636 return; 2637 2638 hs = &ctx->hang_stats; 2639 2640 if (guilty) { 2641 hs->banned = i915_context_is_banned(dev_priv, ctx); 2642 hs->batch_active++; 2643 hs->guilty_ts = get_seconds(); 2644 } else { 2645 hs->batch_pending++; 2646 } 2647 } 2648 2649 static void i915_gem_free_request(struct drm_i915_gem_request *request) 2650 { 2651 list_del(&request->list); 2652 i915_gem_request_remove_from_client(request); 2653 2654 i915_gem_request_unreference(request); 2655 } 2656 2657 void i915_gem_request_free(struct kref *req_ref) 2658 { 2659 struct drm_i915_gem_request *req = container_of(req_ref, 2660 typeof(*req), ref); 2661 struct intel_context *ctx = req->ctx; 2662 2663 if (ctx) { 2664 if (i915.enable_execlists) { 2665 struct intel_engine_cs *ring = req->ring; 2666 2667 if (ctx != ring->default_context) 2668 intel_lr_context_unpin(ring, ctx); 2669 } 2670 2671 i915_gem_context_unreference(ctx); 2672 } 2673 2674 kfree(req); 2675 } 2676 2677 struct drm_i915_gem_request * 2678 i915_gem_find_active_request(struct intel_engine_cs *ring) 2679 { 2680 struct drm_i915_gem_request *request; 2681 2682 list_for_each_entry(request, &ring->request_list, list) { 2683 if (i915_gem_request_completed(request, false)) 2684 continue; 2685 2686 return request; 2687 } 2688 2689 return NULL; 2690 } 2691 2692 static void i915_gem_reset_ring_status(struct drm_i915_private *dev_priv, 2693 struct intel_engine_cs *ring) 2694 { 2695 struct drm_i915_gem_request *request; 2696 bool ring_hung; 2697 2698 request = i915_gem_find_active_request(ring); 2699 2700 if (request == NULL) 2701 return; 2702 2703 ring_hung = ring->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG; 2704 2705 i915_set_reset_status(dev_priv, request->ctx, ring_hung); 2706 2707 list_for_each_entry_continue(request, &ring->request_list, list) 2708 i915_set_reset_status(dev_priv, request->ctx, false); 2709 } 2710 2711 static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv, 2712 struct intel_engine_cs *ring) 2713 { 2714 while (!list_empty(&ring->active_list)) { 2715 struct drm_i915_gem_object *obj; 2716 2717 obj = list_first_entry(&ring->active_list, 2718 struct drm_i915_gem_object, 2719 ring_list); 2720 2721 i915_gem_object_move_to_inactive(obj); 2722 } 2723 2724 /* 2725 * Clear the execlists queue up before freeing the requests, as those 2726 * are the ones that keep the context and ringbuffer backing objects 2727 * pinned in place. 2728 */ 2729 while (!list_empty(&ring->execlist_queue)) { 2730 struct drm_i915_gem_request *submit_req; 2731 2732 submit_req = list_first_entry(&ring->execlist_queue, 2733 struct drm_i915_gem_request, 2734 execlist_link); 2735 list_del(&submit_req->execlist_link); 2736 intel_runtime_pm_put(dev_priv); 2737 2738 if (submit_req->ctx != ring->default_context) 2739 intel_lr_context_unpin(ring, submit_req->ctx); 2740 2741 i915_gem_request_unreference(submit_req); 2742 } 2743 2744 /* 2745 * We must free the requests after all the corresponding objects have 2746 * been moved off active lists. Which is the same order as the normal 2747 * retire_requests function does. This is important if object hold 2748 * implicit references on things like e.g. ppgtt address spaces through 2749 * the request. 2750 */ 2751 while (!list_empty(&ring->request_list)) { 2752 struct drm_i915_gem_request *request; 2753 2754 request = list_first_entry(&ring->request_list, 2755 struct drm_i915_gem_request, 2756 list); 2757 2758 i915_gem_free_request(request); 2759 } 2760 2761 /* This may not have been flushed before the reset, so clean it now */ 2762 i915_gem_request_assign(&ring->outstanding_lazy_request, NULL); 2763 } 2764 2765 void i915_gem_restore_fences(struct drm_device *dev) 2766 { 2767 struct drm_i915_private *dev_priv = dev->dev_private; 2768 int i; 2769 2770 for (i = 0; i < dev_priv->num_fence_regs; i++) { 2771 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i]; 2772 2773 /* 2774 * Commit delayed tiling changes if we have an object still 2775 * attached to the fence, otherwise just clear the fence. 2776 */ 2777 if (reg->obj) { 2778 i915_gem_object_update_fence(reg->obj, reg, 2779 reg->obj->tiling_mode); 2780 } else { 2781 i915_gem_write_fence(dev, i, NULL); 2782 } 2783 } 2784 } 2785 2786 void i915_gem_reset(struct drm_device *dev) 2787 { 2788 struct drm_i915_private *dev_priv = dev->dev_private; 2789 struct intel_engine_cs *ring; 2790 int i; 2791 2792 /* 2793 * Before we free the objects from the requests, we need to inspect 2794 * them for finding the guilty party. As the requests only borrow 2795 * their reference to the objects, the inspection must be done first. 2796 */ 2797 for_each_ring(ring, dev_priv, i) 2798 i915_gem_reset_ring_status(dev_priv, ring); 2799 2800 for_each_ring(ring, dev_priv, i) 2801 i915_gem_reset_ring_cleanup(dev_priv, ring); 2802 2803 i915_gem_context_reset(dev); 2804 2805 i915_gem_restore_fences(dev); 2806 } 2807 2808 /** 2809 * This function clears the request list as sequence numbers are passed. 2810 */ 2811 void 2812 i915_gem_retire_requests_ring(struct intel_engine_cs *ring) 2813 { 2814 if (list_empty(&ring->request_list)) 2815 return; 2816 2817 WARN_ON(i915_verify_lists(ring->dev)); 2818 2819 /* Retire requests first as we use it above for the early return. 2820 * If we retire requests last, we may use a later seqno and so clear 2821 * the requests lists without clearing the active list, leading to 2822 * confusion. 2823 */ 2824 while (!list_empty(&ring->request_list)) { 2825 struct drm_i915_gem_request *request; 2826 struct intel_ringbuffer *ringbuf; 2827 2828 request = list_first_entry(&ring->request_list, 2829 struct drm_i915_gem_request, 2830 list); 2831 2832 if (!i915_gem_request_completed(request, true)) 2833 break; 2834 2835 trace_i915_gem_request_retire(request); 2836 2837 /* This is one of the few common intersection points 2838 * between legacy ringbuffer submission and execlists: 2839 * we need to tell them apart in order to find the correct 2840 * ringbuffer to which the request belongs to. 2841 */ 2842 if (i915.enable_execlists) { 2843 struct intel_context *ctx = request->ctx; 2844 ringbuf = ctx->engine[ring->id].ringbuf; 2845 } else 2846 ringbuf = ring->buffer; 2847 2848 /* We know the GPU must have read the request to have 2849 * sent us the seqno + interrupt, so use the position 2850 * of tail of the request to update the last known position 2851 * of the GPU head. 2852 */ 2853 ringbuf->last_retired_head = request->postfix; 2854 2855 i915_gem_free_request(request); 2856 } 2857 2858 /* Move any buffers on the active list that are no longer referenced 2859 * by the ringbuffer to the flushing/inactive lists as appropriate, 2860 * before we free the context associated with the requests. 2861 */ 2862 while (!list_empty(&ring->active_list)) { 2863 struct drm_i915_gem_object *obj; 2864 2865 obj = list_first_entry(&ring->active_list, 2866 struct drm_i915_gem_object, 2867 ring_list); 2868 2869 if (!i915_gem_request_completed(obj->last_read_req, true)) 2870 break; 2871 2872 i915_gem_object_move_to_inactive(obj); 2873 } 2874 2875 if (unlikely(ring->trace_irq_req && 2876 i915_gem_request_completed(ring->trace_irq_req, true))) { 2877 ring->irq_put(ring); 2878 i915_gem_request_assign(&ring->trace_irq_req, NULL); 2879 } 2880 2881 WARN_ON(i915_verify_lists(ring->dev)); 2882 } 2883 2884 bool 2885 i915_gem_retire_requests(struct drm_device *dev) 2886 { 2887 struct drm_i915_private *dev_priv = dev->dev_private; 2888 struct intel_engine_cs *ring; 2889 bool idle = true; 2890 int i; 2891 2892 for_each_ring(ring, dev_priv, i) { 2893 i915_gem_retire_requests_ring(ring); 2894 idle &= list_empty(&ring->request_list); 2895 if (i915.enable_execlists) { 2896 2897 lockmgr(&ring->execlist_lock, LK_EXCLUSIVE); 2898 idle &= list_empty(&ring->execlist_queue); 2899 lockmgr(&ring->execlist_lock, LK_RELEASE); 2900 2901 intel_execlists_retire_requests(ring); 2902 } 2903 } 2904 2905 if (idle) 2906 mod_delayed_work(dev_priv->wq, 2907 &dev_priv->mm.idle_work, 2908 msecs_to_jiffies(100)); 2909 2910 return idle; 2911 } 2912 2913 static void 2914 i915_gem_retire_work_handler(struct work_struct *work) 2915 { 2916 struct drm_i915_private *dev_priv = 2917 container_of(work, typeof(*dev_priv), mm.retire_work.work); 2918 struct drm_device *dev = dev_priv->dev; 2919 bool idle; 2920 2921 /* Come back later if the device is busy... */ 2922 idle = false; 2923 if (mutex_trylock(&dev->struct_mutex)) { 2924 idle = i915_gem_retire_requests(dev); 2925 mutex_unlock(&dev->struct_mutex); 2926 } 2927 if (!idle) 2928 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 2929 round_jiffies_up_relative(HZ)); 2930 } 2931 2932 static void 2933 i915_gem_idle_work_handler(struct work_struct *work) 2934 { 2935 struct drm_i915_private *dev_priv = 2936 container_of(work, typeof(*dev_priv), mm.idle_work.work); 2937 2938 intel_mark_idle(dev_priv->dev); 2939 } 2940 2941 /** 2942 * Ensures that an object will eventually get non-busy by flushing any required 2943 * write domains, emitting any outstanding lazy request and retiring and 2944 * completed requests. 2945 */ 2946 static int 2947 i915_gem_object_flush_active(struct drm_i915_gem_object *obj) 2948 { 2949 struct intel_engine_cs *ring; 2950 int ret; 2951 2952 if (obj->active) { 2953 ring = i915_gem_request_get_ring(obj->last_read_req); 2954 2955 ret = i915_gem_check_olr(obj->last_read_req); 2956 if (ret) 2957 return ret; 2958 2959 i915_gem_retire_requests_ring(ring); 2960 } 2961 2962 return 0; 2963 } 2964 2965 /** 2966 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT 2967 * @DRM_IOCTL_ARGS: standard ioctl arguments 2968 * 2969 * Returns 0 if successful, else an error is returned with the remaining time in 2970 * the timeout parameter. 2971 * -ETIME: object is still busy after timeout 2972 * -ERESTARTSYS: signal interrupted the wait 2973 * -ENONENT: object doesn't exist 2974 * Also possible, but rare: 2975 * -EAGAIN: GPU wedged 2976 * -ENOMEM: damn 2977 * -ENODEV: Internal IRQ fail 2978 * -E?: The add request failed 2979 * 2980 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any 2981 * non-zero timeout parameter the wait ioctl will wait for the given number of 2982 * nanoseconds on an object becoming unbusy. Since the wait itself does so 2983 * without holding struct_mutex the object may become re-busied before this 2984 * function completes. A similar but shorter * race condition exists in the busy 2985 * ioctl 2986 */ 2987 int 2988 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) 2989 { 2990 struct drm_i915_private *dev_priv = dev->dev_private; 2991 struct drm_i915_gem_wait *args = data; 2992 struct drm_i915_gem_object *obj; 2993 struct drm_i915_gem_request *req; 2994 unsigned reset_counter; 2995 int ret = 0; 2996 2997 if (args->flags != 0) 2998 return -EINVAL; 2999 3000 ret = i915_mutex_lock_interruptible(dev); 3001 if (ret) 3002 return ret; 3003 3004 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->bo_handle)); 3005 if (&obj->base == NULL) { 3006 mutex_unlock(&dev->struct_mutex); 3007 return -ENOENT; 3008 } 3009 3010 /* Need to make sure the object gets inactive eventually. */ 3011 ret = i915_gem_object_flush_active(obj); 3012 if (ret) 3013 goto out; 3014 3015 if (!obj->active || !obj->last_read_req) 3016 goto out; 3017 3018 req = obj->last_read_req; 3019 3020 /* Do this after OLR check to make sure we make forward progress polling 3021 * on this IOCTL with a timeout == 0 (like busy ioctl) 3022 */ 3023 if (args->timeout_ns == 0) { 3024 ret = -ETIME; 3025 goto out; 3026 } 3027 3028 drm_gem_object_unreference(&obj->base); 3029 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter); 3030 i915_gem_request_reference(req); 3031 mutex_unlock(&dev->struct_mutex); 3032 3033 ret = __i915_wait_request(req, reset_counter, true, 3034 args->timeout_ns > 0 ? &args->timeout_ns : NULL, 3035 file->driver_priv); 3036 mutex_lock(&dev->struct_mutex); 3037 i915_gem_request_unreference(req); 3038 mutex_unlock(&dev->struct_mutex); 3039 return ret; 3040 3041 out: 3042 drm_gem_object_unreference(&obj->base); 3043 mutex_unlock(&dev->struct_mutex); 3044 return ret; 3045 } 3046 3047 /** 3048 * i915_gem_object_sync - sync an object to a ring. 3049 * 3050 * @obj: object which may be in use on another ring. 3051 * @to: ring we wish to use the object on. May be NULL. 3052 * 3053 * This code is meant to abstract object synchronization with the GPU. 3054 * Calling with NULL implies synchronizing the object with the CPU 3055 * rather than a particular GPU ring. 3056 * 3057 * Returns 0 if successful, else propagates up the lower layer error. 3058 */ 3059 int 3060 i915_gem_object_sync(struct drm_i915_gem_object *obj, 3061 struct intel_engine_cs *to) 3062 { 3063 struct intel_engine_cs *from; 3064 u32 seqno; 3065 int ret, idx; 3066 3067 from = i915_gem_request_get_ring(obj->last_read_req); 3068 3069 if (from == NULL || to == from) 3070 return 0; 3071 3072 if (to == NULL || !i915_semaphore_is_enabled(obj->base.dev)) 3073 return i915_gem_object_wait_rendering(obj, false); 3074 3075 idx = intel_ring_sync_index(from, to); 3076 3077 seqno = i915_gem_request_get_seqno(obj->last_read_req); 3078 /* Optimization: Avoid semaphore sync when we are sure we already 3079 * waited for an object with higher seqno */ 3080 if (seqno <= from->semaphore.sync_seqno[idx]) 3081 return 0; 3082 3083 ret = i915_gem_check_olr(obj->last_read_req); 3084 if (ret) 3085 return ret; 3086 3087 trace_i915_gem_ring_sync_to(from, to, obj->last_read_req); 3088 ret = to->semaphore.sync_to(to, from, seqno); 3089 if (!ret) 3090 /* We use last_read_req because sync_to() 3091 * might have just caused seqno wrap under 3092 * the radar. 3093 */ 3094 from->semaphore.sync_seqno[idx] = 3095 i915_gem_request_get_seqno(obj->last_read_req); 3096 3097 return ret; 3098 } 3099 3100 static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj) 3101 { 3102 u32 old_write_domain, old_read_domains; 3103 3104 /* Force a pagefault for domain tracking on next user access */ 3105 i915_gem_release_mmap(obj); 3106 3107 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) 3108 return; 3109 3110 /* Wait for any direct GTT access to complete */ 3111 mb(); 3112 3113 old_read_domains = obj->base.read_domains; 3114 old_write_domain = obj->base.write_domain; 3115 3116 obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT; 3117 obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT; 3118 3119 trace_i915_gem_object_change_domain(obj, 3120 old_read_domains, 3121 old_write_domain); 3122 } 3123 3124 int i915_vma_unbind(struct i915_vma *vma) 3125 { 3126 struct drm_i915_gem_object *obj = vma->obj; 3127 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 3128 int ret; 3129 3130 if (list_empty(&vma->vma_link)) 3131 return 0; 3132 3133 if (!drm_mm_node_allocated(&vma->node)) { 3134 i915_gem_vma_destroy(vma); 3135 return 0; 3136 } 3137 3138 if (vma->pin_count) 3139 return -EBUSY; 3140 3141 BUG_ON(obj->pages == NULL); 3142 3143 ret = i915_gem_object_finish_gpu(obj); 3144 if (ret) 3145 return ret; 3146 /* Continue on if we fail due to EIO, the GPU is hung so we 3147 * should be safe and we need to cleanup or else we might 3148 * cause memory corruption through use-after-free. 3149 */ 3150 3151 if (i915_is_ggtt(vma->vm) && 3152 vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) { 3153 i915_gem_object_finish_gtt(obj); 3154 3155 /* release the fence reg _after_ flushing */ 3156 ret = i915_gem_object_put_fence(obj); 3157 if (ret) 3158 return ret; 3159 } 3160 3161 trace_i915_vma_unbind(vma); 3162 3163 vma->unbind_vma(vma); 3164 3165 list_del_init(&vma->mm_list); 3166 if (i915_is_ggtt(vma->vm)) { 3167 if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) { 3168 obj->map_and_fenceable = false; 3169 } else if (vma->ggtt_view.pages) { 3170 kfree(vma->ggtt_view.pages); 3171 vma->ggtt_view.pages = NULL; 3172 } 3173 } 3174 3175 drm_mm_remove_node(&vma->node); 3176 i915_gem_vma_destroy(vma); 3177 3178 /* Since the unbound list is global, only move to that list if 3179 * no more VMAs exist. */ 3180 if (list_empty(&obj->vma_list)) { 3181 /* Throw away the active reference before 3182 * moving to the unbound list. */ 3183 i915_gem_object_retire(obj); 3184 3185 i915_gem_gtt_finish_object(obj); 3186 list_move_tail(&obj->global_list, &dev_priv->mm.unbound_list); 3187 } 3188 3189 /* And finally now the object is completely decoupled from this vma, 3190 * we can drop its hold on the backing storage and allow it to be 3191 * reaped by the shrinker. 3192 */ 3193 i915_gem_object_unpin_pages(obj); 3194 3195 return 0; 3196 } 3197 3198 int i915_gpu_idle(struct drm_device *dev) 3199 { 3200 struct drm_i915_private *dev_priv = dev->dev_private; 3201 struct intel_engine_cs *ring; 3202 int ret, i; 3203 3204 /* Flush everything onto the inactive list. */ 3205 for_each_ring(ring, dev_priv, i) { 3206 if (!i915.enable_execlists) { 3207 ret = i915_switch_context(ring, ring->default_context); 3208 if (ret) 3209 return ret; 3210 } 3211 3212 ret = intel_ring_idle(ring); 3213 if (ret) 3214 return ret; 3215 } 3216 3217 return 0; 3218 } 3219 3220 static void i965_write_fence_reg(struct drm_device *dev, int reg, 3221 struct drm_i915_gem_object *obj) 3222 { 3223 struct drm_i915_private *dev_priv = dev->dev_private; 3224 int fence_reg; 3225 int fence_pitch_shift; 3226 3227 if (INTEL_INFO(dev)->gen >= 6) { 3228 fence_reg = FENCE_REG_SANDYBRIDGE_0; 3229 fence_pitch_shift = SANDYBRIDGE_FENCE_PITCH_SHIFT; 3230 } else { 3231 fence_reg = FENCE_REG_965_0; 3232 fence_pitch_shift = I965_FENCE_PITCH_SHIFT; 3233 } 3234 3235 fence_reg += reg * 8; 3236 3237 /* To w/a incoherency with non-atomic 64-bit register updates, 3238 * we split the 64-bit update into two 32-bit writes. In order 3239 * for a partial fence not to be evaluated between writes, we 3240 * precede the update with write to turn off the fence register, 3241 * and only enable the fence as the last step. 3242 * 3243 * For extra levels of paranoia, we make sure each step lands 3244 * before applying the next step. 3245 */ 3246 I915_WRITE(fence_reg, 0); 3247 POSTING_READ(fence_reg); 3248 3249 if (obj) { 3250 u32 size = i915_gem_obj_ggtt_size(obj); 3251 uint64_t val; 3252 3253 val = (uint64_t)((i915_gem_obj_ggtt_offset(obj) + size - 4096) & 3254 0xfffff000) << 32; 3255 val |= i915_gem_obj_ggtt_offset(obj) & 0xfffff000; 3256 val |= (uint64_t)((obj->stride / 128) - 1) << fence_pitch_shift; 3257 if (obj->tiling_mode == I915_TILING_Y) 3258 val |= 1 << I965_FENCE_TILING_Y_SHIFT; 3259 val |= I965_FENCE_REG_VALID; 3260 3261 I915_WRITE(fence_reg + 4, val >> 32); 3262 POSTING_READ(fence_reg + 4); 3263 3264 I915_WRITE(fence_reg + 0, val); 3265 POSTING_READ(fence_reg); 3266 } else { 3267 I915_WRITE(fence_reg + 4, 0); 3268 POSTING_READ(fence_reg + 4); 3269 } 3270 } 3271 3272 static void i915_write_fence_reg(struct drm_device *dev, int reg, 3273 struct drm_i915_gem_object *obj) 3274 { 3275 struct drm_i915_private *dev_priv = dev->dev_private; 3276 u32 val; 3277 3278 if (obj) { 3279 u32 size = i915_gem_obj_ggtt_size(obj); 3280 int pitch_val; 3281 int tile_width; 3282 3283 WARN((i915_gem_obj_ggtt_offset(obj) & ~I915_FENCE_START_MASK) || 3284 (size & -size) != size || 3285 (i915_gem_obj_ggtt_offset(obj) & (size - 1)), 3286 "object 0x%08lx [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n", 3287 i915_gem_obj_ggtt_offset(obj), obj->map_and_fenceable, size); 3288 3289 if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev)) 3290 tile_width = 128; 3291 else 3292 tile_width = 512; 3293 3294 /* Note: pitch better be a power of two tile widths */ 3295 pitch_val = obj->stride / tile_width; 3296 pitch_val = ffs(pitch_val) - 1; 3297 3298 val = i915_gem_obj_ggtt_offset(obj); 3299 if (obj->tiling_mode == I915_TILING_Y) 3300 val |= 1 << I830_FENCE_TILING_Y_SHIFT; 3301 val |= I915_FENCE_SIZE_BITS(size); 3302 val |= pitch_val << I830_FENCE_PITCH_SHIFT; 3303 val |= I830_FENCE_REG_VALID; 3304 } else 3305 val = 0; 3306 3307 if (reg < 8) 3308 reg = FENCE_REG_830_0 + reg * 4; 3309 else 3310 reg = FENCE_REG_945_8 + (reg - 8) * 4; 3311 3312 I915_WRITE(reg, val); 3313 POSTING_READ(reg); 3314 } 3315 3316 static void i830_write_fence_reg(struct drm_device *dev, int reg, 3317 struct drm_i915_gem_object *obj) 3318 { 3319 struct drm_i915_private *dev_priv = dev->dev_private; 3320 uint32_t val; 3321 3322 if (obj) { 3323 u32 size = i915_gem_obj_ggtt_size(obj); 3324 uint32_t pitch_val; 3325 3326 WARN((i915_gem_obj_ggtt_offset(obj) & ~I830_FENCE_START_MASK) || 3327 (size & -size) != size || 3328 (i915_gem_obj_ggtt_offset(obj) & (size - 1)), 3329 "object 0x%08lx not 512K or pot-size 0x%08x aligned\n", 3330 i915_gem_obj_ggtt_offset(obj), size); 3331 3332 pitch_val = obj->stride / 128; 3333 pitch_val = ffs(pitch_val) - 1; 3334 3335 val = i915_gem_obj_ggtt_offset(obj); 3336 if (obj->tiling_mode == I915_TILING_Y) 3337 val |= 1 << I830_FENCE_TILING_Y_SHIFT; 3338 val |= I830_FENCE_SIZE_BITS(size); 3339 val |= pitch_val << I830_FENCE_PITCH_SHIFT; 3340 val |= I830_FENCE_REG_VALID; 3341 } else 3342 val = 0; 3343 3344 I915_WRITE(FENCE_REG_830_0 + reg * 4, val); 3345 POSTING_READ(FENCE_REG_830_0 + reg * 4); 3346 } 3347 3348 inline static bool i915_gem_object_needs_mb(struct drm_i915_gem_object *obj) 3349 { 3350 return obj && obj->base.read_domains & I915_GEM_DOMAIN_GTT; 3351 } 3352 3353 static void i915_gem_write_fence(struct drm_device *dev, int reg, 3354 struct drm_i915_gem_object *obj) 3355 { 3356 struct drm_i915_private *dev_priv = dev->dev_private; 3357 3358 /* Ensure that all CPU reads are completed before installing a fence 3359 * and all writes before removing the fence. 3360 */ 3361 if (i915_gem_object_needs_mb(dev_priv->fence_regs[reg].obj)) 3362 mb(); 3363 3364 WARN(obj && (!obj->stride || !obj->tiling_mode), 3365 "bogus fence setup with stride: 0x%x, tiling mode: %i\n", 3366 obj->stride, obj->tiling_mode); 3367 3368 if (IS_GEN2(dev)) 3369 i830_write_fence_reg(dev, reg, obj); 3370 else if (IS_GEN3(dev)) 3371 i915_write_fence_reg(dev, reg, obj); 3372 else if (INTEL_INFO(dev)->gen >= 4) 3373 i965_write_fence_reg(dev, reg, obj); 3374 3375 /* And similarly be paranoid that no direct access to this region 3376 * is reordered to before the fence is installed. 3377 */ 3378 if (i915_gem_object_needs_mb(obj)) 3379 mb(); 3380 } 3381 3382 static inline int fence_number(struct drm_i915_private *dev_priv, 3383 struct drm_i915_fence_reg *fence) 3384 { 3385 return fence - dev_priv->fence_regs; 3386 } 3387 3388 static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj, 3389 struct drm_i915_fence_reg *fence, 3390 bool enable) 3391 { 3392 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 3393 int reg = fence_number(dev_priv, fence); 3394 3395 i915_gem_write_fence(obj->base.dev, reg, enable ? obj : NULL); 3396 3397 if (enable) { 3398 obj->fence_reg = reg; 3399 fence->obj = obj; 3400 list_move_tail(&fence->lru_list, &dev_priv->mm.fence_list); 3401 } else { 3402 obj->fence_reg = I915_FENCE_REG_NONE; 3403 fence->obj = NULL; 3404 list_del_init(&fence->lru_list); 3405 } 3406 obj->fence_dirty = false; 3407 } 3408 3409 static int 3410 i915_gem_object_wait_fence(struct drm_i915_gem_object *obj) 3411 { 3412 if (obj->last_fenced_req) { 3413 int ret = i915_wait_request(obj->last_fenced_req); 3414 if (ret) 3415 return ret; 3416 3417 i915_gem_request_assign(&obj->last_fenced_req, NULL); 3418 } 3419 3420 return 0; 3421 } 3422 3423 int 3424 i915_gem_object_put_fence(struct drm_i915_gem_object *obj) 3425 { 3426 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 3427 struct drm_i915_fence_reg *fence; 3428 int ret; 3429 3430 ret = i915_gem_object_wait_fence(obj); 3431 if (ret) 3432 return ret; 3433 3434 if (obj->fence_reg == I915_FENCE_REG_NONE) 3435 return 0; 3436 3437 fence = &dev_priv->fence_regs[obj->fence_reg]; 3438 3439 if (WARN_ON(fence->pin_count)) 3440 return -EBUSY; 3441 3442 i915_gem_object_fence_lost(obj); 3443 i915_gem_object_update_fence(obj, fence, false); 3444 3445 return 0; 3446 } 3447 3448 static struct drm_i915_fence_reg * 3449 i915_find_fence_reg(struct drm_device *dev) 3450 { 3451 struct drm_i915_private *dev_priv = dev->dev_private; 3452 struct drm_i915_fence_reg *reg, *avail; 3453 int i; 3454 3455 /* First try to find a free reg */ 3456 avail = NULL; 3457 for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) { 3458 reg = &dev_priv->fence_regs[i]; 3459 if (!reg->obj) 3460 return reg; 3461 3462 if (!reg->pin_count) 3463 avail = reg; 3464 } 3465 3466 if (avail == NULL) 3467 goto deadlock; 3468 3469 /* None available, try to steal one or wait for a user to finish */ 3470 list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) { 3471 if (reg->pin_count) 3472 continue; 3473 3474 return reg; 3475 } 3476 3477 deadlock: 3478 /* Wait for completion of pending flips which consume fences */ 3479 if (intel_has_pending_fb_unpin(dev)) 3480 return ERR_PTR(-EAGAIN); 3481 3482 return ERR_PTR(-EDEADLK); 3483 } 3484 3485 /** 3486 * i915_gem_object_get_fence - set up fencing for an object 3487 * @obj: object to map through a fence reg 3488 * 3489 * When mapping objects through the GTT, userspace wants to be able to write 3490 * to them without having to worry about swizzling if the object is tiled. 3491 * This function walks the fence regs looking for a free one for @obj, 3492 * stealing one if it can't find any. 3493 * 3494 * It then sets up the reg based on the object's properties: address, pitch 3495 * and tiling format. 3496 * 3497 * For an untiled surface, this removes any existing fence. 3498 */ 3499 int 3500 i915_gem_object_get_fence(struct drm_i915_gem_object *obj) 3501 { 3502 struct drm_device *dev = obj->base.dev; 3503 struct drm_i915_private *dev_priv = dev->dev_private; 3504 bool enable = obj->tiling_mode != I915_TILING_NONE; 3505 struct drm_i915_fence_reg *reg; 3506 int ret; 3507 3508 /* Have we updated the tiling parameters upon the object and so 3509 * will need to serialise the write to the associated fence register? 3510 */ 3511 if (obj->fence_dirty) { 3512 ret = i915_gem_object_wait_fence(obj); 3513 if (ret) 3514 return ret; 3515 } 3516 3517 /* Just update our place in the LRU if our fence is getting reused. */ 3518 if (obj->fence_reg != I915_FENCE_REG_NONE) { 3519 reg = &dev_priv->fence_regs[obj->fence_reg]; 3520 if (!obj->fence_dirty) { 3521 list_move_tail(®->lru_list, 3522 &dev_priv->mm.fence_list); 3523 return 0; 3524 } 3525 } else if (enable) { 3526 if (WARN_ON(!obj->map_and_fenceable)) 3527 return -EINVAL; 3528 3529 reg = i915_find_fence_reg(dev); 3530 if (IS_ERR(reg)) 3531 return PTR_ERR(reg); 3532 3533 if (reg->obj) { 3534 struct drm_i915_gem_object *old = reg->obj; 3535 3536 ret = i915_gem_object_wait_fence(old); 3537 if (ret) 3538 return ret; 3539 3540 i915_gem_object_fence_lost(old); 3541 } 3542 } else 3543 return 0; 3544 3545 i915_gem_object_update_fence(obj, reg, enable); 3546 3547 return 0; 3548 } 3549 3550 static bool i915_gem_valid_gtt_space(struct i915_vma *vma, 3551 unsigned long cache_level) 3552 { 3553 struct drm_mm_node *gtt_space = &vma->node; 3554 struct drm_mm_node *other; 3555 3556 /* 3557 * On some machines we have to be careful when putting differing types 3558 * of snoopable memory together to avoid the prefetcher crossing memory 3559 * domains and dying. During vm initialisation, we decide whether or not 3560 * these constraints apply and set the drm_mm.color_adjust 3561 * appropriately. 3562 */ 3563 if (vma->vm->mm.color_adjust == NULL) 3564 return true; 3565 3566 if (!drm_mm_node_allocated(gtt_space)) 3567 return true; 3568 3569 if (list_empty(>t_space->node_list)) 3570 return true; 3571 3572 other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list); 3573 if (other->allocated && !other->hole_follows && other->color != cache_level) 3574 return false; 3575 3576 other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list); 3577 if (other->allocated && !gtt_space->hole_follows && other->color != cache_level) 3578 return false; 3579 3580 return true; 3581 } 3582 3583 /** 3584 * Finds free space in the GTT aperture and binds the object there. 3585 */ 3586 static struct i915_vma * 3587 i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, 3588 struct i915_address_space *vm, 3589 unsigned alignment, 3590 uint64_t flags, 3591 const struct i915_ggtt_view *view) 3592 { 3593 struct drm_device *dev = obj->base.dev; 3594 struct drm_i915_private *dev_priv = dev->dev_private; 3595 u32 size, fence_size, fence_alignment, unfenced_alignment; 3596 unsigned long start = 3597 flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0; 3598 unsigned long end = 3599 flags & PIN_MAPPABLE ? dev_priv->gtt.mappable_end : vm->total; 3600 struct i915_vma *vma; 3601 int ret; 3602 3603 fence_size = i915_gem_get_gtt_size(dev, 3604 obj->base.size, 3605 obj->tiling_mode); 3606 fence_alignment = i915_gem_get_gtt_alignment(dev, 3607 obj->base.size, 3608 obj->tiling_mode, true); 3609 unfenced_alignment = 3610 i915_gem_get_gtt_alignment(dev, 3611 obj->base.size, 3612 obj->tiling_mode, false); 3613 3614 if (alignment == 0) 3615 alignment = flags & PIN_MAPPABLE ? fence_alignment : 3616 unfenced_alignment; 3617 if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) { 3618 DRM_DEBUG("Invalid object alignment requested %u\n", alignment); 3619 return ERR_PTR(-EINVAL); 3620 } 3621 3622 size = flags & PIN_MAPPABLE ? fence_size : obj->base.size; 3623 3624 /* If the object is bigger than the entire aperture, reject it early 3625 * before evicting everything in a vain attempt to find space. 3626 */ 3627 if (obj->base.size > end) { 3628 DRM_DEBUG("Attempting to bind an object larger than the aperture: object=%zd > %s aperture=%lu\n", 3629 obj->base.size, 3630 flags & PIN_MAPPABLE ? "mappable" : "total", 3631 end); 3632 return ERR_PTR(-E2BIG); 3633 } 3634 3635 ret = i915_gem_object_get_pages(obj); 3636 if (ret) 3637 return ERR_PTR(ret); 3638 3639 i915_gem_object_pin_pages(obj); 3640 3641 vma = i915_gem_obj_lookup_or_create_vma_view(obj, vm, view); 3642 if (IS_ERR(vma)) 3643 goto err_unpin; 3644 3645 search_free: 3646 ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node, 3647 size, alignment, 3648 obj->cache_level, 3649 start, end, 3650 DRM_MM_SEARCH_DEFAULT, 3651 DRM_MM_CREATE_DEFAULT); 3652 if (ret) { 3653 ret = i915_gem_evict_something(dev, vm, size, alignment, 3654 obj->cache_level, 3655 start, end, 3656 flags); 3657 if (ret == 0) 3658 goto search_free; 3659 3660 goto err_free_vma; 3661 } 3662 if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) { 3663 ret = -EINVAL; 3664 goto err_remove_node; 3665 } 3666 3667 ret = i915_gem_gtt_prepare_object(obj); 3668 if (ret) 3669 goto err_remove_node; 3670 3671 trace_i915_vma_bind(vma, flags); 3672 ret = i915_vma_bind(vma, obj->cache_level, 3673 flags & PIN_GLOBAL ? GLOBAL_BIND : 0); 3674 if (ret) 3675 goto err_finish_gtt; 3676 3677 list_move_tail(&obj->global_list, &dev_priv->mm.bound_list); 3678 list_add_tail(&vma->mm_list, &vm->inactive_list); 3679 3680 return vma; 3681 3682 err_finish_gtt: 3683 i915_gem_gtt_finish_object(obj); 3684 err_remove_node: 3685 drm_mm_remove_node(&vma->node); 3686 err_free_vma: 3687 i915_gem_vma_destroy(vma); 3688 vma = ERR_PTR(ret); 3689 err_unpin: 3690 i915_gem_object_unpin_pages(obj); 3691 return vma; 3692 } 3693 3694 bool 3695 i915_gem_clflush_object(struct drm_i915_gem_object *obj, 3696 bool force) 3697 { 3698 /* If we don't have a page list set up, then we're not pinned 3699 * to GPU, and we can ignore the cache flush because it'll happen 3700 * again at bind time. 3701 */ 3702 if (obj->pages == NULL) 3703 return false; 3704 3705 /* 3706 * Stolen memory is always coherent with the GPU as it is explicitly 3707 * marked as wc by the system, or the system is cache-coherent. 3708 */ 3709 if (obj->stolen) 3710 return false; 3711 3712 /* If the GPU is snooping the contents of the CPU cache, 3713 * we do not need to manually clear the CPU cache lines. However, 3714 * the caches are only snooped when the render cache is 3715 * flushed/invalidated. As we always have to emit invalidations 3716 * and flushes when moving into and out of the RENDER domain, correct 3717 * snooping behaviour occurs naturally as the result of our domain 3718 * tracking. 3719 */ 3720 if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) { 3721 obj->cache_dirty = true; 3722 return false; 3723 } 3724 3725 trace_i915_gem_object_clflush(obj); 3726 drm_clflush_pages(obj->pages, obj->base.size / PAGE_SIZE); 3727 obj->cache_dirty = false; 3728 3729 return true; 3730 } 3731 3732 /** Flushes the GTT write domain for the object if it's dirty. */ 3733 static void 3734 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) 3735 { 3736 uint32_t old_write_domain; 3737 3738 if (obj->base.write_domain != I915_GEM_DOMAIN_GTT) 3739 return; 3740 3741 /* No actual flushing is required for the GTT write domain. Writes 3742 * to it immediately go to main memory as far as we know, so there's 3743 * no chipset flush. It also doesn't land in render cache. 3744 * 3745 * However, we do have to enforce the order so that all writes through 3746 * the GTT land before any writes to the device, such as updates to 3747 * the GATT itself. 3748 */ 3749 wmb(); 3750 3751 old_write_domain = obj->base.write_domain; 3752 obj->base.write_domain = 0; 3753 3754 intel_fb_obj_flush(obj, false); 3755 3756 intel_fb_obj_flush(obj, false); 3757 3758 trace_i915_gem_object_change_domain(obj, 3759 obj->base.read_domains, 3760 old_write_domain); 3761 } 3762 3763 /** Flushes the CPU write domain for the object if it's dirty. */ 3764 static void 3765 i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) 3766 { 3767 uint32_t old_write_domain; 3768 3769 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) 3770 return; 3771 3772 if (i915_gem_clflush_object(obj, obj->pin_display)) 3773 i915_gem_chipset_flush(obj->base.dev); 3774 3775 old_write_domain = obj->base.write_domain; 3776 obj->base.write_domain = 0; 3777 3778 trace_i915_gem_object_change_domain(obj, 3779 obj->base.read_domains, 3780 old_write_domain); 3781 } 3782 3783 /** 3784 * Moves a single object to the GTT read, and possibly write domain. 3785 * 3786 * This function returns when the move is complete, including waiting on 3787 * flushes to occur. 3788 */ 3789 int 3790 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) 3791 { 3792 uint32_t old_write_domain, old_read_domains; 3793 struct i915_vma *vma; 3794 int ret; 3795 3796 if (obj->base.write_domain == I915_GEM_DOMAIN_GTT) 3797 return 0; 3798 3799 ret = i915_gem_object_wait_rendering(obj, !write); 3800 if (ret) 3801 return ret; 3802 3803 i915_gem_object_retire(obj); 3804 3805 /* Flush and acquire obj->pages so that we are coherent through 3806 * direct access in memory with previous cached writes through 3807 * shmemfs and that our cache domain tracking remains valid. 3808 * For example, if the obj->filp was moved to swap without us 3809 * being notified and releasing the pages, we would mistakenly 3810 * continue to assume that the obj remained out of the CPU cached 3811 * domain. 3812 */ 3813 ret = i915_gem_object_get_pages(obj); 3814 if (ret) 3815 return ret; 3816 3817 i915_gem_object_flush_cpu_write_domain(obj); 3818 3819 /* Serialise direct access to this object with the barriers for 3820 * coherent writes from the GPU, by effectively invalidating the 3821 * GTT domain upon first access. 3822 */ 3823 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) 3824 mb(); 3825 3826 old_write_domain = obj->base.write_domain; 3827 old_read_domains = obj->base.read_domains; 3828 3829 /* It should now be out of any other write domains, and we can update 3830 * the domain values for our changes. 3831 */ 3832 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0); 3833 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 3834 if (write) { 3835 obj->base.read_domains = I915_GEM_DOMAIN_GTT; 3836 obj->base.write_domain = I915_GEM_DOMAIN_GTT; 3837 obj->dirty = 1; 3838 } 3839 3840 if (write) 3841 intel_fb_obj_invalidate(obj, NULL); 3842 3843 trace_i915_gem_object_change_domain(obj, 3844 old_read_domains, 3845 old_write_domain); 3846 3847 /* And bump the LRU for this access */ 3848 vma = i915_gem_obj_to_ggtt(obj); 3849 if (vma && drm_mm_node_allocated(&vma->node) && !obj->active) 3850 list_move_tail(&vma->mm_list, 3851 &to_i915(obj->base.dev)->gtt.base.inactive_list); 3852 3853 return 0; 3854 } 3855 3856 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, 3857 enum i915_cache_level cache_level) 3858 { 3859 struct drm_device *dev = obj->base.dev; 3860 struct i915_vma *vma, *next; 3861 int ret; 3862 3863 if (obj->cache_level == cache_level) 3864 return 0; 3865 3866 if (i915_gem_obj_is_pinned(obj)) { 3867 DRM_DEBUG("can not change the cache level of pinned objects\n"); 3868 return -EBUSY; 3869 } 3870 3871 list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) { 3872 if (!i915_gem_valid_gtt_space(vma, cache_level)) { 3873 ret = i915_vma_unbind(vma); 3874 if (ret) 3875 return ret; 3876 } 3877 } 3878 3879 if (i915_gem_obj_bound_any(obj)) { 3880 ret = i915_gem_object_finish_gpu(obj); 3881 if (ret) 3882 return ret; 3883 3884 i915_gem_object_finish_gtt(obj); 3885 3886 /* Before SandyBridge, you could not use tiling or fence 3887 * registers with snooped memory, so relinquish any fences 3888 * currently pointing to our region in the aperture. 3889 */ 3890 if (INTEL_INFO(dev)->gen < 6) { 3891 ret = i915_gem_object_put_fence(obj); 3892 if (ret) 3893 return ret; 3894 } 3895 3896 list_for_each_entry(vma, &obj->vma_list, vma_link) 3897 if (drm_mm_node_allocated(&vma->node)) { 3898 ret = i915_vma_bind(vma, cache_level, 3899 vma->bound & GLOBAL_BIND); 3900 if (ret) 3901 return ret; 3902 } 3903 } 3904 3905 list_for_each_entry(vma, &obj->vma_list, vma_link) 3906 vma->node.color = cache_level; 3907 obj->cache_level = cache_level; 3908 3909 if (obj->cache_dirty && 3910 obj->base.write_domain != I915_GEM_DOMAIN_CPU && 3911 cpu_write_needs_clflush(obj)) { 3912 if (i915_gem_clflush_object(obj, true)) 3913 i915_gem_chipset_flush(obj->base.dev); 3914 } 3915 3916 return 0; 3917 } 3918 3919 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, 3920 struct drm_file *file) 3921 { 3922 struct drm_i915_gem_caching *args = data; 3923 struct drm_i915_gem_object *obj; 3924 int ret; 3925 3926 ret = i915_mutex_lock_interruptible(dev); 3927 if (ret) 3928 return ret; 3929 3930 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 3931 if (&obj->base == NULL) { 3932 ret = -ENOENT; 3933 goto unlock; 3934 } 3935 3936 switch (obj->cache_level) { 3937 case I915_CACHE_LLC: 3938 case I915_CACHE_L3_LLC: 3939 args->caching = I915_CACHING_CACHED; 3940 break; 3941 3942 case I915_CACHE_WT: 3943 args->caching = I915_CACHING_DISPLAY; 3944 break; 3945 3946 default: 3947 args->caching = I915_CACHING_NONE; 3948 break; 3949 } 3950 3951 drm_gem_object_unreference(&obj->base); 3952 unlock: 3953 mutex_unlock(&dev->struct_mutex); 3954 return ret; 3955 } 3956 3957 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, 3958 struct drm_file *file) 3959 { 3960 struct drm_i915_gem_caching *args = data; 3961 struct drm_i915_gem_object *obj; 3962 enum i915_cache_level level; 3963 int ret; 3964 3965 switch (args->caching) { 3966 case I915_CACHING_NONE: 3967 level = I915_CACHE_NONE; 3968 break; 3969 case I915_CACHING_CACHED: 3970 level = I915_CACHE_LLC; 3971 break; 3972 case I915_CACHING_DISPLAY: 3973 level = HAS_WT(dev) ? I915_CACHE_WT : I915_CACHE_NONE; 3974 break; 3975 default: 3976 return -EINVAL; 3977 } 3978 3979 ret = i915_mutex_lock_interruptible(dev); 3980 if (ret) 3981 return ret; 3982 3983 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 3984 if (&obj->base == NULL) { 3985 ret = -ENOENT; 3986 goto unlock; 3987 } 3988 3989 ret = i915_gem_object_set_cache_level(obj, level); 3990 3991 drm_gem_object_unreference(&obj->base); 3992 unlock: 3993 mutex_unlock(&dev->struct_mutex); 3994 return ret; 3995 } 3996 3997 static bool is_pin_display(struct drm_i915_gem_object *obj) 3998 { 3999 struct i915_vma *vma; 4000 4001 vma = i915_gem_obj_to_ggtt(obj); 4002 if (!vma) 4003 return false; 4004 4005 /* There are 2 sources that pin objects: 4006 * 1. The display engine (scanouts, sprites, cursors); 4007 * 2. Reservations for execbuffer; 4008 * 4009 * We can ignore reservations as we hold the struct_mutex and 4010 * are only called outside of the reservation path. 4011 */ 4012 return vma->pin_count; 4013 } 4014 4015 /* 4016 * Prepare buffer for display plane (scanout, cursors, etc). 4017 * Can be called from an uninterruptible phase (modesetting) and allows 4018 * any flushes to be pipelined (for pageflips). 4019 */ 4020 int 4021 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, 4022 u32 alignment, 4023 struct intel_engine_cs *pipelined) 4024 { 4025 u32 old_read_domains, old_write_domain; 4026 bool was_pin_display; 4027 int ret; 4028 4029 if (pipelined != i915_gem_request_get_ring(obj->last_read_req)) { 4030 ret = i915_gem_object_sync(obj, pipelined); 4031 if (ret) 4032 return ret; 4033 } 4034 4035 /* Mark the pin_display early so that we account for the 4036 * display coherency whilst setting up the cache domains. 4037 */ 4038 was_pin_display = obj->pin_display; 4039 obj->pin_display = true; 4040 4041 /* The display engine is not coherent with the LLC cache on gen6. As 4042 * a result, we make sure that the pinning that is about to occur is 4043 * done with uncached PTEs. This is lowest common denominator for all 4044 * chipsets. 4045 * 4046 * However for gen6+, we could do better by using the GFDT bit instead 4047 * of uncaching, which would allow us to flush all the LLC-cached data 4048 * with that bit in the PTE to main memory with just one PIPE_CONTROL. 4049 */ 4050 ret = i915_gem_object_set_cache_level(obj, 4051 HAS_WT(obj->base.dev) ? I915_CACHE_WT : I915_CACHE_NONE); 4052 if (ret) 4053 goto err_unpin_display; 4054 4055 /* As the user may map the buffer once pinned in the display plane 4056 * (e.g. libkms for the bootup splash), we have to ensure that we 4057 * always use map_and_fenceable for all scanout buffers. 4058 */ 4059 ret = i915_gem_obj_ggtt_pin(obj, alignment, PIN_MAPPABLE); 4060 if (ret) 4061 goto err_unpin_display; 4062 4063 i915_gem_object_flush_cpu_write_domain(obj); 4064 4065 old_write_domain = obj->base.write_domain; 4066 old_read_domains = obj->base.read_domains; 4067 4068 /* It should now be out of any other write domains, and we can update 4069 * the domain values for our changes. 4070 */ 4071 obj->base.write_domain = 0; 4072 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 4073 4074 trace_i915_gem_object_change_domain(obj, 4075 old_read_domains, 4076 old_write_domain); 4077 4078 return 0; 4079 4080 err_unpin_display: 4081 WARN_ON(was_pin_display != is_pin_display(obj)); 4082 obj->pin_display = was_pin_display; 4083 return ret; 4084 } 4085 4086 void 4087 i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj) 4088 { 4089 i915_gem_object_ggtt_unpin(obj); 4090 obj->pin_display = is_pin_display(obj); 4091 } 4092 4093 int 4094 i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj) 4095 { 4096 int ret; 4097 4098 if ((obj->base.read_domains & I915_GEM_GPU_DOMAINS) == 0) 4099 return 0; 4100 4101 ret = i915_gem_object_wait_rendering(obj, false); 4102 if (ret) 4103 return ret; 4104 4105 /* Ensure that we invalidate the GPU's caches and TLBs. */ 4106 obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS; 4107 return 0; 4108 } 4109 4110 /** 4111 * Moves a single object to the CPU read, and possibly write domain. 4112 * 4113 * This function returns when the move is complete, including waiting on 4114 * flushes to occur. 4115 */ 4116 int 4117 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) 4118 { 4119 uint32_t old_write_domain, old_read_domains; 4120 int ret; 4121 4122 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) 4123 return 0; 4124 4125 ret = i915_gem_object_wait_rendering(obj, !write); 4126 if (ret) 4127 return ret; 4128 4129 i915_gem_object_retire(obj); 4130 i915_gem_object_flush_gtt_write_domain(obj); 4131 4132 old_write_domain = obj->base.write_domain; 4133 old_read_domains = obj->base.read_domains; 4134 4135 /* Flush the CPU cache if it's still invalid. */ 4136 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) { 4137 i915_gem_clflush_object(obj, false); 4138 4139 obj->base.read_domains |= I915_GEM_DOMAIN_CPU; 4140 } 4141 4142 /* It should now be out of any other write domains, and we can update 4143 * the domain values for our changes. 4144 */ 4145 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0); 4146 4147 /* If we're writing through the CPU, then the GPU read domains will 4148 * need to be invalidated at next use. 4149 */ 4150 if (write) { 4151 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 4152 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 4153 } 4154 4155 if (write) 4156 intel_fb_obj_invalidate(obj, NULL); 4157 4158 trace_i915_gem_object_change_domain(obj, 4159 old_read_domains, 4160 old_write_domain); 4161 4162 return 0; 4163 } 4164 4165 /* Throttle our rendering by waiting until the ring has completed our requests 4166 * emitted over 20 msec ago. 4167 * 4168 * Note that if we were to use the current jiffies each time around the loop, 4169 * we wouldn't escape the function with any frames outstanding if the time to 4170 * render a frame was over 20ms. 4171 * 4172 * This should get us reasonable parallelism between CPU and GPU but also 4173 * relatively low latency when blocking on a particular request to finish. 4174 */ 4175 static int 4176 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) 4177 { 4178 struct drm_i915_private *dev_priv = dev->dev_private; 4179 struct drm_i915_file_private *file_priv = file->driver_priv; 4180 unsigned long recent_enough = jiffies - msecs_to_jiffies(20); 4181 struct drm_i915_gem_request *request, *target = NULL; 4182 unsigned reset_counter; 4183 int ret; 4184 4185 ret = i915_gem_wait_for_error(&dev_priv->gpu_error); 4186 if (ret) 4187 return ret; 4188 4189 ret = i915_gem_check_wedge(&dev_priv->gpu_error, false); 4190 if (ret) 4191 return ret; 4192 4193 spin_lock(&file_priv->mm.lock); 4194 list_for_each_entry(request, &file_priv->mm.request_list, client_list) { 4195 if (time_after_eq(request->emitted_jiffies, recent_enough)) 4196 break; 4197 4198 target = request; 4199 } 4200 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter); 4201 if (target) 4202 i915_gem_request_reference(target); 4203 spin_unlock(&file_priv->mm.lock); 4204 4205 if (target == NULL) 4206 return 0; 4207 4208 ret = __i915_wait_request(target, reset_counter, true, NULL, NULL); 4209 if (ret == 0) 4210 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0); 4211 4212 mutex_lock(&dev->struct_mutex); 4213 i915_gem_request_unreference(target); 4214 mutex_unlock(&dev->struct_mutex); 4215 4216 return ret; 4217 } 4218 4219 static bool 4220 i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags) 4221 { 4222 struct drm_i915_gem_object *obj = vma->obj; 4223 4224 if (alignment && 4225 vma->node.start & (alignment - 1)) 4226 return true; 4227 4228 if (flags & PIN_MAPPABLE && !obj->map_and_fenceable) 4229 return true; 4230 4231 if (flags & PIN_OFFSET_BIAS && 4232 vma->node.start < (flags & PIN_OFFSET_MASK)) 4233 return true; 4234 4235 return false; 4236 } 4237 4238 int 4239 i915_gem_object_pin_view(struct drm_i915_gem_object *obj, 4240 struct i915_address_space *vm, 4241 uint32_t alignment, 4242 uint64_t flags, 4243 const struct i915_ggtt_view *view) 4244 { 4245 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 4246 struct i915_vma *vma; 4247 unsigned bound; 4248 int ret; 4249 4250 if (WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base)) 4251 return -ENODEV; 4252 4253 if (WARN_ON(flags & (PIN_GLOBAL | PIN_MAPPABLE) && !i915_is_ggtt(vm))) 4254 return -EINVAL; 4255 4256 if (WARN_ON((flags & (PIN_MAPPABLE | PIN_GLOBAL)) == PIN_MAPPABLE)) 4257 return -EINVAL; 4258 4259 vma = i915_gem_obj_to_vma_view(obj, vm, view); 4260 if (vma) { 4261 if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT)) 4262 return -EBUSY; 4263 4264 if (i915_vma_misplaced(vma, alignment, flags)) { 4265 WARN(vma->pin_count, 4266 "bo is already pinned with incorrect alignment:" 4267 " offset=%lx, req.alignment=%x, req.map_and_fenceable=%d," 4268 " obj->map_and_fenceable=%d\n", 4269 i915_gem_obj_offset_view(obj, vm, view->type), 4270 alignment, 4271 !!(flags & PIN_MAPPABLE), 4272 obj->map_and_fenceable); 4273 ret = i915_vma_unbind(vma); 4274 if (ret) 4275 return ret; 4276 4277 vma = NULL; 4278 } 4279 } 4280 4281 bound = vma ? vma->bound : 0; 4282 if (vma == NULL || !drm_mm_node_allocated(&vma->node)) { 4283 vma = i915_gem_object_bind_to_vm(obj, vm, alignment, 4284 flags, view); 4285 if (IS_ERR(vma)) 4286 return PTR_ERR(vma); 4287 } 4288 4289 if (flags & PIN_GLOBAL && !(vma->bound & GLOBAL_BIND)) { 4290 ret = i915_vma_bind(vma, obj->cache_level, GLOBAL_BIND); 4291 if (ret) 4292 return ret; 4293 } 4294 4295 if ((bound ^ vma->bound) & GLOBAL_BIND) { 4296 bool mappable, fenceable; 4297 u32 fence_size, fence_alignment; 4298 4299 fence_size = i915_gem_get_gtt_size(obj->base.dev, 4300 obj->base.size, 4301 obj->tiling_mode); 4302 fence_alignment = i915_gem_get_gtt_alignment(obj->base.dev, 4303 obj->base.size, 4304 obj->tiling_mode, 4305 true); 4306 4307 fenceable = (vma->node.size == fence_size && 4308 (vma->node.start & (fence_alignment - 1)) == 0); 4309 4310 mappable = (vma->node.start + obj->base.size <= 4311 dev_priv->gtt.mappable_end); 4312 4313 obj->map_and_fenceable = mappable && fenceable; 4314 } 4315 4316 WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable); 4317 4318 vma->pin_count++; 4319 if (flags & PIN_MAPPABLE) 4320 obj->pin_mappable |= true; 4321 4322 return 0; 4323 } 4324 4325 void 4326 i915_gem_object_ggtt_unpin(struct drm_i915_gem_object *obj) 4327 { 4328 struct i915_vma *vma = i915_gem_obj_to_ggtt(obj); 4329 4330 BUG_ON(!vma); 4331 BUG_ON(vma->pin_count == 0); 4332 BUG_ON(!i915_gem_obj_ggtt_bound(obj)); 4333 4334 if (--vma->pin_count == 0) 4335 obj->pin_mappable = false; 4336 } 4337 4338 bool 4339 i915_gem_object_pin_fence(struct drm_i915_gem_object *obj) 4340 { 4341 if (obj->fence_reg != I915_FENCE_REG_NONE) { 4342 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 4343 struct i915_vma *ggtt_vma = i915_gem_obj_to_ggtt(obj); 4344 4345 WARN_ON(!ggtt_vma || 4346 dev_priv->fence_regs[obj->fence_reg].pin_count > 4347 ggtt_vma->pin_count); 4348 dev_priv->fence_regs[obj->fence_reg].pin_count++; 4349 return true; 4350 } else 4351 return false; 4352 } 4353 4354 void 4355 i915_gem_object_unpin_fence(struct drm_i915_gem_object *obj) 4356 { 4357 if (obj->fence_reg != I915_FENCE_REG_NONE) { 4358 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 4359 WARN_ON(dev_priv->fence_regs[obj->fence_reg].pin_count <= 0); 4360 dev_priv->fence_regs[obj->fence_reg].pin_count--; 4361 } 4362 } 4363 4364 int 4365 i915_gem_busy_ioctl(struct drm_device *dev, void *data, 4366 struct drm_file *file) 4367 { 4368 struct drm_i915_gem_busy *args = data; 4369 struct drm_i915_gem_object *obj; 4370 int ret; 4371 4372 ret = i915_mutex_lock_interruptible(dev); 4373 if (ret) 4374 return ret; 4375 4376 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 4377 if (&obj->base == NULL) { 4378 ret = -ENOENT; 4379 goto unlock; 4380 } 4381 4382 /* Count all active objects as busy, even if they are currently not used 4383 * by the gpu. Users of this interface expect objects to eventually 4384 * become non-busy without any further actions, therefore emit any 4385 * necessary flushes here. 4386 */ 4387 ret = i915_gem_object_flush_active(obj); 4388 4389 args->busy = obj->active; 4390 if (obj->last_read_req) { 4391 struct intel_engine_cs *ring; 4392 BUILD_BUG_ON(I915_NUM_RINGS > 16); 4393 ring = i915_gem_request_get_ring(obj->last_read_req); 4394 args->busy |= intel_ring_flag(ring) << 16; 4395 } 4396 4397 drm_gem_object_unreference(&obj->base); 4398 unlock: 4399 mutex_unlock(&dev->struct_mutex); 4400 return ret; 4401 } 4402 4403 int 4404 i915_gem_throttle_ioctl(struct drm_device *dev, void *data, 4405 struct drm_file *file_priv) 4406 { 4407 return i915_gem_ring_throttle(dev, file_priv); 4408 } 4409 4410 int 4411 i915_gem_madvise_ioctl(struct drm_device *dev, void *data, 4412 struct drm_file *file_priv) 4413 { 4414 struct drm_i915_private *dev_priv = dev->dev_private; 4415 struct drm_i915_gem_madvise *args = data; 4416 struct drm_i915_gem_object *obj; 4417 int ret; 4418 4419 switch (args->madv) { 4420 case I915_MADV_DONTNEED: 4421 case I915_MADV_WILLNEED: 4422 break; 4423 default: 4424 return -EINVAL; 4425 } 4426 4427 ret = i915_mutex_lock_interruptible(dev); 4428 if (ret) 4429 return ret; 4430 4431 obj = to_intel_bo(drm_gem_object_lookup(dev, file_priv, args->handle)); 4432 if (&obj->base == NULL) { 4433 ret = -ENOENT; 4434 goto unlock; 4435 } 4436 4437 if (i915_gem_obj_is_pinned(obj)) { 4438 ret = -EINVAL; 4439 goto out; 4440 } 4441 4442 if (obj->pages && 4443 obj->tiling_mode != I915_TILING_NONE && 4444 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) { 4445 if (obj->madv == I915_MADV_WILLNEED) 4446 i915_gem_object_unpin_pages(obj); 4447 if (args->madv == I915_MADV_WILLNEED) 4448 i915_gem_object_pin_pages(obj); 4449 } 4450 4451 if (obj->madv != __I915_MADV_PURGED) 4452 obj->madv = args->madv; 4453 4454 /* if the object is no longer attached, discard its backing storage */ 4455 if (i915_gem_object_is_purgeable(obj) && obj->pages == NULL) 4456 i915_gem_object_truncate(obj); 4457 4458 args->retained = obj->madv != __I915_MADV_PURGED; 4459 4460 out: 4461 drm_gem_object_unreference(&obj->base); 4462 unlock: 4463 mutex_unlock(&dev->struct_mutex); 4464 return ret; 4465 } 4466 4467 void i915_gem_object_init(struct drm_i915_gem_object *obj, 4468 const struct drm_i915_gem_object_ops *ops) 4469 { 4470 INIT_LIST_HEAD(&obj->global_list); 4471 INIT_LIST_HEAD(&obj->ring_list); 4472 INIT_LIST_HEAD(&obj->obj_exec_link); 4473 INIT_LIST_HEAD(&obj->vma_list); 4474 INIT_LIST_HEAD(&obj->batch_pool_list); 4475 4476 obj->ops = ops; 4477 4478 obj->fence_reg = I915_FENCE_REG_NONE; 4479 obj->madv = I915_MADV_WILLNEED; 4480 4481 i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size); 4482 } 4483 4484 static const struct drm_i915_gem_object_ops i915_gem_object_ops = { 4485 .get_pages = i915_gem_object_get_pages_gtt, 4486 .put_pages = i915_gem_object_put_pages_gtt, 4487 }; 4488 4489 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev, 4490 size_t size) 4491 { 4492 struct drm_i915_gem_object *obj; 4493 #if 0 4494 struct address_space *mapping; 4495 gfp_t mask; 4496 #endif 4497 4498 obj = i915_gem_object_alloc(dev); 4499 if (obj == NULL) 4500 return NULL; 4501 4502 if (drm_gem_object_init(dev, &obj->base, size) != 0) { 4503 i915_gem_object_free(obj); 4504 return NULL; 4505 } 4506 4507 #if 0 4508 mask = GFP_HIGHUSER | __GFP_RECLAIMABLE; 4509 if (IS_CRESTLINE(dev) || IS_BROADWATER(dev)) { 4510 /* 965gm cannot relocate objects above 4GiB. */ 4511 mask &= ~__GFP_HIGHMEM; 4512 mask |= __GFP_DMA32; 4513 } 4514 4515 mapping = file_inode(obj->base.filp)->i_mapping; 4516 mapping_set_gfp_mask(mapping, mask); 4517 #endif 4518 4519 i915_gem_object_init(obj, &i915_gem_object_ops); 4520 4521 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 4522 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 4523 4524 if (HAS_LLC(dev)) { 4525 /* On some devices, we can have the GPU use the LLC (the CPU 4526 * cache) for about a 10% performance improvement 4527 * compared to uncached. Graphics requests other than 4528 * display scanout are coherent with the CPU in 4529 * accessing this cache. This means in this mode we 4530 * don't need to clflush on the CPU side, and on the 4531 * GPU side we only need to flush internal caches to 4532 * get data visible to the CPU. 4533 * 4534 * However, we maintain the display planes as UC, and so 4535 * need to rebind when first used as such. 4536 */ 4537 obj->cache_level = I915_CACHE_LLC; 4538 } else 4539 obj->cache_level = I915_CACHE_NONE; 4540 4541 trace_i915_gem_object_create(obj); 4542 4543 return obj; 4544 } 4545 4546 static bool discard_backing_storage(struct drm_i915_gem_object *obj) 4547 { 4548 /* If we are the last user of the backing storage (be it shmemfs 4549 * pages or stolen etc), we know that the pages are going to be 4550 * immediately released. In this case, we can then skip copying 4551 * back the contents from the GPU. 4552 */ 4553 4554 if (obj->madv != I915_MADV_WILLNEED) 4555 return false; 4556 4557 if (obj->base.vm_obj == NULL) 4558 return true; 4559 4560 /* At first glance, this looks racy, but then again so would be 4561 * userspace racing mmap against close. However, the first external 4562 * reference to the filp can only be obtained through the 4563 * i915_gem_mmap_ioctl() which safeguards us against the user 4564 * acquiring such a reference whilst we are in the middle of 4565 * freeing the object. 4566 */ 4567 #if 0 4568 return atomic_long_read(&obj->base.filp->f_count) == 1; 4569 #else 4570 return false; 4571 #endif 4572 } 4573 4574 void i915_gem_free_object(struct drm_gem_object *gem_obj) 4575 { 4576 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); 4577 struct drm_device *dev = obj->base.dev; 4578 struct drm_i915_private *dev_priv = dev->dev_private; 4579 struct i915_vma *vma, *next; 4580 4581 intel_runtime_pm_get(dev_priv); 4582 4583 trace_i915_gem_object_destroy(obj); 4584 4585 list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) { 4586 int ret; 4587 4588 vma->pin_count = 0; 4589 ret = i915_vma_unbind(vma); 4590 if (WARN_ON(ret == -ERESTARTSYS)) { 4591 bool was_interruptible; 4592 4593 was_interruptible = dev_priv->mm.interruptible; 4594 dev_priv->mm.interruptible = false; 4595 4596 WARN_ON(i915_vma_unbind(vma)); 4597 4598 dev_priv->mm.interruptible = was_interruptible; 4599 } 4600 } 4601 4602 /* Stolen objects don't hold a ref, but do hold pin count. Fix that up 4603 * before progressing. */ 4604 if (obj->stolen) 4605 i915_gem_object_unpin_pages(obj); 4606 4607 WARN_ON(obj->frontbuffer_bits); 4608 4609 if (obj->pages && obj->madv == I915_MADV_WILLNEED && 4610 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES && 4611 obj->tiling_mode != I915_TILING_NONE) 4612 i915_gem_object_unpin_pages(obj); 4613 4614 if (WARN_ON(obj->pages_pin_count)) 4615 obj->pages_pin_count = 0; 4616 if (discard_backing_storage(obj)) 4617 obj->madv = I915_MADV_DONTNEED; 4618 i915_gem_object_put_pages(obj); 4619 i915_gem_object_free_mmap_offset(obj); 4620 4621 BUG_ON(obj->pages); 4622 4623 #if 0 4624 if (obj->base.import_attach) 4625 drm_prime_gem_destroy(&obj->base, NULL); 4626 #endif 4627 4628 if (obj->ops->release) 4629 obj->ops->release(obj); 4630 4631 drm_gem_object_release(&obj->base); 4632 i915_gem_info_remove_obj(dev_priv, obj->base.size); 4633 4634 kfree(obj->bit_17); 4635 i915_gem_object_free(obj); 4636 4637 intel_runtime_pm_put(dev_priv); 4638 } 4639 4640 struct i915_vma *i915_gem_obj_to_vma_view(struct drm_i915_gem_object *obj, 4641 struct i915_address_space *vm, 4642 const struct i915_ggtt_view *view) 4643 { 4644 struct i915_vma *vma; 4645 list_for_each_entry(vma, &obj->vma_list, vma_link) 4646 if (vma->vm == vm && vma->ggtt_view.type == view->type) 4647 return vma; 4648 4649 return NULL; 4650 } 4651 4652 void i915_gem_vma_destroy(struct i915_vma *vma) 4653 { 4654 struct i915_address_space *vm = NULL; 4655 WARN_ON(vma->node.allocated); 4656 4657 /* Keep the vma as a placeholder in the execbuffer reservation lists */ 4658 if (!list_empty(&vma->exec_list)) 4659 return; 4660 4661 vm = vma->vm; 4662 4663 if (!i915_is_ggtt(vm)) 4664 i915_ppgtt_put(i915_vm_to_ppgtt(vm)); 4665 4666 list_del(&vma->vma_link); 4667 4668 kfree(vma); 4669 } 4670 4671 static void 4672 i915_gem_stop_ringbuffers(struct drm_device *dev) 4673 { 4674 struct drm_i915_private *dev_priv = dev->dev_private; 4675 struct intel_engine_cs *ring; 4676 int i; 4677 4678 for_each_ring(ring, dev_priv, i) 4679 dev_priv->gt.stop_ring(ring); 4680 } 4681 4682 int 4683 i915_gem_suspend(struct drm_device *dev) 4684 { 4685 struct drm_i915_private *dev_priv = dev->dev_private; 4686 int ret = 0; 4687 4688 mutex_lock(&dev->struct_mutex); 4689 ret = i915_gpu_idle(dev); 4690 if (ret) 4691 goto err; 4692 4693 i915_gem_retire_requests(dev); 4694 4695 /* Under UMS, be paranoid and evict. */ 4696 if (!drm_core_check_feature(dev, DRIVER_MODESET)) 4697 i915_gem_evict_everything(dev); 4698 4699 i915_gem_stop_ringbuffers(dev); 4700 mutex_unlock(&dev->struct_mutex); 4701 4702 cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work); 4703 cancel_delayed_work_sync(&dev_priv->mm.retire_work); 4704 #if 0 4705 flush_delayed_work(&dev_priv->mm.idle_work); 4706 #endif 4707 4708 /* Assert that we sucessfully flushed all the work and 4709 * reset the GPU back to its idle, low power state. 4710 */ 4711 WARN_ON(dev_priv->mm.busy); 4712 4713 return 0; 4714 4715 err: 4716 mutex_unlock(&dev->struct_mutex); 4717 return ret; 4718 } 4719 4720 int i915_gem_l3_remap(struct intel_engine_cs *ring, int slice) 4721 { 4722 struct drm_device *dev = ring->dev; 4723 struct drm_i915_private *dev_priv = dev->dev_private; 4724 u32 reg_base = GEN7_L3LOG_BASE + (slice * 0x200); 4725 u32 *remap_info = dev_priv->l3_parity.remap_info[slice]; 4726 int i, ret; 4727 4728 if (!HAS_L3_DPF(dev) || !remap_info) 4729 return 0; 4730 4731 ret = intel_ring_begin(ring, GEN7_L3LOG_SIZE / 4 * 3); 4732 if (ret) 4733 return ret; 4734 4735 /* 4736 * Note: We do not worry about the concurrent register cacheline hang 4737 * here because no other code should access these registers other than 4738 * at initialization time. 4739 */ 4740 for (i = 0; i < GEN7_L3LOG_SIZE; i += 4) { 4741 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); 4742 intel_ring_emit(ring, reg_base + i); 4743 intel_ring_emit(ring, remap_info[i/4]); 4744 } 4745 4746 intel_ring_advance(ring); 4747 4748 return ret; 4749 } 4750 4751 void i915_gem_init_swizzling(struct drm_device *dev) 4752 { 4753 struct drm_i915_private *dev_priv = dev->dev_private; 4754 4755 if (INTEL_INFO(dev)->gen < 5 || 4756 dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE) 4757 return; 4758 4759 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) | 4760 DISP_TILE_SURFACE_SWIZZLING); 4761 4762 if (IS_GEN5(dev)) 4763 return; 4764 4765 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL); 4766 if (IS_GEN6(dev)) 4767 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB)); 4768 else if (IS_GEN7(dev)) 4769 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB)); 4770 else if (IS_GEN8(dev)) 4771 I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW)); 4772 else 4773 BUG(); 4774 } 4775 4776 static bool 4777 intel_enable_blt(struct drm_device *dev) 4778 { 4779 if (!HAS_BLT(dev)) 4780 return false; 4781 4782 /* The blitter was dysfunctional on early prototypes */ 4783 if (IS_GEN6(dev) && dev->pdev->revision < 8) { 4784 DRM_INFO("BLT not supported on this pre-production hardware;" 4785 " graphics performance will be degraded.\n"); 4786 return false; 4787 } 4788 4789 return true; 4790 } 4791 4792 static void init_unused_ring(struct drm_device *dev, u32 base) 4793 { 4794 struct drm_i915_private *dev_priv = dev->dev_private; 4795 4796 I915_WRITE(RING_CTL(base), 0); 4797 I915_WRITE(RING_HEAD(base), 0); 4798 I915_WRITE(RING_TAIL(base), 0); 4799 I915_WRITE(RING_START(base), 0); 4800 } 4801 4802 static void init_unused_rings(struct drm_device *dev) 4803 { 4804 if (IS_I830(dev)) { 4805 init_unused_ring(dev, PRB1_BASE); 4806 init_unused_ring(dev, SRB0_BASE); 4807 init_unused_ring(dev, SRB1_BASE); 4808 init_unused_ring(dev, SRB2_BASE); 4809 init_unused_ring(dev, SRB3_BASE); 4810 } else if (IS_GEN2(dev)) { 4811 init_unused_ring(dev, SRB0_BASE); 4812 init_unused_ring(dev, SRB1_BASE); 4813 } else if (IS_GEN3(dev)) { 4814 init_unused_ring(dev, PRB1_BASE); 4815 init_unused_ring(dev, PRB2_BASE); 4816 } 4817 } 4818 4819 int i915_gem_init_rings(struct drm_device *dev) 4820 { 4821 struct drm_i915_private *dev_priv = dev->dev_private; 4822 int ret; 4823 4824 ret = intel_init_render_ring_buffer(dev); 4825 if (ret) 4826 return ret; 4827 4828 if (HAS_BSD(dev)) { 4829 ret = intel_init_bsd_ring_buffer(dev); 4830 if (ret) 4831 goto cleanup_render_ring; 4832 } 4833 4834 if (intel_enable_blt(dev)) { 4835 ret = intel_init_blt_ring_buffer(dev); 4836 if (ret) 4837 goto cleanup_bsd_ring; 4838 } 4839 4840 if (HAS_VEBOX(dev)) { 4841 ret = intel_init_vebox_ring_buffer(dev); 4842 if (ret) 4843 goto cleanup_blt_ring; 4844 } 4845 4846 if (HAS_BSD2(dev)) { 4847 ret = intel_init_bsd2_ring_buffer(dev); 4848 if (ret) 4849 goto cleanup_vebox_ring; 4850 } 4851 4852 ret = i915_gem_set_seqno(dev, ((u32)~0 - 0x1000)); 4853 if (ret) 4854 goto cleanup_bsd2_ring; 4855 4856 return 0; 4857 4858 cleanup_bsd2_ring: 4859 intel_cleanup_ring_buffer(&dev_priv->ring[VCS2]); 4860 cleanup_vebox_ring: 4861 intel_cleanup_ring_buffer(&dev_priv->ring[VECS]); 4862 cleanup_blt_ring: 4863 intel_cleanup_ring_buffer(&dev_priv->ring[BCS]); 4864 cleanup_bsd_ring: 4865 intel_cleanup_ring_buffer(&dev_priv->ring[VCS]); 4866 cleanup_render_ring: 4867 intel_cleanup_ring_buffer(&dev_priv->ring[RCS]); 4868 4869 return ret; 4870 } 4871 4872 int 4873 i915_gem_init_hw(struct drm_device *dev) 4874 { 4875 struct drm_i915_private *dev_priv = dev->dev_private; 4876 struct intel_engine_cs *ring; 4877 int ret, i; 4878 4879 #if 0 4880 if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt()) 4881 return -EIO; 4882 #endif 4883 4884 /* Double layer security blanket, see i915_gem_init() */ 4885 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 4886 4887 if (dev_priv->ellc_size) 4888 I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf)); 4889 4890 if (IS_HASWELL(dev)) 4891 I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev) ? 4892 LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED); 4893 4894 if (HAS_PCH_NOP(dev)) { 4895 if (IS_IVYBRIDGE(dev)) { 4896 u32 temp = I915_READ(GEN7_MSG_CTL); 4897 temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK); 4898 I915_WRITE(GEN7_MSG_CTL, temp); 4899 } else if (INTEL_INFO(dev)->gen >= 7) { 4900 u32 temp = I915_READ(HSW_NDE_RSTWRN_OPT); 4901 temp &= ~RESET_PCH_HANDSHAKE_ENABLE; 4902 I915_WRITE(HSW_NDE_RSTWRN_OPT, temp); 4903 } 4904 } 4905 4906 i915_gem_init_swizzling(dev); 4907 4908 /* 4909 * At least 830 can leave some of the unused rings 4910 * "active" (ie. head != tail) after resume which 4911 * will prevent c3 entry. Makes sure all unused rings 4912 * are totally idle. 4913 */ 4914 init_unused_rings(dev); 4915 4916 for_each_ring(ring, dev_priv, i) { 4917 ret = ring->init_hw(ring); 4918 if (ret) 4919 goto out; 4920 } 4921 4922 for (i = 0; i < NUM_L3_SLICES(dev); i++) 4923 i915_gem_l3_remap(&dev_priv->ring[RCS], i); 4924 4925 ret = i915_ppgtt_init_hw(dev); 4926 if (ret && ret != -EIO) { 4927 DRM_ERROR("PPGTT enable failed %d\n", ret); 4928 i915_gem_cleanup_ringbuffer(dev); 4929 } 4930 4931 ret = i915_gem_context_enable(dev_priv); 4932 if (ret && ret != -EIO) { 4933 DRM_ERROR("Context enable failed %d\n", ret); 4934 i915_gem_cleanup_ringbuffer(dev); 4935 4936 goto out; 4937 } 4938 4939 out: 4940 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 4941 return ret; 4942 } 4943 4944 int i915_gem_init(struct drm_device *dev) 4945 { 4946 struct drm_i915_private *dev_priv = dev->dev_private; 4947 int ret; 4948 4949 i915.enable_execlists = intel_sanitize_enable_execlists(dev, 4950 i915.enable_execlists); 4951 4952 mutex_lock(&dev->struct_mutex); 4953 4954 if (IS_VALLEYVIEW(dev)) { 4955 /* VLVA0 (potential hack), BIOS isn't actually waking us */ 4956 I915_WRITE(VLV_GTLC_WAKE_CTRL, VLV_GTLC_ALLOWWAKEREQ); 4957 if (wait_for((I915_READ(VLV_GTLC_PW_STATUS) & 4958 VLV_GTLC_ALLOWWAKEACK), 10)) 4959 DRM_DEBUG_DRIVER("allow wake ack timed out\n"); 4960 } 4961 4962 if (!i915.enable_execlists) { 4963 dev_priv->gt.do_execbuf = i915_gem_ringbuffer_submission; 4964 dev_priv->gt.init_rings = i915_gem_init_rings; 4965 dev_priv->gt.cleanup_ring = intel_cleanup_ring_buffer; 4966 dev_priv->gt.stop_ring = intel_stop_ring_buffer; 4967 } else { 4968 dev_priv->gt.do_execbuf = intel_execlists_submission; 4969 dev_priv->gt.init_rings = intel_logical_rings_init; 4970 dev_priv->gt.cleanup_ring = intel_logical_ring_cleanup; 4971 dev_priv->gt.stop_ring = intel_logical_ring_stop; 4972 } 4973 4974 /* This is just a security blanket to placate dragons. 4975 * On some systems, we very sporadically observe that the first TLBs 4976 * used by the CS may be stale, despite us poking the TLB reset. If 4977 * we hold the forcewake during initialisation these problems 4978 * just magically go away. 4979 */ 4980 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 4981 4982 ret = i915_gem_init_userptr(dev); 4983 if (ret) 4984 goto out_unlock; 4985 4986 i915_gem_init_global_gtt(dev); 4987 4988 ret = i915_gem_context_init(dev); 4989 if (ret) 4990 goto out_unlock; 4991 4992 ret = dev_priv->gt.init_rings(dev); 4993 if (ret) 4994 goto out_unlock; 4995 4996 ret = i915_gem_init_hw(dev); 4997 if (ret == -EIO) { 4998 /* Allow ring initialisation to fail by marking the GPU as 4999 * wedged. But we only want to do this where the GPU is angry, 5000 * for all other failure, such as an allocation failure, bail. 5001 */ 5002 DRM_ERROR("Failed to initialize GPU, declaring it wedged\n"); 5003 atomic_set_mask(I915_WEDGED, &dev_priv->gpu_error.reset_counter); 5004 ret = 0; 5005 } 5006 5007 out_unlock: 5008 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5009 mutex_unlock(&dev->struct_mutex); 5010 5011 return ret; 5012 } 5013 5014 void 5015 i915_gem_cleanup_ringbuffer(struct drm_device *dev) 5016 { 5017 struct drm_i915_private *dev_priv = dev->dev_private; 5018 struct intel_engine_cs *ring; 5019 int i; 5020 5021 for_each_ring(ring, dev_priv, i) 5022 dev_priv->gt.cleanup_ring(ring); 5023 } 5024 5025 static void 5026 init_ring_lists(struct intel_engine_cs *ring) 5027 { 5028 INIT_LIST_HEAD(&ring->active_list); 5029 INIT_LIST_HEAD(&ring->request_list); 5030 } 5031 5032 void i915_init_vm(struct drm_i915_private *dev_priv, 5033 struct i915_address_space *vm) 5034 { 5035 if (!i915_is_ggtt(vm)) 5036 drm_mm_init(&vm->mm, vm->start, vm->total); 5037 vm->dev = dev_priv->dev; 5038 INIT_LIST_HEAD(&vm->active_list); 5039 INIT_LIST_HEAD(&vm->inactive_list); 5040 INIT_LIST_HEAD(&vm->global_link); 5041 list_add_tail(&vm->global_link, &dev_priv->vm_list); 5042 } 5043 5044 void 5045 i915_gem_load(struct drm_device *dev) 5046 { 5047 struct drm_i915_private *dev_priv = dev->dev_private; 5048 int i; 5049 5050 INIT_LIST_HEAD(&dev_priv->vm_list); 5051 i915_init_vm(dev_priv, &dev_priv->gtt.base); 5052 5053 INIT_LIST_HEAD(&dev_priv->context_list); 5054 INIT_LIST_HEAD(&dev_priv->mm.unbound_list); 5055 INIT_LIST_HEAD(&dev_priv->mm.bound_list); 5056 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 5057 for (i = 0; i < I915_NUM_RINGS; i++) 5058 init_ring_lists(&dev_priv->ring[i]); 5059 for (i = 0; i < I915_MAX_NUM_FENCES; i++) 5060 INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list); 5061 INIT_DELAYED_WORK(&dev_priv->mm.retire_work, 5062 i915_gem_retire_work_handler); 5063 INIT_DELAYED_WORK(&dev_priv->mm.idle_work, 5064 i915_gem_idle_work_handler); 5065 init_waitqueue_head(&dev_priv->gpu_error.reset_queue); 5066 5067 /* On GEN3 we really need to make sure the ARB C3 LP bit is set */ 5068 if (!drm_core_check_feature(dev, DRIVER_MODESET) && IS_GEN3(dev)) { 5069 I915_WRITE(MI_ARB_STATE, 5070 _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE)); 5071 } 5072 5073 dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL; 5074 5075 /* Old X drivers will take 0-2 for front, back, depth buffers */ 5076 if (!drm_core_check_feature(dev, DRIVER_MODESET)) 5077 dev_priv->fence_reg_start = 3; 5078 5079 if (INTEL_INFO(dev)->gen >= 7 && !IS_VALLEYVIEW(dev)) 5080 dev_priv->num_fence_regs = 32; 5081 else if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev)) 5082 dev_priv->num_fence_regs = 16; 5083 else 5084 dev_priv->num_fence_regs = 8; 5085 5086 /* Initialize fence registers to zero */ 5087 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 5088 i915_gem_restore_fences(dev); 5089 5090 i915_gem_detect_bit_6_swizzle(dev); 5091 init_waitqueue_head(&dev_priv->pending_flip_queue); 5092 5093 dev_priv->mm.interruptible = true; 5094 5095 #if 0 5096 dev_priv->mm.shrinker.scan_objects = i915_gem_shrinker_scan; 5097 dev_priv->mm.shrinker.count_objects = i915_gem_shrinker_count; 5098 dev_priv->mm.shrinker.seeks = DEFAULT_SEEKS; 5099 register_shrinker(&dev_priv->mm.shrinker); 5100 5101 dev_priv->mm.oom_notifier.notifier_call = i915_gem_shrinker_oom; 5102 register_oom_notifier(&dev_priv->mm.oom_notifier); 5103 #endif 5104 5105 i915_gem_batch_pool_init(dev, &dev_priv->mm.batch_pool); 5106 5107 lockinit(&dev_priv->fb_tracking.lock, "drmftl", 0, LK_CANRECURSE); 5108 } 5109 5110 void i915_gem_release(struct drm_device *dev, struct drm_file *file) 5111 { 5112 struct drm_i915_file_private *file_priv = file->driver_priv; 5113 5114 cancel_delayed_work_sync(&file_priv->mm.idle_work); 5115 5116 /* Clean up our request list when the client is going away, so that 5117 * later retire_requests won't dereference our soon-to-be-gone 5118 * file_priv. 5119 */ 5120 spin_lock(&file_priv->mm.lock); 5121 while (!list_empty(&file_priv->mm.request_list)) { 5122 struct drm_i915_gem_request *request; 5123 5124 request = list_first_entry(&file_priv->mm.request_list, 5125 struct drm_i915_gem_request, 5126 client_list); 5127 list_del(&request->client_list); 5128 request->file_priv = NULL; 5129 } 5130 spin_unlock(&file_priv->mm.lock); 5131 } 5132 5133 int 5134 i915_gem_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot, 5135 vm_ooffset_t foff, struct ucred *cred, u_short *color) 5136 { 5137 *color = 0; /* XXXKIB */ 5138 return (0); 5139 } 5140 5141 void 5142 i915_gem_pager_dtor(void *handle) 5143 { 5144 struct drm_gem_object *obj; 5145 struct drm_device *dev; 5146 5147 obj = handle; 5148 dev = obj->dev; 5149 5150 mutex_lock(&dev->struct_mutex); 5151 drm_gem_free_mmap_offset(obj); 5152 i915_gem_release_mmap(to_intel_bo(obj)); 5153 drm_gem_object_unreference(obj); 5154 mutex_unlock(&dev->struct_mutex); 5155 } 5156 5157 static void 5158 i915_gem_file_idle_work_handler(struct work_struct *work) 5159 { 5160 struct drm_i915_file_private *file_priv = 5161 container_of(work, typeof(*file_priv), mm.idle_work.work); 5162 5163 atomic_set(&file_priv->rps_wait_boost, false); 5164 } 5165 5166 int i915_gem_open(struct drm_device *dev, struct drm_file *file) 5167 { 5168 struct drm_i915_file_private *file_priv; 5169 int ret; 5170 5171 DRM_DEBUG_DRIVER("\n"); 5172 5173 file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL); 5174 if (!file_priv) 5175 return -ENOMEM; 5176 5177 file->driver_priv = file_priv; 5178 file_priv->dev_priv = dev->dev_private; 5179 file_priv->file = file; 5180 5181 spin_init(&file_priv->mm.lock, "i915_priv"); 5182 INIT_LIST_HEAD(&file_priv->mm.request_list); 5183 INIT_DELAYED_WORK(&file_priv->mm.idle_work, 5184 i915_gem_file_idle_work_handler); 5185 5186 ret = i915_gem_context_open(dev, file); 5187 if (ret) 5188 kfree(file_priv); 5189 5190 return ret; 5191 } 5192 5193 /** 5194 * i915_gem_track_fb - update frontbuffer tracking 5195 * old: current GEM buffer for the frontbuffer slots 5196 * new: new GEM buffer for the frontbuffer slots 5197 * frontbuffer_bits: bitmask of frontbuffer slots 5198 * 5199 * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them 5200 * from @old and setting them in @new. Both @old and @new can be NULL. 5201 */ 5202 void i915_gem_track_fb(struct drm_i915_gem_object *old, 5203 struct drm_i915_gem_object *new, 5204 unsigned frontbuffer_bits) 5205 { 5206 if (old) { 5207 WARN_ON(!mutex_is_locked(&old->base.dev->struct_mutex)); 5208 WARN_ON(!(old->frontbuffer_bits & frontbuffer_bits)); 5209 old->frontbuffer_bits &= ~frontbuffer_bits; 5210 } 5211 5212 if (new) { 5213 WARN_ON(!mutex_is_locked(&new->base.dev->struct_mutex)); 5214 WARN_ON(new->frontbuffer_bits & frontbuffer_bits); 5215 new->frontbuffer_bits |= frontbuffer_bits; 5216 } 5217 } 5218 5219 #if 0 5220 static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task) 5221 { 5222 if (!mutex_is_locked(mutex)) 5223 return false; 5224 5225 #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_MUTEXES) 5226 return mutex->owner == task; 5227 #else 5228 /* Since UP may be pre-empted, we cannot assume that we own the lock */ 5229 return false; 5230 #endif 5231 } 5232 #endif 5233 5234 #if 0 5235 static bool i915_gem_shrinker_lock(struct drm_device *dev, bool *unlock) 5236 { 5237 if (!mutex_trylock(&dev->struct_mutex)) { 5238 if (!mutex_is_locked_by(&dev->struct_mutex, current)) 5239 return false; 5240 5241 if (to_i915(dev)->mm.shrinker_no_lock_stealing) 5242 return false; 5243 5244 *unlock = false; 5245 } else 5246 *unlock = true; 5247 5248 return true; 5249 } 5250 5251 static int num_vma_bound(struct drm_i915_gem_object *obj) 5252 { 5253 struct i915_vma *vma; 5254 int count = 0; 5255 5256 list_for_each_entry(vma, &obj->vma_list, vma_link) 5257 if (drm_mm_node_allocated(&vma->node)) 5258 count++; 5259 5260 return count; 5261 } 5262 5263 static unsigned long 5264 i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc) 5265 { 5266 struct drm_i915_private *dev_priv = 5267 container_of(shrinker, struct drm_i915_private, mm.shrinker); 5268 struct drm_device *dev = dev_priv->dev; 5269 struct drm_i915_gem_object *obj; 5270 unsigned long count; 5271 bool unlock; 5272 5273 if (!i915_gem_shrinker_lock(dev, &unlock)) 5274 return 0; 5275 5276 count = 0; 5277 list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list) 5278 if (obj->pages_pin_count == 0) 5279 count += obj->base.size >> PAGE_SHIFT; 5280 5281 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { 5282 if (!i915_gem_obj_is_pinned(obj) && 5283 obj->pages_pin_count == num_vma_bound(obj)) 5284 count += obj->base.size >> PAGE_SHIFT; 5285 } 5286 5287 if (unlock) 5288 mutex_unlock(&dev->struct_mutex); 5289 5290 return count; 5291 } 5292 #endif 5293 5294 /* All the new VM stuff */ 5295 unsigned long i915_gem_obj_offset_view(struct drm_i915_gem_object *o, 5296 struct i915_address_space *vm, 5297 enum i915_ggtt_view_type view) 5298 { 5299 struct drm_i915_private *dev_priv = o->base.dev->dev_private; 5300 struct i915_vma *vma; 5301 5302 WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base); 5303 5304 list_for_each_entry(vma, &o->vma_list, vma_link) { 5305 if (vma->vm == vm && vma->ggtt_view.type == view) 5306 return vma->node.start; 5307 5308 } 5309 WARN(1, "%s vma for this object not found.\n", 5310 i915_is_ggtt(vm) ? "global" : "ppgtt"); 5311 return -1; 5312 } 5313 5314 bool i915_gem_obj_bound_view(struct drm_i915_gem_object *o, 5315 struct i915_address_space *vm, 5316 enum i915_ggtt_view_type view) 5317 { 5318 struct i915_vma *vma; 5319 5320 list_for_each_entry(vma, &o->vma_list, vma_link) 5321 if (vma->vm == vm && 5322 vma->ggtt_view.type == view && 5323 drm_mm_node_allocated(&vma->node)) 5324 return true; 5325 5326 return false; 5327 } 5328 5329 bool i915_gem_obj_bound_any(struct drm_i915_gem_object *o) 5330 { 5331 struct i915_vma *vma; 5332 5333 list_for_each_entry(vma, &o->vma_list, vma_link) 5334 if (drm_mm_node_allocated(&vma->node)) 5335 return true; 5336 5337 return false; 5338 } 5339 5340 unsigned long i915_gem_obj_size(struct drm_i915_gem_object *o, 5341 struct i915_address_space *vm) 5342 { 5343 struct drm_i915_private *dev_priv = o->base.dev->dev_private; 5344 struct i915_vma *vma; 5345 5346 WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base); 5347 5348 BUG_ON(list_empty(&o->vma_list)); 5349 5350 list_for_each_entry(vma, &o->vma_list, vma_link) 5351 if (vma->vm == vm) 5352 return vma->node.size; 5353 5354 return 0; 5355 } 5356 5357 #if 0 5358 static unsigned long 5359 i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) 5360 { 5361 struct drm_i915_private *dev_priv = 5362 container_of(shrinker, struct drm_i915_private, mm.shrinker); 5363 struct drm_device *dev = dev_priv->dev; 5364 unsigned long freed; 5365 bool unlock; 5366 5367 if (!i915_gem_shrinker_lock(dev, &unlock)) 5368 return SHRINK_STOP; 5369 5370 freed = i915_gem_shrink(dev_priv, 5371 sc->nr_to_scan, 5372 I915_SHRINK_BOUND | 5373 I915_SHRINK_UNBOUND | 5374 I915_SHRINK_PURGEABLE); 5375 if (freed < sc->nr_to_scan) 5376 freed += i915_gem_shrink(dev_priv, 5377 sc->nr_to_scan - freed, 5378 I915_SHRINK_BOUND | 5379 I915_SHRINK_UNBOUND); 5380 if (unlock) 5381 mutex_unlock(&dev->struct_mutex); 5382 5383 return freed; 5384 } 5385 5386 static int 5387 i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr) 5388 { 5389 struct drm_i915_private *dev_priv = 5390 container_of(nb, struct drm_i915_private, mm.oom_notifier); 5391 struct drm_device *dev = dev_priv->dev; 5392 struct drm_i915_gem_object *obj; 5393 unsigned long timeout = msecs_to_jiffies(5000) + 1; 5394 unsigned long pinned, bound, unbound, freed_pages; 5395 bool was_interruptible; 5396 bool unlock; 5397 5398 while (!i915_gem_shrinker_lock(dev, &unlock) && --timeout) { 5399 schedule_timeout_killable(1); 5400 if (fatal_signal_pending(current)) 5401 return NOTIFY_DONE; 5402 } 5403 if (timeout == 0) { 5404 pr_err("Unable to purge GPU memory due lock contention.\n"); 5405 return NOTIFY_DONE; 5406 } 5407 5408 was_interruptible = dev_priv->mm.interruptible; 5409 dev_priv->mm.interruptible = false; 5410 5411 freed_pages = i915_gem_shrink_all(dev_priv); 5412 5413 dev_priv->mm.interruptible = was_interruptible; 5414 5415 /* Because we may be allocating inside our own driver, we cannot 5416 * assert that there are no objects with pinned pages that are not 5417 * being pointed to by hardware. 5418 */ 5419 unbound = bound = pinned = 0; 5420 list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list) { 5421 if (!obj->base.filp) /* not backed by a freeable object */ 5422 continue; 5423 5424 if (obj->pages_pin_count) 5425 pinned += obj->base.size; 5426 else 5427 unbound += obj->base.size; 5428 } 5429 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { 5430 if (!obj->base.filp) 5431 continue; 5432 5433 if (obj->pages_pin_count) 5434 pinned += obj->base.size; 5435 else 5436 bound += obj->base.size; 5437 } 5438 5439 if (unlock) 5440 mutex_unlock(&dev->struct_mutex); 5441 5442 if (freed_pages || unbound || bound) 5443 pr_info("Purging GPU memory, %lu bytes freed, %lu bytes still pinned.\n", 5444 freed_pages << PAGE_SHIFT, pinned); 5445 if (unbound || bound) 5446 pr_err("%lu and %lu bytes still available in the " 5447 "bound and unbound GPU page lists.\n", 5448 bound, unbound); 5449 5450 *(unsigned long *)ptr += freed_pages; 5451 return NOTIFY_DONE; 5452 } 5453 #endif 5454 5455 struct i915_vma *i915_gem_obj_to_ggtt(struct drm_i915_gem_object *obj) 5456 { 5457 struct i915_address_space *ggtt = i915_obj_to_ggtt(obj); 5458 struct i915_vma *vma; 5459 5460 list_for_each_entry(vma, &obj->vma_list, vma_link) 5461 if (vma->vm == ggtt && 5462 vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) 5463 return vma; 5464 5465 return NULL; 5466 } 5467