1 /* $OpenBSD: i915_gem.c,v 1.106 2016/04/05 21:22:02 kettenis Exp $ */ 2 /* 3 * Copyright (c) 2008-2009 Owain G. Ainsworth <oga@openbsd.org> 4 * 5 * Permission to use, copy, modify, and distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the above 7 * copyright notice and this permission notice appear in all copies. 8 * 9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 */ 17 /* 18 * Copyright © 2008 Intel Corporation 19 * 20 * Permission is hereby granted, free of charge, to any person obtaining a 21 * copy of this software and associated documentation files (the "Software"), 22 * to deal in the Software without restriction, including without limitation 23 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 24 * and/or sell copies of the Software, and to permit persons to whom the 25 * Software is furnished to do so, subject to the following conditions: 26 * 27 * The above copyright notice and this permission notice (including the next 28 * paragraph) shall be included in all copies or substantial portions of the 29 * Software. 30 * 31 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 32 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 33 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 34 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 35 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 36 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 37 * IN THE SOFTWARE. 
38 * 39 * Authors: 40 * Eric Anholt <eric@anholt.net> 41 * 42 */ 43 44 #include <dev/pci/drm/drmP.h> 45 #include <dev/pci/drm/drm_vma_manager.h> 46 #include <dev/pci/drm/i915_drm.h> 47 #include "i915_drv.h" 48 #include "i915_trace.h" 49 #include "intel_drv.h" 50 51 #include <machine/pmap.h> 52 53 static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj); 54 static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj, 55 bool force); 56 static __must_check int 57 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, 58 bool readonly); 59 static __must_check int 60 i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, 61 struct i915_address_space *vm, 62 unsigned alignment, 63 bool map_and_fenceable, 64 bool nonblocking); 65 static int i915_gem_phys_pwrite(struct drm_device *dev, 66 struct drm_i915_gem_object *obj, 67 struct drm_i915_gem_pwrite *args, 68 struct drm_file *file); 69 70 static void i915_gem_write_fence(struct drm_device *dev, int reg, 71 struct drm_i915_gem_object *obj); 72 static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj, 73 struct drm_i915_fence_reg *fence, 74 bool enable); 75 76 #ifdef notyet 77 static unsigned long i915_gem_inactive_count(struct shrinker *shrinker, 78 struct shrink_control *sc); 79 static unsigned long i915_gem_inactive_scan(struct shrinker *shrinker, 80 struct shrink_control *sc); 81 static unsigned long i915_gem_purge(struct drm_i915_private *dev_priv, long target); 82 static unsigned long i915_gem_shrink_all(struct drm_i915_private *dev_priv); 83 #endif 84 static void i915_gem_object_truncate(struct drm_i915_gem_object *obj); 85 86 static bool cpu_cache_is_coherent(struct drm_device *dev, 87 enum i915_cache_level level) 88 { 89 return HAS_LLC(dev) || level != I915_CACHE_NONE; 90 } 91 92 static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) 93 { 94 if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) 95 return true; 96 97 return obj->pin_display; 98 } 99 100 static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj) 101 { 102 if (obj->tiling_mode) 103 i915_gem_release_mmap(obj); 104 105 /* As we do not have an associated fence register, we will force 106 * a tiling change if we ever need to acquire one. 107 */ 108 obj->fence_dirty = false; 109 obj->fence_reg = I915_FENCE_REG_NONE; 110 } 111 112 /* some bookkeeping */ 113 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv, 114 size_t size) 115 { 116 spin_lock(&dev_priv->mm.object_stat_lock); 117 dev_priv->mm.object_count++; 118 dev_priv->mm.object_memory += size; 119 spin_unlock(&dev_priv->mm.object_stat_lock); 120 } 121 122 static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv, 123 size_t size) 124 { 125 spin_lock(&dev_priv->mm.object_stat_lock); 126 dev_priv->mm.object_count--; 127 dev_priv->mm.object_memory -= size; 128 spin_unlock(&dev_priv->mm.object_stat_lock); 129 } 130 131 static int 132 i915_gem_wait_for_error(struct i915_gpu_error *error) 133 { 134 int ret; 135 136 #define EXIT_COND (!i915_reset_in_progress(error) || \ 137 i915_terminally_wedged(error)) 138 if (EXIT_COND) 139 return 0; 140 141 /* 142 * Only wait 10 seconds for the gpu reset to complete to avoid hanging 143 * userspace. If it takes that long something really bad is going on and 144 * we should simply try to bail out and fail as gracefully as possible. 
145 */ 146 ret = wait_event_interruptible_timeout(error->reset_queue, 147 EXIT_COND, 148 10*HZ); 149 if (ret == 0) { 150 DRM_ERROR("Timed out waiting for the gpu reset to complete\n"); 151 return -EIO; 152 } else if (ret < 0) { 153 return ret; 154 } 155 #undef EXIT_COND 156 157 return 0; 158 } 159 160 int i915_mutex_lock_interruptible(struct drm_device *dev) 161 { 162 struct drm_i915_private *dev_priv = dev->dev_private; 163 int ret; 164 165 ret = i915_gem_wait_for_error(&dev_priv->gpu_error); 166 if (ret) 167 return ret; 168 169 ret = mutex_lock_interruptible(&dev->struct_mutex); 170 if (ret) 171 return ret; 172 173 WARN_ON(i915_verify_lists(dev)); 174 return 0; 175 } 176 177 static inline bool 178 i915_gem_object_is_inactive(struct drm_i915_gem_object *obj) 179 { 180 return i915_gem_obj_bound_any(obj) && !obj->active; 181 } 182 183 int 184 i915_gem_init_ioctl(struct drm_device *dev, void *data, 185 struct drm_file *file) 186 { 187 struct drm_i915_private *dev_priv = dev->dev_private; 188 struct drm_i915_gem_init *args = data; 189 190 if (drm_core_check_feature(dev, DRIVER_MODESET)) 191 return -ENODEV; 192 193 if (args->gtt_start >= args->gtt_end || 194 (args->gtt_end | args->gtt_start) & (PAGE_SIZE - 1)) 195 return -EINVAL; 196 197 /* GEM with user mode setting was never supported on ilk and later. */ 198 if (INTEL_INFO(dev)->gen >= 5) 199 return -ENODEV; 200 201 mutex_lock(&dev->struct_mutex); 202 i915_gem_setup_global_gtt(dev, args->gtt_start, args->gtt_end, 203 args->gtt_end); 204 dev_priv->gtt.mappable_end = args->gtt_end; 205 mutex_unlock(&dev->struct_mutex); 206 207 return 0; 208 } 209 210 int 211 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, 212 struct drm_file *file) 213 { 214 struct drm_i915_private *dev_priv = dev->dev_private; 215 struct drm_i915_gem_get_aperture *args = data; 216 struct drm_i915_gem_object *obj; 217 size_t pinned; 218 219 pinned = 0; 220 mutex_lock(&dev->struct_mutex); 221 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) 222 if (obj->pin_count) 223 pinned += i915_gem_obj_ggtt_size(obj); 224 mutex_unlock(&dev->struct_mutex); 225 226 args->aper_size = dev_priv->gtt.base.total; 227 args->aper_available_size = args->aper_size - pinned; 228 229 return 0; 230 } 231 232 void *i915_gem_object_alloc(struct drm_device *dev) 233 { 234 return pool_get(&dev->objpl, PR_WAITOK | PR_ZERO); 235 } 236 237 void i915_gem_object_free(struct drm_i915_gem_object *obj) 238 { 239 pool_put(&obj->base.dev->objpl, obj); 240 } 241 242 static int 243 i915_gem_create(struct drm_file *file, 244 struct drm_device *dev, 245 uint64_t size, 246 uint32_t *handle_p) 247 { 248 struct drm_i915_gem_object *obj; 249 int ret; 250 u32 handle; 251 252 size = roundup(size, PAGE_SIZE); 253 if (size == 0) 254 return -EINVAL; 255 256 /* Allocate the new object */ 257 obj = i915_gem_alloc_object(dev, size); 258 if (obj == NULL) 259 return -ENOMEM; 260 261 ret = drm_gem_handle_create(file, &obj->base, &handle); 262 /* drop reference from allocate - handle holds it now */ 263 drm_gem_object_unreference_unlocked(&obj->base); 264 if (ret) 265 return ret; 266 267 *handle_p = handle; 268 return 0; 269 } 270 271 int 272 i915_gem_dumb_create(struct drm_file *file, 273 struct drm_device *dev, 274 struct drm_mode_create_dumb *args) 275 { 276 /* have to work out size/pitch and return them */ 277 args->pitch = roundup2(args->width * DIV_ROUND_UP(args->bpp, 8), 64); 278 args->size = args->pitch * args->height; 279 return i915_gem_create(file, dev, 280 args->size, &args->handle); 281 } 282 
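/*
 * Illustrative note (not part of the original source): the dumb-buffer
 * path above rounds the pitch up to a 64-byte multiple before sizing the
 * allocation.  For a hypothetical 1024x768, 32bpp request this gives
 *
 *	pitch = roundup2(1024 * DIV_ROUND_UP(32, 8), 64) = 4096 bytes
 *	size  = pitch * height = 4096 * 768 = 3145728 bytes
 *
 * and i915_gem_create() then rounds the size up to whole pages.
 */
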
283 /** 284 * Creates a new mm object and returns a handle to it. 285 */ 286 int 287 i915_gem_create_ioctl(struct drm_device *dev, void *data, 288 struct drm_file *file) 289 { 290 struct drm_i915_gem_create *args = data; 291 292 return i915_gem_create(file, dev, 293 args->size, &args->handle); 294 } 295 296 static inline void 297 drm_clflush_virt_range(void *addr, size_t len) 298 { 299 pmap_flush_cache((vaddr_t)addr, len); 300 } 301 302 static inline int 303 __copy_to_user_swizzled(char __user *cpu_vaddr, 304 const char *gpu_vaddr, int gpu_offset, 305 int length) 306 { 307 int ret, cpu_offset = 0; 308 309 while (length > 0) { 310 int cacheline_end = roundup2(gpu_offset + 1, 64); 311 int this_length = min(cacheline_end - gpu_offset, length); 312 int swizzled_gpu_offset = gpu_offset ^ 64; 313 314 ret = __copy_to_user(cpu_vaddr + cpu_offset, 315 gpu_vaddr + swizzled_gpu_offset, 316 this_length); 317 if (ret) 318 return ret + length; 319 320 cpu_offset += this_length; 321 gpu_offset += this_length; 322 length -= this_length; 323 } 324 325 return 0; 326 } 327 328 static inline int 329 __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset, 330 const char __user *cpu_vaddr, 331 int length) 332 { 333 int ret, cpu_offset = 0; 334 335 while (length > 0) { 336 int cacheline_end = roundup2(gpu_offset + 1, 64); 337 int this_length = min(cacheline_end - gpu_offset, length); 338 int swizzled_gpu_offset = gpu_offset ^ 64; 339 340 ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset, 341 cpu_vaddr + cpu_offset, 342 this_length); 343 if (ret) 344 return ret + length; 345 346 cpu_offset += this_length; 347 gpu_offset += this_length; 348 length -= this_length; 349 } 350 351 return 0; 352 } 353 354 /* Per-page copy function for the shmem pread fastpath. 355 * Flushes invalid cachelines before reading the target if 356 * needs_clflush is set. */ 357 static int 358 shmem_pread_fast(struct vm_page *page, int shmem_page_offset, int page_length, 359 char __user *user_data, 360 bool page_do_bit17_swizzling, bool needs_clflush) 361 { 362 char *vaddr; 363 int ret; 364 365 if (unlikely(page_do_bit17_swizzling)) 366 return -EINVAL; 367 368 vaddr = kmap_atomic(page); 369 if (needs_clflush) 370 drm_clflush_virt_range(vaddr + shmem_page_offset, 371 page_length); 372 ret = __copy_to_user_inatomic(user_data, 373 vaddr + shmem_page_offset, 374 page_length); 375 kunmap_atomic(vaddr); 376 377 return ret ? -EFAULT : 0; 378 } 379 380 static void 381 shmem_clflush_swizzled_range(char *addr, unsigned long length, 382 bool swizzled) 383 { 384 if (unlikely(swizzled)) { 385 unsigned long start = (unsigned long) addr; 386 unsigned long end = (unsigned long) addr + length; 387 388 /* For swizzling simply ensure that we always flush both 389 * channels. Lame, but simple and it works. Swizzled 390 * pwrite/pread is far from a hotpath - current userspace 391 * doesn't use it at all. */ 392 start = round_down(start, 128); 393 end = round_up(end, 128); 394 395 drm_clflush_virt_range((void *)start, end - start); 396 } else { 397 drm_clflush_virt_range(addr, length); 398 } 399 400 } 401 402 /* Only difference to the fast-path function is that this can handle bit17 403 * and uses non-atomic copy and kmap functions. 
*/ 404 static int 405 shmem_pread_slow(struct vm_page *page, int shmem_page_offset, int page_length, 406 char __user *user_data, 407 bool page_do_bit17_swizzling, bool needs_clflush) 408 { 409 char *vaddr; 410 int ret; 411 412 vaddr = kmap(page); 413 if (needs_clflush) 414 shmem_clflush_swizzled_range(vaddr + shmem_page_offset, 415 page_length, 416 page_do_bit17_swizzling); 417 418 if (page_do_bit17_swizzling) 419 ret = __copy_to_user_swizzled(user_data, 420 vaddr, shmem_page_offset, 421 page_length); 422 else 423 ret = __copy_to_user(user_data, 424 vaddr + shmem_page_offset, 425 page_length); 426 kunmap(vaddr); 427 428 return ret ? - EFAULT : 0; 429 } 430 431 static int 432 i915_gem_shmem_pread(struct drm_device *dev, 433 struct drm_i915_gem_object *obj, 434 struct drm_i915_gem_pread *args, 435 struct drm_file *file) 436 { 437 char __user *user_data; 438 ssize_t remain; 439 loff_t offset; 440 int shmem_page_offset, page_length, ret = 0; 441 int obj_do_bit17_swizzling, page_do_bit17_swizzling; 442 int needs_clflush = 0; 443 int i; 444 445 user_data = to_user_ptr(args->data_ptr); 446 remain = args->size; 447 448 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); 449 450 if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) { 451 /* If we're not in the cpu read domain, set ourself into the gtt 452 * read domain and manually flush cachelines (if required). This 453 * optimizes for the case when the gpu will dirty the data 454 * anyway again before the next pread happens. */ 455 needs_clflush = !cpu_cache_is_coherent(dev, obj->cache_level); 456 ret = i915_gem_object_wait_rendering(obj, true); 457 if (ret) 458 return ret; 459 } 460 461 ret = i915_gem_object_get_pages(obj); 462 if (ret) 463 return ret; 464 465 i915_gem_object_pin_pages(obj); 466 467 offset = args->offset; 468 469 for (i = 0; i < (obj->base.size >> PAGE_SHIFT); i++) { 470 struct vm_page *page; 471 472 if (i < (offset >> PAGE_SHIFT)) 473 continue; 474 475 if (remain <= 0) 476 break; 477 478 /* Operation in this page 479 * 480 * shmem_page_offset = offset within page in shmem file 481 * page_length = bytes to copy for this page 482 */ 483 shmem_page_offset = offset_in_page(offset); 484 page_length = remain; 485 if ((shmem_page_offset + page_length) > PAGE_SIZE) 486 page_length = PAGE_SIZE - shmem_page_offset; 487 488 #ifdef __linux__ 489 page = sg_page(sg); 490 #else 491 page = obj->pages[i]; 492 #endif 493 page_do_bit17_swizzling = obj_do_bit17_swizzling && 494 (page_to_phys(page) & (1 << 17)) != 0; 495 496 ret = shmem_pread_fast(page, shmem_page_offset, page_length, 497 user_data, page_do_bit17_swizzling, 498 needs_clflush); 499 if (ret == 0) 500 goto next_page; 501 502 mutex_unlock(&dev->struct_mutex); 503 504 #ifdef __linux__ 505 if (likely(!i915_prefault_disable) && !prefaulted) { 506 ret = fault_in_multipages_writeable(user_data, remain); 507 /* Userspace is tricking us, but we've already clobbered 508 * its pages with the prefault and promised to write the 509 * data up to the first fault. Hence ignore any errors 510 * and just continue. 
*/ 511 (void)ret; 512 prefaulted = 1; 513 } 514 #endif 515 516 ret = shmem_pread_slow(page, shmem_page_offset, page_length, 517 user_data, page_do_bit17_swizzling, 518 needs_clflush); 519 520 mutex_lock(&dev->struct_mutex); 521 522 next_page: 523 #ifdef __linux__ 524 mark_page_accessed(page); 525 #endif 526 527 if (ret) 528 goto out; 529 530 remain -= page_length; 531 user_data += page_length; 532 offset += page_length; 533 } 534 535 out: 536 i915_gem_object_unpin_pages(obj); 537 538 return ret; 539 } 540 541 /** 542 * Reads data from the object referenced by handle. 543 * 544 * On error, the contents of *data are undefined. 545 */ 546 int 547 i915_gem_pread_ioctl(struct drm_device *dev, void *data, 548 struct drm_file *file) 549 { 550 struct drm_i915_gem_pread *args = data; 551 struct drm_i915_gem_object *obj; 552 int ret = 0; 553 554 if (args->size == 0) 555 return 0; 556 557 if (!access_ok(VERIFY_WRITE, 558 to_user_ptr(args->data_ptr), 559 args->size)) 560 return -EFAULT; 561 562 ret = i915_mutex_lock_interruptible(dev); 563 if (ret) 564 return ret; 565 566 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 567 if (&obj->base == NULL) { 568 ret = -ENOENT; 569 goto unlock; 570 } 571 572 /* Bounds check source. */ 573 if (args->offset > obj->base.size || 574 args->size > obj->base.size - args->offset) { 575 ret = -EINVAL; 576 goto out; 577 } 578 579 #ifdef notyet 580 /* prime objects have no backing filp to GEM pread/pwrite 581 * pages from. 582 */ 583 if (!obj->base.filp) { 584 ret = -EINVAL; 585 goto out; 586 } 587 #endif 588 589 trace_i915_gem_object_pread(obj, args->offset, args->size); 590 591 ret = i915_gem_shmem_pread(dev, obj, args, file); 592 593 out: 594 drm_gem_object_unreference(&obj->base); 595 unlock: 596 mutex_unlock(&dev->struct_mutex); 597 return ret; 598 } 599 600 #ifdef __linux__ 601 /* This is the fast write path which cannot handle 602 * page faults in the source data 603 */ 604 605 static inline int 606 fast_user_write(struct io_mapping *mapping, 607 loff_t page_base, int page_offset, 608 char __user *user_data, 609 int length) 610 { 611 void __iomem *vaddr_atomic; 612 void *vaddr; 613 unsigned long unwritten; 614 615 vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base); 616 /* We can use the cpu mem copy function because this is X86. */ 617 vaddr = (void __force*)vaddr_atomic + page_offset; 618 unwritten = __copy_from_user_inatomic_nocache(vaddr, 619 user_data, length); 620 io_mapping_unmap_atomic(vaddr_atomic); 621 return unwritten; 622 } 623 #else 624 /* This is the fast write path which cannot handle 625 * page faults in the source data 626 */ 627 628 static inline int 629 fast_user_write(struct drm_i915_private *dev_priv, 630 bus_size_t page_base, int page_offset, 631 char __user *user_data, 632 int length) 633 { 634 bus_space_handle_t bsh; 635 void __iomem *vaddr_atomic; 636 void *vaddr; 637 unsigned long unwritten; 638 639 agp_map_atomic(dev_priv->agph, page_base, &bsh); 640 vaddr_atomic = bus_space_vaddr(dev_priv->bst, bsh); 641 /* We can use the cpu mem copy function because this is X86. */ 642 vaddr = (void __force*)vaddr_atomic + page_offset; 643 unwritten = __copy_from_user_inatomic_nocache(vaddr, 644 user_data, length); 645 agp_unmap_atomic(dev_priv->agph, bsh); 646 return unwritten; 647 } 648 #endif 649 650 /** 651 * This is the fast pwrite path, where we copy the data directly from the 652 * user into the GTT, uncached. 
653 */ 654 static int 655 i915_gem_gtt_pwrite_fast(struct drm_device *dev, 656 struct drm_i915_gem_object *obj, 657 struct drm_i915_gem_pwrite *args, 658 struct drm_file *file) 659 { 660 drm_i915_private_t *dev_priv = dev->dev_private; 661 ssize_t remain; 662 bus_size_t offset, page_base; 663 char __user *user_data; 664 int page_offset, page_length, ret; 665 666 ret = i915_gem_obj_ggtt_pin(obj, 0, true, true); 667 if (ret) 668 goto out; 669 670 ret = i915_gem_object_set_to_gtt_domain(obj, true); 671 if (ret) 672 goto out_unpin; 673 674 ret = i915_gem_object_put_fence(obj); 675 if (ret) 676 goto out_unpin; 677 678 user_data = to_user_ptr(args->data_ptr); 679 remain = args->size; 680 681 offset = i915_gem_obj_ggtt_offset(obj) + args->offset; 682 683 while (remain > 0) { 684 /* Operation in this page 685 * 686 * page_base = page offset within aperture 687 * page_offset = offset within page 688 * page_length = bytes to copy for this page 689 */ 690 page_base = offset & ~PAGE_MASK; 691 page_offset = offset_in_page(offset); 692 page_length = remain; 693 if ((page_offset + remain) > PAGE_SIZE) 694 page_length = PAGE_SIZE - page_offset; 695 696 /* If we get a fault while copying data, then (presumably) our 697 * source page isn't available. Return the error and we'll 698 * retry in the slow path. 699 */ 700 if (fast_user_write(dev_priv, page_base, 701 page_offset, user_data, page_length)) { 702 ret = -EFAULT; 703 goto out_unpin; 704 } 705 706 remain -= page_length; 707 user_data += page_length; 708 offset += page_length; 709 } 710 711 out_unpin: 712 i915_gem_object_unpin(obj); 713 out: 714 return ret; 715 } 716 717 /* Per-page copy function for the shmem pwrite fastpath. 718 * Flushes invalid cachelines before writing to the target if 719 * needs_clflush_before is set and flushes out any written cachelines after 720 * writing if needs_clflush is set. */ 721 static int 722 shmem_pwrite_fast(struct vm_page *page, int shmem_page_offset, int page_length, 723 char __user *user_data, 724 bool page_do_bit17_swizzling, 725 bool needs_clflush_before, 726 bool needs_clflush_after) 727 { 728 char *vaddr; 729 int ret; 730 731 if (unlikely(page_do_bit17_swizzling)) 732 return -EINVAL; 733 734 vaddr = kmap_atomic(page); 735 if (needs_clflush_before) 736 drm_clflush_virt_range(vaddr + shmem_page_offset, 737 page_length); 738 ret = __copy_from_user_inatomic_nocache(vaddr + shmem_page_offset, 739 user_data, 740 page_length); 741 if (needs_clflush_after) 742 drm_clflush_virt_range(vaddr + shmem_page_offset, 743 page_length); 744 kunmap_atomic(vaddr); 745 746 return ret ? -EFAULT : 0; 747 } 748 749 /* Only difference to the fast-path function is that this can handle bit17 750 * and uses non-atomic copy and kmap functions. 
*/ 751 static int 752 shmem_pwrite_slow(struct vm_page *page, int shmem_page_offset, int page_length, 753 char __user *user_data, 754 bool page_do_bit17_swizzling, 755 bool needs_clflush_before, 756 bool needs_clflush_after) 757 { 758 char *vaddr; 759 int ret; 760 761 vaddr = kmap(page); 762 if (unlikely(needs_clflush_before || page_do_bit17_swizzling)) 763 shmem_clflush_swizzled_range(vaddr + shmem_page_offset, 764 page_length, 765 page_do_bit17_swizzling); 766 if (page_do_bit17_swizzling) 767 ret = __copy_from_user_swizzled(vaddr, shmem_page_offset, 768 user_data, 769 page_length); 770 else 771 ret = __copy_from_user(vaddr + shmem_page_offset, 772 user_data, 773 page_length); 774 if (needs_clflush_after) 775 shmem_clflush_swizzled_range(vaddr + shmem_page_offset, 776 page_length, 777 page_do_bit17_swizzling); 778 kunmap(vaddr); 779 780 return ret ? -EFAULT : 0; 781 } 782 783 static int 784 i915_gem_shmem_pwrite(struct drm_device *dev, 785 struct drm_i915_gem_object *obj, 786 struct drm_i915_gem_pwrite *args, 787 struct drm_file *file) 788 { 789 ssize_t remain; 790 loff_t offset; 791 char __user *user_data; 792 int shmem_page_offset, page_length, ret = 0; 793 int obj_do_bit17_swizzling, page_do_bit17_swizzling; 794 int hit_slowpath = 0; 795 int needs_clflush_after = 0; 796 int needs_clflush_before = 0; 797 int i; 798 799 user_data = to_user_ptr(args->data_ptr); 800 remain = args->size; 801 802 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); 803 804 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) { 805 /* If we're not in the cpu write domain, set ourself into the gtt 806 * write domain and manually flush cachelines (if required). This 807 * optimizes for the case when the gpu will use the data 808 * right away and we therefore have to clflush anyway. */ 809 needs_clflush_after = cpu_write_needs_clflush(obj); 810 ret = i915_gem_object_wait_rendering(obj, false); 811 if (ret) 812 return ret; 813 } 814 /* Same trick applies to invalidate partially written cachelines read 815 * before writing. */ 816 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) 817 needs_clflush_before = 818 !cpu_cache_is_coherent(dev, obj->cache_level); 819 820 ret = i915_gem_object_get_pages(obj); 821 if (ret) 822 return ret; 823 824 i915_gem_object_pin_pages(obj); 825 826 offset = args->offset; 827 obj->dirty = 1; 828 829 for (i = 0; i < (obj->base.size >> PAGE_SHIFT); i++) { 830 struct vm_page *page; 831 int partial_cacheline_write; 832 833 if (i < (offset >> PAGE_SHIFT)) 834 continue; 835 836 if (remain <= 0) 837 break; 838 839 /* Operation in this page 840 * 841 * shmem_page_offset = offset within page in shmem file 842 * page_length = bytes to copy for this page 843 */ 844 shmem_page_offset = offset_in_page(offset); 845 846 page_length = remain; 847 if ((shmem_page_offset + page_length) > PAGE_SIZE) 848 page_length = PAGE_SIZE - shmem_page_offset; 849 850 /* If we don't overwrite a cacheline completely we need to be 851 * careful to have up-to-date data by first clflushing. Don't 852 * overcomplicate things and flush the entire patch. 
*/ 853 partial_cacheline_write = needs_clflush_before && 854 ((shmem_page_offset | page_length) 855 & (curcpu()->ci_cflushsz - 1)); 856 857 #ifdef __linux__ 858 page = sg_page(sg); 859 #else 860 page = obj->pages[i]; 861 #endif 862 page_do_bit17_swizzling = obj_do_bit17_swizzling && 863 (page_to_phys(page) & (1 << 17)) != 0; 864 865 ret = shmem_pwrite_fast(page, shmem_page_offset, page_length, 866 user_data, page_do_bit17_swizzling, 867 partial_cacheline_write, 868 needs_clflush_after); 869 if (ret == 0) 870 goto next_page; 871 872 hit_slowpath = 1; 873 mutex_unlock(&dev->struct_mutex); 874 ret = shmem_pwrite_slow(page, shmem_page_offset, page_length, 875 user_data, page_do_bit17_swizzling, 876 partial_cacheline_write, 877 needs_clflush_after); 878 879 mutex_lock(&dev->struct_mutex); 880 881 next_page: 882 set_page_dirty(page); 883 #ifdef __linux__ 884 mark_page_accessed(page); 885 #endif 886 887 if (ret) 888 goto out; 889 890 remain -= page_length; 891 user_data += page_length; 892 offset += page_length; 893 } 894 895 out: 896 i915_gem_object_unpin_pages(obj); 897 898 if (hit_slowpath) { 899 /* 900 * Fixup: Flush cpu caches in case we didn't flush the dirty 901 * cachelines in-line while writing and the object moved 902 * out of the cpu write domain while we've dropped the lock. 903 */ 904 if (!needs_clflush_after && 905 obj->base.write_domain != I915_GEM_DOMAIN_CPU) { 906 if (i915_gem_clflush_object(obj, obj->pin_display)) 907 i915_gem_chipset_flush(dev); 908 } 909 } 910 911 if (needs_clflush_after) 912 i915_gem_chipset_flush(dev); 913 914 return ret; 915 } 916 917 /** 918 * Writes data to the object referenced by handle. 919 * 920 * On error, the contents of the buffer that were to be modified are undefined. 921 */ 922 int 923 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, 924 struct drm_file *file) 925 { 926 struct drm_i915_gem_pwrite *args = data; 927 struct drm_i915_gem_object *obj; 928 int ret; 929 930 if (args->size == 0) 931 return 0; 932 933 if (!access_ok(VERIFY_READ, 934 to_user_ptr(args->data_ptr), 935 args->size)) 936 return -EFAULT; 937 938 #ifdef __linux__ 939 if (likely(!i915_prefault_disable)) { 940 ret = fault_in_multipages_readable(to_user_ptr(args->data_ptr), 941 args->size); 942 if (ret) 943 return -EFAULT; 944 } 945 #endif 946 947 ret = i915_mutex_lock_interruptible(dev); 948 if (ret) 949 return ret; 950 951 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 952 if (&obj->base == NULL) { 953 ret = -ENOENT; 954 goto unlock; 955 } 956 957 /* Bounds check destination. */ 958 if (args->offset > obj->base.size || 959 args->size > obj->base.size - args->offset) { 960 ret = -EINVAL; 961 goto out; 962 } 963 964 #ifdef notyet 965 /* prime objects have no backing filp to GEM pread/pwrite 966 * pages from. 967 */ 968 if (!obj->base.filp) { 969 ret = -EINVAL; 970 goto out; 971 } 972 #endif 973 974 trace_i915_gem_object_pwrite(obj, args->offset, args->size); 975 976 ret = -EFAULT; 977 /* We can only do the GTT pwrite on untiled buffers, as otherwise 978 * it would end up going through the fenced access, and we'll get 979 * different detiling behavior between reading and writing. 980 * pread/pwrite currently are reading and writing from the CPU 981 * perspective, requiring manual detiling by the client. 
982 */ 983 if (obj->phys_obj) { 984 ret = i915_gem_phys_pwrite(dev, obj, args, file); 985 goto out; 986 } 987 988 if (obj->tiling_mode == I915_TILING_NONE && 989 obj->base.write_domain != I915_GEM_DOMAIN_CPU && 990 cpu_write_needs_clflush(obj)) { 991 ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file); 992 /* Note that the gtt paths might fail with non-page-backed user 993 * pointers (e.g. gtt mappings when moving data between 994 * textures). Fallback to the shmem path in that case. */ 995 } 996 997 if (ret == -EFAULT || ret == -ENOSPC) 998 ret = i915_gem_shmem_pwrite(dev, obj, args, file); 999 1000 out: 1001 drm_gem_object_unreference(&obj->base); 1002 unlock: 1003 mutex_unlock(&dev->struct_mutex); 1004 return ret; 1005 } 1006 1007 int 1008 i915_gem_check_wedge(struct i915_gpu_error *error, 1009 bool interruptible) 1010 { 1011 if (i915_reset_in_progress(error)) { 1012 /* Non-interruptible callers can't handle -EAGAIN, hence return 1013 * -EIO unconditionally for these. */ 1014 if (!interruptible) 1015 return -EIO; 1016 1017 /* Recovery complete, but the reset failed ... */ 1018 if (i915_terminally_wedged(error)) 1019 return -EIO; 1020 1021 return -EAGAIN; 1022 } 1023 1024 return 0; 1025 } 1026 1027 /* 1028 * Compare seqno against outstanding lazy request. Emit a request if they are 1029 * equal. 1030 */ 1031 static int 1032 i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno) 1033 { 1034 int ret; 1035 1036 BUG_ON(!mutex_is_locked(&ring->dev->struct_mutex)); 1037 1038 ret = 0; 1039 if (seqno == ring->outstanding_lazy_seqno) 1040 ret = i915_add_request(ring, NULL); 1041 1042 return ret; 1043 } 1044 1045 #ifdef notyet 1046 static void fake_irq(unsigned long data) 1047 { 1048 wake_up_process((struct task_struct *)data); 1049 } 1050 #endif 1051 1052 static bool missed_irq(struct drm_i915_private *dev_priv, 1053 struct intel_ring_buffer *ring) 1054 { 1055 return test_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings); 1056 } 1057 1058 static bool can_wait_boost(struct drm_i915_file_private *file_priv) 1059 { 1060 if (file_priv == NULL) 1061 return true; 1062 1063 return !atomic_xchg(&file_priv->rps_wait_boost, true); 1064 } 1065 1066 /** 1067 * __wait_seqno - wait until execution of seqno has finished 1068 * @ring: the ring expected to report seqno 1069 * @seqno: duh! 1070 * @reset_counter: reset sequence associated with the given seqno 1071 * @interruptible: do an interruptible wait (normally yes) 1072 * @timeout: in - how long to wait (NULL forever); out - how much time remaining 1073 * 1074 * Note: It is of utmost importance that the passed in seqno and reset_counter 1075 * values have been read by the caller in an smp safe manner. Where read-side 1076 * locks are involved, it is sufficient to read the reset_counter before 1077 * unlocking the lock that protects the seqno. For lockless tricks, the 1078 * reset_counter _must_ be read before, and an appropriate smp_rmb must be 1079 * inserted. 1080 * 1081 * Returns 0 if the seqno was found within the alloted time. Else returns the 1082 * errno with remaining time filled in timeout argument. 
1083 */ 1084 #ifdef __linux__ 1085 static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno, 1086 unsigned reset_counter, 1087 bool interruptible, 1088 struct timespec *timeout, 1089 struct drm_i915_file_private *file_priv) 1090 { 1091 drm_i915_private_t *dev_priv = ring->dev->dev_private; 1092 const bool irq_test_in_progress = 1093 ACCESS_ONCE(dev_priv->gpu_error.test_irq_rings) & intel_ring_flag(ring); 1094 struct timespec before, now; 1095 DEFINE_WAIT(wait); 1096 unsigned long timeout_expire; 1097 int ret; 1098 1099 WARN(dev_priv->pc8.irqs_disabled, "IRQs disabled\n"); 1100 1101 if (i915_seqno_passed(ring->get_seqno(ring, true), seqno)) 1102 return 0; 1103 1104 timeout_expire = timeout ? jiffies + timespec_to_jiffies_timeout(timeout) : 0; 1105 1106 if (dev_priv->info->gen >= 6 && can_wait_boost(file_priv)) { 1107 gen6_rps_boost(dev_priv); 1108 if (file_priv) 1109 mod_delayed_work(dev_priv->wq, 1110 &file_priv->mm.idle_work, 1111 msecs_to_jiffies(100)); 1112 } 1113 1114 if (!irq_test_in_progress && WARN_ON(!ring->irq_get(ring))) 1115 return -ENODEV; 1116 1117 /* Record current time in case interrupted by signal, or wedged */ 1118 trace_i915_gem_request_wait_begin(ring, seqno); 1119 getrawmonotonic(&before); 1120 for (;;) { 1121 struct timer_list timer; 1122 1123 prepare_to_wait(&ring->irq_queue, &wait, 1124 interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE); 1125 1126 /* We need to check whether any gpu reset happened in between 1127 * the caller grabbing the seqno and now ... */ 1128 if (reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter)) { 1129 /* ... but upgrade the -EAGAIN to an -EIO if the gpu 1130 * is truely gone. */ 1131 ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible); 1132 if (ret == 0) 1133 ret = -EAGAIN; 1134 break; 1135 } 1136 1137 if (i915_seqno_passed(ring->get_seqno(ring, false), seqno)) { 1138 ret = 0; 1139 break; 1140 } 1141 1142 if (interruptible && signal_pending(current)) { 1143 ret = -ERESTARTSYS; 1144 break; 1145 } 1146 1147 if (timeout && time_after_eq(jiffies, timeout_expire)) { 1148 ret = -ETIME; 1149 break; 1150 } 1151 1152 timer.function = NULL; 1153 if (timeout || missed_irq(dev_priv, ring)) { 1154 unsigned long expire; 1155 1156 setup_timer_on_stack(&timer, fake_irq, (unsigned long)current); 1157 expire = missed_irq(dev_priv, ring) ? jiffies + 1 : timeout_expire; 1158 mod_timer(&timer, expire); 1159 } 1160 1161 io_schedule(); 1162 1163 if (timer.function) { 1164 del_singleshot_timer_sync(&timer); 1165 destroy_timer_on_stack(&timer); 1166 } 1167 } 1168 getrawmonotonic(&now); 1169 trace_i915_gem_request_wait_end(ring, seqno); 1170 1171 if (!irq_test_in_progress) 1172 ring->irq_put(ring); 1173 1174 finish_wait(&ring->irq_queue, &wait); 1175 1176 if (timeout) { 1177 struct timespec sleep_time = timespec_sub(now, before); 1178 *timeout = timespec_sub(*timeout, sleep_time); 1179 if (!timespec_valid(timeout)) /* i.e. 
negative time remains */ 1180 set_normalized_timespec(timeout, 0, 0); 1181 } 1182 1183 return ret; 1184 } 1185 #else 1186 static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno, 1187 unsigned reset_counter, 1188 bool interruptible, 1189 struct timespec *timeout, 1190 struct drm_i915_file_private *file_priv) 1191 { 1192 drm_i915_private_t *dev_priv = ring->dev->dev_private; 1193 const bool irq_test_in_progress = 1194 ACCESS_ONCE(dev_priv->gpu_error.test_irq_rings) & intel_ring_flag(ring); 1195 struct timespec before, now; 1196 struct sleep_state sls; 1197 unsigned long timeout_expire; 1198 int ret = 0; 1199 1200 WARN(dev_priv->pc8.irqs_disabled, "IRQs disabled\n"); 1201 1202 if (i915_seqno_passed(ring->get_seqno(ring, true), seqno)) 1203 return 0; 1204 1205 timeout_expire = timeout ? jiffies + timespec_to_jiffies_timeout(timeout) : 0; 1206 1207 if (dev_priv->info->gen >= 6 && can_wait_boost(file_priv)) { 1208 gen6_rps_boost(dev_priv); 1209 if (file_priv) 1210 mod_delayed_work(dev_priv->wq, 1211 &file_priv->mm.idle_work, 1212 msecs_to_jiffies(100)); 1213 } 1214 1215 if (!irq_test_in_progress && WARN_ON(!ring->irq_get(ring))) 1216 return -ENODEV; 1217 1218 /* Record current time in case interrupted by signal, or wedged */ 1219 trace_i915_gem_request_wait_begin(ring, seqno); 1220 getrawmonotonic(&before); 1221 for (;;) { 1222 atomic_inc_int(&ring->irq_queue.count); 1223 sleep_setup(&sls, &ring->irq_queue, interruptible ? PCATCH : 0, "wseq"); 1224 1225 /* We need to check whether any gpu reset happened in between 1226 * the caller grabbing the seqno and now ... */ 1227 if (reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter)) { 1228 /* ... but upgrade the -EAGAIN to an -EIO if the gpu 1229 * is truely gone. */ 1230 ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible); 1231 if (ret == 0) 1232 ret = -EAGAIN; 1233 break; 1234 } 1235 1236 if (i915_seqno_passed(ring->get_seqno(ring, false), seqno)) { 1237 ret = 0; 1238 break; 1239 } 1240 1241 if (interruptible && ret) { 1242 ret = -ERESTARTSYS; 1243 break; 1244 } 1245 1246 if (timeout && time_after_eq(jiffies, timeout_expire)) { 1247 ret = -ETIME; 1248 break; 1249 } 1250 1251 if (timeout || missed_irq(dev_priv, ring)) { 1252 unsigned long expire; 1253 int timo; 1254 1255 expire = missed_irq(dev_priv, ring) ? jiffies + 1 : timeout_expire; 1256 timo = expire - jiffies; 1257 if (timo < 1) 1258 timo = 1; 1259 sleep_setup_timeout(&sls, timo); 1260 } 1261 1262 sleep_setup_signal(&sls, interruptible ? PCATCH : 0); 1263 1264 sleep_finish(&sls, 1); 1265 sleep_finish_timeout(&sls); 1266 ret = sleep_finish_signal(&sls); 1267 atomic_dec_int(&ring->irq_queue.count); 1268 } 1269 getrawmonotonic(&now); 1270 trace_i915_gem_request_wait_end(ring, seqno); 1271 1272 if (!irq_test_in_progress) 1273 ring->irq_put(ring); 1274 1275 sleep_finish(&sls, 0); 1276 sleep_finish_timeout(&sls); 1277 sleep_finish_signal(&sls); 1278 atomic_dec_int(&ring->irq_queue.count); 1279 1280 if (timeout) { 1281 struct timespec sleep_time = timespec_sub(now, before); 1282 *timeout = timespec_sub(*timeout, sleep_time); 1283 if (!timespec_valid(timeout)) /* i.e. negative time remains */ 1284 set_normalized_timespec(timeout, 0, 0); 1285 } 1286 1287 return ret; 1288 } 1289 #endif 1290 1291 /** 1292 * Waits for a sequence number to be signaled, and cleans up the 1293 * request and object lists appropriately for that event. 
 */
int
i915_wait_seqno(struct intel_ring_buffer *ring, uint32_t seqno)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	bool interruptible = dev_priv->mm.interruptible;
	int ret;

	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
	BUG_ON(seqno == 0);

	ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
	if (ret)
		return ret;

	ret = i915_gem_check_olr(ring, seqno);
	if (ret)
		return ret;

	return __wait_seqno(ring, seqno,
			    atomic_read(&dev_priv->gpu_error.reset_counter),
			    interruptible, NULL, NULL);
}

static int
i915_gem_object_wait_rendering__tail(struct drm_i915_gem_object *obj,
				     struct intel_ring_buffer *ring)
{
	i915_gem_retire_requests_ring(ring);

	/* Manually manage the write flush as we may have not yet
	 * retired the buffer.
	 *
	 * Note that the last_write_seqno is always the earlier of
	 * the two (read/write) seqno, so if we have successfully waited,
	 * we know we have passed the last write.
	 */
	obj->last_write_seqno = 0;
	obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;

	return 0;
}

/**
 * Ensures that all rendering to the object has completed and the object is
 * safe to unbind from the GTT or access from the CPU.
 */
static __must_check int
i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
			       bool readonly)
{
	struct intel_ring_buffer *ring = obj->ring;
	u32 seqno;
	int ret;

	seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
	if (seqno == 0)
		return 0;

	ret = i915_wait_seqno(ring, seqno);
	if (ret)
		return ret;

	return i915_gem_object_wait_rendering__tail(obj, ring);
}

/* A nonblocking variant of the above wait. This is a highly dangerous routine
 * as the object state may change during this call.
 */
static __must_check int
i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
					    struct drm_file *file,
					    bool readonly)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_ring_buffer *ring = obj->ring;
	unsigned reset_counter;
	u32 seqno;
	int ret;

	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
	BUG_ON(!dev_priv->mm.interruptible);

	seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
	if (seqno == 0)
		return 0;

	ret = i915_gem_check_wedge(&dev_priv->gpu_error, true);
	if (ret)
		return ret;

	ret = i915_gem_check_olr(ring, seqno);
	if (ret)
		return ret;

	reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
	mutex_unlock(&dev->struct_mutex);
	ret = __wait_seqno(ring, seqno, reset_counter, true, NULL, file->driver_priv);
	mutex_lock(&dev->struct_mutex);
	if (ret)
		return ret;

	return i915_gem_object_wait_rendering__tail(obj, ring);
}

/**
 * Called when user space prepares to use an object with the CPU, either
 * through the mmap ioctl's mapping or a GTT mapping.
1404 */ 1405 int 1406 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, 1407 struct drm_file *file) 1408 { 1409 struct drm_i915_gem_set_domain *args = data; 1410 struct drm_i915_gem_object *obj; 1411 uint32_t read_domains = args->read_domains; 1412 uint32_t write_domain = args->write_domain; 1413 int ret; 1414 1415 /* Only handle setting domains to types used by the CPU. */ 1416 if (write_domain & I915_GEM_GPU_DOMAINS) 1417 return -EINVAL; 1418 1419 if (read_domains & I915_GEM_GPU_DOMAINS) 1420 return -EINVAL; 1421 1422 /* Having something in the write domain implies it's in the read 1423 * domain, and only that read domain. Enforce that in the request. 1424 */ 1425 if (write_domain != 0 && read_domains != write_domain) 1426 return -EINVAL; 1427 1428 ret = i915_mutex_lock_interruptible(dev); 1429 if (ret) 1430 return ret; 1431 1432 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 1433 if (&obj->base == NULL) { 1434 ret = -ENOENT; 1435 goto unlock; 1436 } 1437 1438 /* Try to flush the object off the GPU without holding the lock. 1439 * We will repeat the flush holding the lock in the normal manner 1440 * to catch cases where we are gazumped. 1441 */ 1442 ret = i915_gem_object_wait_rendering__nonblocking(obj, file, !write_domain); 1443 if (ret) 1444 goto unref; 1445 1446 if (read_domains & I915_GEM_DOMAIN_GTT) { 1447 ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0); 1448 1449 /* Silently promote "you're not bound, there was nothing to do" 1450 * to success, since the client was just asking us to 1451 * make sure everything was done. 1452 */ 1453 if (ret == -EINVAL) 1454 ret = 0; 1455 } else { 1456 ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0); 1457 } 1458 1459 unref: 1460 drm_gem_object_unreference(&obj->base); 1461 unlock: 1462 mutex_unlock(&dev->struct_mutex); 1463 return ret; 1464 } 1465 1466 /** 1467 * Called when user space has done writes to this buffer 1468 */ 1469 int 1470 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, 1471 struct drm_file *file) 1472 { 1473 struct drm_i915_gem_sw_finish *args = data; 1474 struct drm_i915_gem_object *obj; 1475 int ret = 0; 1476 1477 ret = i915_mutex_lock_interruptible(dev); 1478 if (ret) 1479 return ret; 1480 1481 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 1482 if (&obj->base == NULL) { 1483 ret = -ENOENT; 1484 goto unlock; 1485 } 1486 1487 /* Pinned buffers may be scanout, so flush the cache */ 1488 if (obj->pin_display) 1489 i915_gem_object_flush_cpu_write_domain(obj, true); 1490 1491 drm_gem_object_unreference(&obj->base); 1492 unlock: 1493 mutex_unlock(&dev->struct_mutex); 1494 return ret; 1495 } 1496 1497 /** 1498 * Maps the contents of an object, returning the address it is mapped 1499 * into. 1500 * 1501 * While the mapping holds a reference on the contents of the object, it doesn't 1502 * imply a ref on the object itself. 
1503 */ 1504 int 1505 i915_gem_mmap_ioctl(struct drm_device *dev, void *data, 1506 struct drm_file *file) 1507 { 1508 struct drm_i915_gem_mmap *args = data; 1509 struct drm_gem_object *obj; 1510 vaddr_t addr; 1511 vsize_t size; 1512 int ret; 1513 1514 size = round_page(args->size); 1515 if (size == 0) 1516 return -EINVAL; 1517 1518 if (args->offset + size < args->offset) 1519 return -EINVAL; 1520 if (args->offset & PAGE_MASK) 1521 return -EINVAL; 1522 1523 obj = drm_gem_object_lookup(dev, file, args->handle); 1524 if (obj == NULL) 1525 return -ENOENT; 1526 1527 #ifdef notyet 1528 /* prime objects have no backing filp to GEM mmap 1529 * pages from. 1530 */ 1531 if (!obj->filp) { 1532 drm_gem_object_unreference_unlocked(obj); 1533 return -EINVAL; 1534 } 1535 #endif 1536 1537 addr = 0; 1538 ret = -uvm_map(&curproc->p_vmspace->vm_map, &addr, size, 1539 obj->uao, args->offset, 0, UVM_MAPFLAG(PROT_READ | PROT_WRITE, 1540 PROT_READ | PROT_WRITE, MAP_INHERIT_SHARE, MADV_RANDOM, 0)); 1541 if (ret == 0) 1542 uao_reference(obj->uao); 1543 drm_gem_object_unreference_unlocked(obj); 1544 if (ret) 1545 return ret; 1546 1547 args->addr_ptr = (uint64_t) addr; 1548 1549 return 0; 1550 } 1551 1552 int 1553 i915_gem_fault(struct drm_gem_object *gem_obj, struct uvm_faultinfo *ufi, 1554 off_t offset, vaddr_t vaddr, vm_page_t *pps, int npages, int centeridx, 1555 vm_prot_t access_type, int flags) 1556 { 1557 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); 1558 struct drm_device *dev = obj->base.dev; 1559 drm_i915_private_t *dev_priv = dev->dev_private; 1560 paddr_t paddr; 1561 int lcv, ret = 0; 1562 int write = !!(access_type & PROT_WRITE); 1563 vm_prot_t mapprot; 1564 boolean_t locked = TRUE; 1565 1566 intel_runtime_pm_get(dev_priv); 1567 1568 /* 1569 * If we already own the lock, we must be doing a copyin or 1570 * copyout in one of the fast paths. Return failure such that 1571 * we fall back on the slow path. 1572 */ 1573 if (!drm_vma_node_has_offset(&obj->base.vma_node) || 1574 RWLOCK_OWNER(&dev->struct_mutex) == curproc) { 1575 uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, 1576 &obj->base.uobj, NULL); 1577 ret = VM_PAGER_BAD; 1578 goto out; 1579 } 1580 1581 offset -= drm_vma_node_offset_addr(&obj->base.vma_node); 1582 1583 if (rw_enter(&dev->struct_mutex, RW_NOSLEEP | RW_WRITE) != 0) { 1584 uvmfault_unlockall(ufi, NULL, &obj->base.uobj, NULL); 1585 mutex_lock(&dev->struct_mutex); 1586 locked = uvmfault_relock(ufi); 1587 } 1588 if (!locked) { 1589 mutex_unlock(&dev->struct_mutex); 1590 ret = VM_PAGER_REFAULT; 1591 goto out; 1592 } 1593 1594 /* Access to snoopable pages through the GTT is incoherent. */ 1595 if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) { 1596 ret = -EINVAL; 1597 goto unlock; 1598 } 1599 1600 /* Now bind it into the GTT if needed */ 1601 ret = i915_gem_obj_ggtt_pin(obj, 0, true, false); 1602 if (ret) 1603 goto unlock; 1604 1605 ret = i915_gem_object_set_to_gtt_domain(obj, write); 1606 if (ret) 1607 goto unpin; 1608 1609 ret = i915_gem_object_get_fence(obj); 1610 if (ret) 1611 goto unpin; 1612 1613 obj->fault_mappable = true; 1614 1615 mapprot = ufi->entry->protection; 1616 /* 1617 * if it's only a read fault, we only put ourselves into the gtt 1618 * read domain, so make sure we fault again and set ourselves to write. 1619 * this prevents us needing userland to do domain management and get 1620 * it wrong, and makes us fully coherent with the gpu re mmap. 
	 */
	if (write == 0)
		mapprot &= ~PROT_WRITE;
	/* XXX try and be more efficient when we do this */
	for (lcv = 0 ; lcv < npages ; lcv++, offset += PAGE_SIZE,
	    vaddr += PAGE_SIZE) {
		if ((flags & PGO_ALLPAGES) == 0 && lcv != centeridx)
			continue;

		if (pps[lcv] == PGO_DONTCARE)
			continue;

		paddr = dev_priv->gtt.mappable_base +
		    i915_gem_obj_ggtt_offset(obj) + offset;

		if (pmap_enter(ufi->orig_map->pmap, vaddr, paddr,
		    mapprot, PMAP_CANFAIL | mapprot) != 0) {
			i915_gem_object_unpin(obj);
			uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap,
			    NULL, NULL);
			mutex_unlock(&dev->struct_mutex);
			pmap_update(ufi->orig_map->pmap);
			uvm_wait("intelflt");
			ret = VM_PAGER_REFAULT;
			goto out;
		}
	}
unpin:
	i915_gem_object_unpin(obj);
unlock:
	uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, NULL, NULL);
	mutex_unlock(&dev->struct_mutex);
	pmap_update(ufi->orig_map->pmap);

	switch (ret) {
	case -EIO:
		/*
		 * We eat errors when the gpu is terminally wedged to avoid
		 * userspace unduly crashing (gl has no provisions for mmaps to
		 * fail). But any other -EIO isn't ours (e.g. swap in failure)
		 * and so needs to be reported.
		 */
		if (!i915_terminally_wedged(&dev_priv->gpu_error)) {
			ret = VM_PAGER_ERROR;
			break;
		}
	case -EAGAIN:
		/*
		 * EAGAIN means the gpu is hung and we'll wait for the error
		 * handler to reset everything when re-faulting in
		 * i915_mutex_lock_interruptible.
		 */
	case 0:
	case -ERESTART:
	case -EINTR:
	case -EBUSY:
		/*
		 * EBUSY is ok: this just means that another thread
		 * already did the job.
		 */
		ret = VM_PAGER_OK;
		break;
	case -ENOMEM:
		ret = VM_PAGER_ERROR;
		break;
	case -ENOSPC:
		ret = VM_PAGER_ERROR;
		break;
	default:
		WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret);
		ret = VM_PAGER_ERROR;
		break;
	}

out:
	intel_runtime_pm_put(dev_priv);
	return ret;
}
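
/*
 * Illustrative note (not part of the original source): the fault handler
 * above maps the GTT aperture directly into the faulting process.  The
 * physical address of each page is computed as
 *
 *	paddr = dev_priv->gtt.mappable_base +
 *	    i915_gem_obj_ggtt_offset(obj) + offset;
 *
 * and entered with pmap_enter(..., PMAP_CANFAIL | mapprot).  If that fails,
 * the handler unwinds its locks, sleeps in uvm_wait() for free pages and
 * returns VM_PAGER_REFAULT so the fault is retried from scratch.
 */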

void i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv)
{
	struct i915_vma *vma;

	/*
	 * Only the global gtt is relevant for gtt memory mappings, so restrict
	 * list traversal to objects bound into the global address space. Note
	 * that the active list should be empty, but better safe than sorry.
	 */
	WARN_ON(!list_empty(&dev_priv->gtt.base.active_list));
	list_for_each_entry(vma, &dev_priv->gtt.base.active_list, mm_list)
		i915_gem_release_mmap(vma->obj);
	list_for_each_entry(vma, &dev_priv->gtt.base.inactive_list, mm_list)
		i915_gem_release_mmap(vma->obj);
}

/**
 * i915_gem_release_mmap - remove physical page mappings
 * @obj: obj in question
 *
 * Preserve the reservation of the mmapping with the DRM core code, but
 * relinquish ownership of the pages back to the system.
 *
 * It is vital that we remove the page mapping if we have mapped a tiled
 * object through the GTT and then lose the fence register due to
 * resource pressure. Similarly if the object has been moved out of the
 * aperture, then pages mapped into userspace must be revoked. Removing the
 * mapping will then trigger a page fault on the next user access, allowing
 * fixup by i915_gem_fault().
 */
void
i915_gem_release_mmap(struct drm_i915_gem_object *obj)
{
	struct inteldrm_softc *dev_priv = obj->base.dev->dev_private;
	struct vm_page *pg;

	if (!obj->fault_mappable)
		return;

	for (pg = &dev_priv->pgs[atop(i915_gem_obj_ggtt_offset(obj))];
	     pg != &dev_priv->pgs[atop(i915_gem_obj_ggtt_offset(obj) + obj->base.size)];
	     pg++)
		pmap_page_protect(pg, PROT_NONE);

	drm_vma_node_unmap(&obj->base.vma_node, obj->base.dev->dev_mapping);
	obj->fault_mappable = false;
}

uint32_t
i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
{
	uint32_t gtt_size;

	if (INTEL_INFO(dev)->gen >= 4 ||
	    tiling_mode == I915_TILING_NONE)
		return size;

	/* Previous chips need a power-of-two fence region when tiling */
	if (INTEL_INFO(dev)->gen == 3)
		gtt_size = 1024*1024;
	else
		gtt_size = 512*1024;

	while (gtt_size < size)
		gtt_size <<= 1;

	return gtt_size;
}

/**
 * i915_gem_get_gtt_alignment - return required GTT alignment for an object
 * @obj: object to check
 *
 * Return the required GTT alignment for an object, taking into account
 * potential fence register mapping.
 */
uint32_t
i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size,
			   int tiling_mode, bool fenced)
{
	/*
	 * Minimum alignment is 4k (GTT page size), but might be greater
	 * if a fence register is needed for the object.
	 */
	if (INTEL_INFO(dev)->gen >= 4 || (!fenced && IS_G33(dev)) ||
	    tiling_mode == I915_TILING_NONE)
		return 4096;

	/*
	 * Previous chips need to be aligned to the size of the smallest
	 * fence register that can contain the object.
	 */
	return i915_gem_get_gtt_size(dev, size, tiling_mode);
}

static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
{
#if 0
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
#endif
	int ret;

	if (drm_vma_node_has_offset(&obj->base.vma_node))
		return 0;

#if 0
	dev_priv->mm.shrinker_no_lock_stealing = true;
#endif

	ret = drm_gem_create_mmap_offset(&obj->base);
#if 0
	if (ret != -ENOSPC)
		goto out;

	/* Badly fragmented mmap space? The only way we can recover
	 * space is by destroying unwanted objects. We can't randomly release
	 * mmap_offsets as userspace expects them to be persistent for the
	 * lifetime of the objects. The closest we can do is to release the
	 * offsets on purgeable objects by truncating it and marking it purged,
	 * which prevents userspace from ever using that object again.
1820 */ 1821 i915_gem_purge(dev_priv, obj->base.size >> PAGE_SHIFT); 1822 ret = drm_gem_create_mmap_offset(&obj->base); 1823 if (ret != -ENOSPC) 1824 goto out; 1825 1826 i915_gem_shrink_all(dev_priv); 1827 ret = drm_gem_create_mmap_offset(&obj->base); 1828 out: 1829 dev_priv->mm.shrinker_no_lock_stealing = false; 1830 #endif 1831 1832 return ret; 1833 } 1834 1835 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj) 1836 { 1837 drm_gem_free_mmap_offset(&obj->base); 1838 } 1839 1840 int 1841 i915_gem_mmap_gtt(struct drm_file *file, 1842 struct drm_device *dev, 1843 uint32_t handle, 1844 uint64_t *offset) 1845 { 1846 struct drm_i915_private *dev_priv = dev->dev_private; 1847 struct drm_i915_gem_object *obj; 1848 int ret; 1849 1850 ret = i915_mutex_lock_interruptible(dev); 1851 if (ret) 1852 return ret; 1853 1854 obj = to_intel_bo(drm_gem_object_lookup(dev, file, handle)); 1855 if (&obj->base == NULL) { 1856 ret = -ENOENT; 1857 goto unlock; 1858 } 1859 1860 if (obj->base.size > dev_priv->gtt.mappable_end) { 1861 ret = -E2BIG; 1862 goto out; 1863 } 1864 1865 if (obj->madv != I915_MADV_WILLNEED) { 1866 DRM_ERROR("Attempting to mmap a purgeable buffer\n"); 1867 ret = -EINVAL; 1868 goto out; 1869 } 1870 1871 ret = i915_gem_object_create_mmap_offset(obj); 1872 if (ret) 1873 goto out; 1874 1875 *offset = drm_vma_node_offset_addr(&obj->base.vma_node); 1876 1877 out: 1878 drm_gem_object_unreference(&obj->base); 1879 unlock: 1880 mutex_unlock(&dev->struct_mutex); 1881 return ret; 1882 } 1883 1884 /** 1885 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing 1886 * @dev: DRM device 1887 * @data: GTT mapping ioctl data 1888 * @file: GEM object info 1889 * 1890 * Simply returns the fake offset to userspace so it can mmap it. 1891 * The mmap call will end up in drm_gem_mmap(), which will set things 1892 * up so we can get faults in the handler above. 1893 * 1894 * The fault handler will take care of binding the object into the GTT 1895 * (since it may have been evicted to make room for something), allocating 1896 * a fence register, and mapping the appropriate aperture address into 1897 * userspace. 1898 */ 1899 int 1900 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, 1901 struct drm_file *file) 1902 { 1903 struct drm_i915_gem_mmap_gtt *args = data; 1904 1905 return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset); 1906 } 1907 1908 /* Immediately discard the backing storage */ 1909 static void 1910 i915_gem_object_truncate(struct drm_i915_gem_object *obj) 1911 { 1912 i915_gem_object_free_mmap_offset(obj); 1913 1914 obj->base.uao->pgops->pgo_flush(obj->base.uao, 0, obj->base.size, 1915 PGO_ALLPAGES | PGO_FREE); 1916 1917 obj->madv = __I915_MADV_PURGED; 1918 } 1919 1920 static inline int 1921 i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj) 1922 { 1923 return obj->madv == I915_MADV_DONTNEED; 1924 } 1925 1926 static void 1927 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj) 1928 { 1929 #ifdef __linux__ 1930 struct sg_page_iter sg_iter; 1931 #else 1932 int i, page_count = obj->base.size / PAGE_SIZE; 1933 #endif 1934 int ret; 1935 1936 BUG_ON(obj->madv == __I915_MADV_PURGED); 1937 1938 ret = i915_gem_object_set_to_cpu_domain(obj, true); 1939 if (ret) { 1940 /* In the event of a disaster, abandon all caches and 1941 * hope for the best. 
		 */
		WARN_ON(ret != -EIO);
		i915_gem_clflush_object(obj, true);
		obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
	}

	if (i915_gem_object_needs_bit17_swizzle(obj))
		i915_gem_object_save_bit_17_swizzle(obj);

	if (obj->madv == I915_MADV_DONTNEED)
		obj->dirty = 0;

#ifdef __linux__
	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
		struct page *page = sg_page_iter_page(&sg_iter);

		if (obj->dirty)
			set_page_dirty(page);

		if (obj->madv == I915_MADV_WILLNEED)
			mark_page_accessed(page);

		page_cache_release(page);
	}
#else
	for (i = 0; i < page_count; i++) {
		struct vm_page *page = obj->pages[i];

		if (obj->dirty)
			atomic_clearbits_int(&page->pg_flags, PG_CLEAN);
	}
	uvm_objunwire(obj->base.uao, 0, obj->base.size);
#endif
	obj->dirty = 0;

#ifdef __linux__
	sg_free_table(obj->pages);
#endif
	kfree(obj->pages);
}

int
i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
{
	const struct drm_i915_gem_object_ops *ops = obj->ops;

	if (obj->pages == NULL)
		return 0;

	if (obj->pages_pin_count)
		return -EBUSY;

	BUG_ON(i915_gem_obj_bound_any(obj));

	/* ->put_pages might need to allocate memory for the bit17 swizzle
	 * array, hence protect them from being reaped by removing them from gtt
	 * lists early. */
	list_del(&obj->global_list);

	ops->put_pages(obj);
	obj->pages = NULL;

	if (i915_gem_object_is_purgeable(obj))
		i915_gem_object_truncate(obj);

	return 0;
}
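
/*
 * Illustrative note (not part of the original source): callers that need
 * the backing pages to stay resident bracket their access with the pin
 * helpers, roughly
 *
 *	ret = i915_gem_object_get_pages(obj);
 *	if (ret == 0) {
 *		i915_gem_object_pin_pages(obj);
 *		... access obj->pages ...
 *		i915_gem_object_unpin_pages(obj);
 *	}
 *
 * as the shmem pread/pwrite paths above do.  While pages_pin_count is
 * non-zero, i915_gem_object_put_pages() refuses to drop the pages and
 * returns -EBUSY.
 */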
2057 * The only other place where we have to be careful with active 2058 * objects suddenly disappearing due to retiring requests is the 2059 * eviction code. 2060 * 2061 * Note 2: Even though the bound list doesn't hold a reference 2062 * to the object we can safely grab one here: The final object 2063 * unreferencing and the bound_list are both protected by the 2064 * dev->struct_mutex and so we won't ever be able to observe an 2065 * object on the bound_list with a reference count equals 0. 2066 */ 2067 drm_gem_object_reference(&obj->base); 2068 2069 list_for_each_entry_safe(vma, v, &obj->vma_list, vma_link) 2070 if (i915_vma_unbind(vma)) 2071 break; 2072 2073 if (i915_gem_object_put_pages(obj) == 0) 2074 count += obj->base.size >> PAGE_SHIFT; 2075 2076 drm_gem_object_unreference(&obj->base); 2077 } 2078 list_splice(&still_bound_list, &dev_priv->mm.bound_list); 2079 2080 return count; 2081 } 2082 2083 static unsigned long 2084 i915_gem_purge(struct drm_i915_private *dev_priv, long target) 2085 { 2086 return __i915_gem_shrink(dev_priv, target, true); 2087 } 2088 2089 static unsigned long 2090 i915_gem_shrink_all(struct drm_i915_private *dev_priv) 2091 { 2092 struct drm_i915_gem_object *obj, *next; 2093 long freed = 0; 2094 2095 i915_gem_evict_everything(dev_priv->dev); 2096 2097 list_for_each_entry_safe(obj, next, &dev_priv->mm.unbound_list, 2098 global_list) { 2099 if (i915_gem_object_put_pages(obj) == 0) 2100 freed += obj->base.size >> PAGE_SHIFT; 2101 } 2102 return freed; 2103 } 2104 #endif /* notyet */ 2105 2106 static int 2107 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) 2108 { 2109 #ifdef __linux__ 2110 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2111 int page_count, i; 2112 struct address_space *mapping; 2113 struct sg_table *st; 2114 struct scatterlist *sg; 2115 struct sg_page_iter sg_iter; 2116 struct page *page; 2117 unsigned long last_pfn = 0; /* suppress gcc warning */ 2118 gfp_t gfp; 2119 #else 2120 int page_count, i; 2121 struct vm_page **st; 2122 struct pglist plist; 2123 struct vm_page *page; 2124 #endif 2125 2126 /* Assert that the object is not currently in any GPU domain. As it 2127 * wasn't in the GTT, there shouldn't be any way it could have been in 2128 * a GPU cache 2129 */ 2130 BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS); 2131 BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS); 2132 2133 #ifdef __linux__ 2134 st = kmalloc(sizeof(*st), GFP_KERNEL); 2135 if (st == NULL) 2136 return -ENOMEM; 2137 2138 page_count = obj->base.size / PAGE_SIZE; 2139 if (sg_alloc_table(st, page_count, GFP_KERNEL)) { 2140 kfree(st); 2141 return -ENOMEM; 2142 } 2143 2144 /* Get the list of pages out of our struct file. They'll be pinned 2145 * at this point until we release them. 2146 * 2147 * Fail silently without starting the shrinker 2148 */ 2149 mapping = file_inode(obj->base.filp)->i_mapping; 2150 gfp = mapping_gfp_mask(mapping); 2151 gfp |= __GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD; 2152 gfp &= ~(__GFP_IO | __GFP_WAIT); 2153 sg = st->sgl; 2154 st->nents = 0; 2155 for (i = 0; i < page_count; i++) { 2156 page = shmem_read_mapping_page_gfp(mapping, i, gfp); 2157 if (IS_ERR(page)) { 2158 i915_gem_purge(dev_priv, page_count); 2159 page = shmem_read_mapping_page_gfp(mapping, i, gfp); 2160 } 2161 if (IS_ERR(page)) { 2162 /* We've tried hard to allocate the memory by reaping 2163 * our own buffer, now let the real VM do its job and 2164 * go down in flames if truly OOM. 
2165 */ 2166 gfp &= ~(__GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD); 2167 gfp |= __GFP_IO | __GFP_WAIT; 2168 2169 i915_gem_shrink_all(dev_priv); 2170 page = shmem_read_mapping_page_gfp(mapping, i, gfp); 2171 if (IS_ERR(page)) 2172 goto err_pages; 2173 2174 gfp |= __GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD; 2175 gfp &= ~(__GFP_IO | __GFP_WAIT); 2176 } 2177 #ifdef CONFIG_SWIOTLB 2178 if (swiotlb_nr_tbl()) { 2179 st->nents++; 2180 sg_set_page(sg, page, PAGE_SIZE, 0); 2181 sg = sg_next(sg); 2182 continue; 2183 } 2184 #endif 2185 if (!i || page_to_pfn(page) != last_pfn + 1) { 2186 if (i) 2187 sg = sg_next(sg); 2188 st->nents++; 2189 sg_set_page(sg, page, PAGE_SIZE, 0); 2190 } else { 2191 sg->length += PAGE_SIZE; 2192 } 2193 last_pfn = page_to_pfn(page); 2194 2195 /* Check that the i965g/gm workaround works. */ 2196 WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL)); 2197 } 2198 #ifdef CONFIG_SWIOTLB 2199 if (!swiotlb_nr_tbl()) 2200 #endif 2201 sg_mark_end(sg); 2202 obj->pages = st; 2203 #else 2204 page_count = obj->base.size / PAGE_SIZE; 2205 st = malloc(page_count * sizeof(struct vm_page *), M_DRM, 2206 M_WAITOK | M_CANFAIL); 2207 if (st == NULL) 2208 return -ENOMEM; 2209 2210 TAILQ_INIT(&plist); 2211 if (uvm_objwire(obj->base.uao, 0, obj->base.size, &plist)) 2212 goto err_pages; 2213 2214 i = 0; 2215 TAILQ_FOREACH(page, &plist, pageq) { 2216 st[i] = page; 2217 i++; 2218 } 2219 obj->pages = st; 2220 #endif 2221 2222 if (i915_gem_object_needs_bit17_swizzle(obj)) 2223 i915_gem_object_do_bit_17_swizzle(obj); 2224 2225 return 0; 2226 2227 #ifdef __linux__ 2228 err_pages: 2229 sg_mark_end(sg); 2230 for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) 2231 page_cache_release(sg_page_iter_page(&sg_iter)); 2232 sg_free_table(st); 2233 kfree(st); 2234 return PTR_ERR(page); 2235 #else 2236 err_pages: 2237 free(st, M_DRM, 0); 2238 return -ENOMEM; 2239 #endif 2240 } 2241 2242 /* Ensure that the associated pages are gathered from the backing storage 2243 * and pinned into our object. i915_gem_object_get_pages() may be called 2244 * multiple times before they are released by a single call to 2245 * i915_gem_object_put_pages() - once the pages are no longer referenced 2246 * either as a result of memory pressure (reaping pages under the shrinker) 2247 * or as the object is itself released. 2248 */ 2249 int 2250 i915_gem_object_get_pages(struct drm_i915_gem_object *obj) 2251 { 2252 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2253 const struct drm_i915_gem_object_ops *ops = obj->ops; 2254 int ret; 2255 2256 if (obj->pages) 2257 return 0; 2258 2259 if (obj->madv != I915_MADV_WILLNEED) { 2260 DRM_ERROR("Attempting to obtain a purgeable object\n"); 2261 return -EINVAL; 2262 } 2263 2264 BUG_ON(obj->pages_pin_count); 2265 2266 ret = ops->get_pages(obj); 2267 if (ret) 2268 return ret; 2269 2270 list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list); 2271 return 0; 2272 } 2273 2274 static void 2275 i915_gem_object_move_to_active(struct drm_i915_gem_object *obj, 2276 struct intel_ring_buffer *ring) 2277 { 2278 struct drm_device *dev = obj->base.dev; 2279 struct drm_i915_private *dev_priv = dev->dev_private; 2280 u32 seqno = intel_ring_get_seqno(ring); 2281 2282 BUG_ON(ring == NULL); 2283 if (obj->ring != ring && obj->last_write_seqno) { 2284 /* Keep the seqno relative to the current ring */ 2285 obj->last_write_seqno = seqno; 2286 } 2287 obj->ring = ring; 2288 2289 /* Add a reference if we're newly entering the active list. 
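 * The matching unreference is dropped in i915_gem_object_move_to_inactive()
 * when the object is retired back onto the inactive list.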
*/ 2290 if (!obj->active) { 2291 drm_gem_object_reference(&obj->base); 2292 obj->active = 1; 2293 } 2294 2295 list_move_tail(&obj->ring_list, &ring->active_list); 2296 2297 obj->last_read_seqno = seqno; 2298 2299 if (obj->fenced_gpu_access) { 2300 obj->last_fenced_seqno = seqno; 2301 2302 /* Bump MRU to take account of the delayed flush */ 2303 if (obj->fence_reg != I915_FENCE_REG_NONE) { 2304 struct drm_i915_fence_reg *reg; 2305 2306 reg = &dev_priv->fence_regs[obj->fence_reg]; 2307 list_move_tail(&reg->lru_list, 2308 &dev_priv->mm.fence_list); 2309 } 2310 } 2311 } 2312 2313 void i915_vma_move_to_active(struct i915_vma *vma, 2314 struct intel_ring_buffer *ring) 2315 { 2316 list_move_tail(&vma->mm_list, &vma->vm->active_list); 2317 return i915_gem_object_move_to_active(vma->obj, ring); 2318 } 2319 2320 static void 2321 i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj) 2322 { 2323 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2324 struct i915_address_space *ggtt_vm = &dev_priv->gtt.base; 2325 struct i915_vma *vma = i915_gem_obj_to_vma(obj, ggtt_vm); 2326 2327 BUG_ON(obj->base.write_domain & ~I915_GEM_GPU_DOMAINS); 2328 BUG_ON(!obj->active); 2329 2330 list_move_tail(&vma->mm_list, &ggtt_vm->inactive_list); 2331 2332 list_del_init(&obj->ring_list); 2333 obj->ring = NULL; 2334 2335 obj->last_read_seqno = 0; 2336 obj->last_write_seqno = 0; 2337 obj->base.write_domain = 0; 2338 2339 obj->last_fenced_seqno = 0; 2340 obj->fenced_gpu_access = false; 2341 2342 obj->active = 0; 2343 drm_gem_object_unreference(&obj->base); 2344 2345 WARN_ON(i915_verify_lists(dev)); 2346 } 2347 2348 static int 2349 i915_gem_init_seqno(struct drm_device *dev, u32 seqno) 2350 { 2351 struct drm_i915_private *dev_priv = dev->dev_private; 2352 struct intel_ring_buffer *ring; 2353 int ret, i, j; 2354 2355 /* Carefully retire all requests without writing to the rings */ 2356 for_each_ring(ring, dev_priv, i) { 2357 ret = intel_ring_idle(ring); 2358 if (ret) 2359 return ret; 2360 } 2361 i915_gem_retire_requests(dev); 2362 2363 /* Finally reset hw state */ 2364 for_each_ring(ring, dev_priv, i) { 2365 intel_ring_init_seqno(ring, seqno); 2366 2367 for (j = 0; j < ARRAY_SIZE(ring->sync_seqno); j++) 2368 ring->sync_seqno[j] = 0; 2369 } 2370 2371 return 0; 2372 } 2373 2374 int i915_gem_set_seqno(struct drm_device *dev, u32 seqno) 2375 { 2376 struct drm_i915_private *dev_priv = dev->dev_private; 2377 int ret; 2378 2379 if (seqno == 0) 2380 return -EINVAL; 2381 2382 /* HWS page needs to be set less than what we 2383 * will inject to ring 2384 */ 2385 ret = i915_gem_init_seqno(dev, seqno - 1); 2386 if (ret) 2387 return ret; 2388 2389 /* Carefully set the last_seqno value so that wrap 2390 * detection still works 2391 */ 2392 dev_priv->next_seqno = seqno; 2393 dev_priv->last_seqno = seqno - 1; 2394 if (dev_priv->last_seqno == 0) 2395 dev_priv->last_seqno--; 2396 2397 return 0; 2398 } 2399 2400 int 2401 i915_gem_get_seqno(struct drm_device *dev, u32 *seqno) 2402 { 2403 struct drm_i915_private *dev_priv = dev->dev_private; 2404 2405 /* reserve 0 for non-seqno */ 2406 if (dev_priv->next_seqno == 0) { 2407 int ret = i915_gem_init_seqno(dev, 0); 2408 if (ret) 2409 return ret; 2410 2411 dev_priv->next_seqno = 1; 2412 } 2413 2414 *seqno = dev_priv->last_seqno = dev_priv->next_seqno++; 2415 return 0; 2416 } 2417 2418 int __i915_add_request(struct intel_ring_buffer *ring, 2419 struct drm_file *file, 2420 struct drm_i915_gem_object *obj, 2421 u32 *out_seqno) 2422 { 2423 drm_i915_private_t *dev_priv =
ring->dev->dev_private; 2424 struct drm_i915_gem_request *request; 2425 u32 request_ring_position, request_start; 2426 int was_empty; 2427 int ret; 2428 2429 request_start = intel_ring_get_tail(ring); 2430 /* 2431 * Emit any outstanding flushes - execbuf can fail to emit the flush 2432 * after having emitted the batchbuffer command. Hence we need to fix 2433 * things up similar to emitting the lazy request. The difference here 2434 * is that the flush _must_ happen before the next request, no matter 2435 * what. 2436 */ 2437 ret = intel_ring_flush_all_caches(ring); 2438 if (ret) 2439 return ret; 2440 2441 request = ring->preallocated_lazy_request; 2442 if (WARN_ON(request == NULL)) 2443 return -ENOMEM; 2444 2445 /* Record the position of the start of the request so that 2446 * should we detect the updated seqno part-way through the 2447 * GPU processing the request, we never over-estimate the 2448 * position of the head. 2449 */ 2450 request_ring_position = intel_ring_get_tail(ring); 2451 2452 ret = ring->add_request(ring); 2453 if (ret) 2454 return ret; 2455 2456 request->seqno = intel_ring_get_seqno(ring); 2457 request->ring = ring; 2458 request->head = request_start; 2459 request->tail = request_ring_position; 2460 2461 /* Whilst this request exists, batch_obj will be on the 2462 * active_list, and so will hold the active reference. Only when this 2463 * request is retired will the batch_obj be moved onto the 2464 * inactive_list and lose its active reference. Hence we do not need 2465 * to explicitly hold another reference here. 2466 */ 2467 request->batch_obj = obj; 2468 2469 /* Hold a reference to the current context so that we can inspect 2470 * it later in case a hangcheck error event fires. 2471 */ 2472 request->ctx = ring->last_context; 2473 if (request->ctx) 2474 i915_gem_context_reference(request->ctx); 2475 2476 request->emitted_jiffies = jiffies; 2477 was_empty = list_empty(&ring->request_list); 2478 list_add_tail(&request->list, &ring->request_list); 2479 request->file_priv = NULL; 2480 2481 if (file) { 2482 struct drm_i915_file_private *file_priv = file->driver_priv; 2483 2484 spin_lock(&file_priv->mm.lock); 2485 request->file_priv = file_priv; 2486 list_add_tail(&request->client_list, 2487 &file_priv->mm.request_list); 2488 spin_unlock(&file_priv->mm.lock); 2489 } 2490 2491 trace_i915_gem_request_add(ring, request->seqno); 2492 ring->outstanding_lazy_seqno = 0; 2493 ring->preallocated_lazy_request = NULL; 2494 2495 if (!dev_priv->ums.mm_suspended) { 2496 i915_queue_hangcheck(ring->dev); 2497 2498 if (was_empty) { 2499 cancel_delayed_work_sync(&dev_priv->mm.idle_work); 2500 queue_delayed_work(dev_priv->wq, 2501 &dev_priv->mm.retire_work, 2502 round_jiffies_up_relative(HZ)); 2503 intel_mark_busy(dev_priv->dev); 2504 } 2505 } 2506 2507 if (out_seqno) 2508 *out_seqno = request->seqno; 2509 return 0; 2510 } 2511 2512 static inline void 2513 i915_gem_request_remove_from_client(struct drm_i915_gem_request *request) 2514 { 2515 struct drm_i915_file_private *file_priv = request->file_priv; 2516 2517 if (!file_priv) 2518 return; 2519 2520 spin_lock(&file_priv->mm.lock); 2521 list_del(&request->client_list); 2522 request->file_priv = NULL; 2523 spin_unlock(&file_priv->mm.lock); 2524 } 2525 2526 static bool i915_head_inside_object(u32 acthd, struct drm_i915_gem_object *obj, 2527 struct i915_address_space *vm) 2528 { 2529 if (acthd >= i915_gem_obj_offset(obj, vm) && 2530 acthd < i915_gem_obj_offset(obj, vm) + obj->base.size) 2531 return true; 2532 2533 return false; 2534 } 2535
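/*
 * Note on the helper below: a request's [head, tail) range may wrap past the
 * end of the ring, in which case request_start > request_end and the masked
 * head matches if it lies on either side of the wrap. For example (purely
 * illustrative numbers), with head == 0xf00 and tail == 0x100 an acthd of
 * either 0xf80 or 0x080 counts as being inside the request.
 */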
2536 static bool i915_head_inside_request(const u32 acthd_unmasked, 2537 const u32 request_start, 2538 const u32 request_end) 2539 { 2540 const u32 acthd = acthd_unmasked & HEAD_ADDR; 2541 2542 if (request_start < request_end) { 2543 if (acthd >= request_start && acthd < request_end) 2544 return true; 2545 } else if (request_start > request_end) { 2546 if (acthd >= request_start || acthd < request_end) 2547 return true; 2548 } 2549 2550 return false; 2551 } 2552 2553 static struct i915_address_space * 2554 request_to_vm(struct drm_i915_gem_request *request) 2555 { 2556 struct drm_i915_private *dev_priv = request->ring->dev->dev_private; 2557 struct i915_address_space *vm; 2558 2559 vm = &dev_priv->gtt.base; 2560 2561 return vm; 2562 } 2563 2564 static bool i915_request_guilty(struct drm_i915_gem_request *request, 2565 const u32 acthd, bool *inside) 2566 { 2567 /* There is a possibility that unmasked head address 2568 * pointing inside the ring, matches the batch_obj address range. 2569 * However this is extremely unlikely. 2570 */ 2571 if (request->batch_obj) { 2572 if (i915_head_inside_object(acthd, request->batch_obj, 2573 request_to_vm(request))) { 2574 *inside = true; 2575 return true; 2576 } 2577 } 2578 2579 if (i915_head_inside_request(acthd, request->head, request->tail)) { 2580 *inside = false; 2581 return true; 2582 } 2583 2584 return false; 2585 } 2586 2587 static bool i915_context_is_banned(const struct i915_ctx_hang_stats *hs) 2588 { 2589 const unsigned long elapsed = get_seconds() - hs->guilty_ts; 2590 2591 if (hs->banned) 2592 return true; 2593 2594 if (elapsed <= DRM_I915_CTX_BAN_PERIOD) { 2595 DRM_ERROR("context hanging too fast, declaring banned!\n"); 2596 return true; 2597 } 2598 2599 return false; 2600 } 2601 2602 static void i915_set_reset_status(struct intel_ring_buffer *ring, 2603 struct drm_i915_gem_request *request, 2604 u32 acthd) 2605 { 2606 struct i915_ctx_hang_stats *hs = NULL; 2607 bool inside, guilty; 2608 unsigned long offset = 0; 2609 2610 /* Innocent until proven guilty */ 2611 guilty = false; 2612 2613 if (request->batch_obj) 2614 offset = i915_gem_obj_offset(request->batch_obj, 2615 request_to_vm(request)); 2616 2617 if (ring->hangcheck.action != HANGCHECK_WAIT && 2618 i915_request_guilty(request, acthd, &inside)) { 2619 DRM_DEBUG("%s hung %s bo (0x%lx ctx %d) at 0x%x\n", 2620 ring->name, 2621 inside ? "inside" : "flushing", 2622 offset, 2623 request->ctx ? 
request->ctx->id : 0, 2624 acthd); 2625 2626 guilty = true; 2627 } 2628 2629 /* If contexts are disabled or this is the default context, use 2630 * file_priv->reset_state 2631 */ 2632 if (request->ctx && request->ctx->id != DEFAULT_CONTEXT_ID) 2633 hs = &request->ctx->hang_stats; 2634 else if (request->file_priv) 2635 hs = &request->file_priv->hang_stats; 2636 2637 if (hs) { 2638 if (guilty) { 2639 hs->banned = i915_context_is_banned(hs); 2640 hs->batch_active++; 2641 hs->guilty_ts = get_seconds(); 2642 } else { 2643 hs->batch_pending++; 2644 } 2645 } 2646 } 2647 2648 static void i915_gem_free_request(struct drm_i915_gem_request *request) 2649 { 2650 list_del(&request->list); 2651 i915_gem_request_remove_from_client(request); 2652 2653 if (request->ctx) 2654 i915_gem_context_unreference(request->ctx); 2655 2656 kfree(request); 2657 } 2658 2659 static void i915_gem_reset_ring_status(struct drm_i915_private *dev_priv, 2660 struct intel_ring_buffer *ring) 2661 { 2662 u32 completed_seqno = ring->get_seqno(ring, false); 2663 u32 acthd = intel_ring_get_active_head(ring); 2664 struct drm_i915_gem_request *request; 2665 2666 list_for_each_entry(request, &ring->request_list, list) { 2667 if (i915_seqno_passed(completed_seqno, request->seqno)) 2668 continue; 2669 2670 i915_set_reset_status(ring, request, acthd); 2671 } 2672 } 2673 2674 static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv, 2675 struct intel_ring_buffer *ring) 2676 { 2677 while (!list_empty(&ring->active_list)) { 2678 struct drm_i915_gem_object *obj; 2679 2680 obj = list_first_entry(&ring->active_list, 2681 struct drm_i915_gem_object, 2682 ring_list); 2683 2684 i915_gem_object_move_to_inactive(obj); 2685 } 2686 2687 /* 2688 * We must free the requests after all the corresponding objects have 2689 * been moved off active lists. Which is the same order as the normal 2690 * retire_requests function does. This is important if object hold 2691 * implicit references on things like e.g. ppgtt address spaces through 2692 * the request. 2693 */ 2694 while (!list_empty(&ring->request_list)) { 2695 struct drm_i915_gem_request *request; 2696 2697 request = list_first_entry(&ring->request_list, 2698 struct drm_i915_gem_request, 2699 list); 2700 2701 i915_gem_free_request(request); 2702 } 2703 } 2704 2705 void i915_gem_restore_fences(struct drm_device *dev) 2706 { 2707 struct drm_i915_private *dev_priv = dev->dev_private; 2708 int i; 2709 2710 for (i = 0; i < dev_priv->num_fence_regs; i++) { 2711 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i]; 2712 2713 /* 2714 * Commit delayed tiling changes if we have an object still 2715 * attached to the fence, otherwise just clear the fence. 2716 */ 2717 if (reg->obj) { 2718 i915_gem_object_update_fence(reg->obj, reg, 2719 reg->obj->tiling_mode); 2720 } else { 2721 i915_gem_write_fence(dev, i, NULL); 2722 } 2723 } 2724 } 2725 2726 void i915_gem_reset(struct drm_device *dev) 2727 { 2728 struct drm_i915_private *dev_priv = dev->dev_private; 2729 struct intel_ring_buffer *ring; 2730 int i; 2731 2732 /* 2733 * Before we free the objects from the requests, we need to inspect 2734 * them for finding the guilty party. As the requests only borrow 2735 * their reference to the objects, the inspection must be done first. 
2736 */ 2737 for_each_ring(ring, dev_priv, i) 2738 i915_gem_reset_ring_status(dev_priv, ring); 2739 2740 for_each_ring(ring, dev_priv, i) 2741 i915_gem_reset_ring_cleanup(dev_priv, ring); 2742 2743 i915_gem_cleanup_ringbuffer(dev); 2744 2745 i915_gem_restore_fences(dev); 2746 } 2747 2748 /** 2749 * This function clears the request list as sequence numbers are passed. 2750 */ 2751 void 2752 i915_gem_retire_requests_ring(struct intel_ring_buffer *ring) 2753 { 2754 uint32_t seqno; 2755 2756 if (list_empty(&ring->request_list)) 2757 return; 2758 2759 WARN_ON(i915_verify_lists(ring->dev)); 2760 2761 seqno = ring->get_seqno(ring, true); 2762 2763 while (!list_empty(&ring->request_list)) { 2764 struct drm_i915_gem_request *request; 2765 2766 request = list_first_entry(&ring->request_list, 2767 struct drm_i915_gem_request, 2768 list); 2769 2770 if (!i915_seqno_passed(seqno, request->seqno)) 2771 break; 2772 2773 trace_i915_gem_request_retire(ring, request->seqno); 2774 /* We know the GPU must have read the request to have 2775 * sent us the seqno + interrupt, so use the position 2776 * of tail of the request to update the last known position 2777 * of the GPU head. 2778 */ 2779 ring->last_retired_head = request->tail; 2780 2781 i915_gem_free_request(request); 2782 } 2783 2784 /* Move any buffers on the active list that are no longer referenced 2785 * by the ringbuffer to the flushing/inactive lists as appropriate. 2786 */ 2787 while (!list_empty(&ring->active_list)) { 2788 struct drm_i915_gem_object *obj; 2789 2790 obj = list_first_entry(&ring->active_list, 2791 struct drm_i915_gem_object, 2792 ring_list); 2793 2794 if (!i915_seqno_passed(seqno, obj->last_read_seqno)) 2795 break; 2796 2797 i915_gem_object_move_to_inactive(obj); 2798 } 2799 2800 if (unlikely(ring->trace_irq_seqno && 2801 i915_seqno_passed(seqno, ring->trace_irq_seqno))) { 2802 ring->irq_put(ring); 2803 ring->trace_irq_seqno = 0; 2804 } 2805 2806 WARN_ON(i915_verify_lists(ring->dev)); 2807 } 2808 2809 bool 2810 i915_gem_retire_requests(struct drm_device *dev) 2811 { 2812 drm_i915_private_t *dev_priv = dev->dev_private; 2813 struct intel_ring_buffer *ring; 2814 bool idle = true; 2815 int i; 2816 2817 for_each_ring(ring, dev_priv, i) { 2818 i915_gem_retire_requests_ring(ring); 2819 idle &= list_empty(&ring->request_list); 2820 } 2821 2822 if (idle) 2823 mod_delayed_work(dev_priv->wq, 2824 &dev_priv->mm.idle_work, 2825 msecs_to_jiffies(100)); 2826 2827 return idle; 2828 } 2829 2830 static void 2831 i915_gem_retire_work_handler(struct work_struct *work) 2832 { 2833 struct drm_i915_private *dev_priv = 2834 container_of(work, typeof(*dev_priv), mm.retire_work.work); 2835 struct drm_device *dev = dev_priv->dev; 2836 bool idle; 2837 2838 /* Come back later if the device is busy... */ 2839 idle = false; 2840 if (mutex_trylock(&dev->struct_mutex)) { 2841 idle = i915_gem_retire_requests(dev); 2842 mutex_unlock(&dev->struct_mutex); 2843 } 2844 if (!idle) 2845 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 2846 round_jiffies_up_relative(HZ)); 2847 } 2848 2849 static void 2850 i915_gem_idle_work_handler(struct work_struct *work) 2851 { 2852 struct drm_i915_private *dev_priv = 2853 container_of(work, typeof(*dev_priv), mm.idle_work.work); 2854 2855 intel_mark_idle(dev_priv->dev); 2856 } 2857 2858 /** 2859 * Ensures that an object will eventually get non-busy by flushing any required 2860 * write domains, emitting any outstanding lazy request and retiring and 2861 * completed requests. 
2862 */ 2863 static int 2864 i915_gem_object_flush_active(struct drm_i915_gem_object *obj) 2865 { 2866 int ret; 2867 2868 if (obj->active) { 2869 ret = i915_gem_check_olr(obj->ring, obj->last_read_seqno); 2870 if (ret) 2871 return ret; 2872 2873 i915_gem_retire_requests_ring(obj->ring); 2874 } 2875 2876 return 0; 2877 } 2878 2879 /** 2880 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT 2881 * @DRM_IOCTL_ARGS: standard ioctl arguments 2882 * 2883 * Returns 0 if successful, else an error is returned with the remaining time in 2884 * the timeout parameter. 2885 * -ETIME: object is still busy after timeout 2886 * -ERESTARTSYS: signal interrupted the wait 2887 * -ENOENT: object doesn't exist 2888 * Also possible, but rare: 2889 * -EAGAIN: GPU wedged 2890 * -ENOMEM: damn 2891 * -ENODEV: Internal IRQ fail 2892 * -E?: The add request failed 2893 * 2894 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any 2895 * non-zero timeout parameter the wait ioctl will wait for the given number of 2896 * nanoseconds on an object becoming unbusy. Since the wait itself does so 2897 * without holding struct_mutex the object may become re-busied before this 2898 * function completes. A similar but shorter race condition exists in the busy 2899 * ioctl. 2900 */ 2901 int 2902 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) 2903 { 2904 drm_i915_private_t *dev_priv = dev->dev_private; 2905 struct drm_i915_gem_wait *args = data; 2906 struct drm_i915_gem_object *obj; 2907 struct intel_ring_buffer *ring = NULL; 2908 struct timespec timeout_stack, *timeout = NULL; 2909 unsigned reset_counter; 2910 u32 seqno = 0; 2911 int ret = 0; 2912 2913 if (args->timeout_ns >= 0) { 2914 timeout_stack = ns_to_timespec(args->timeout_ns); 2915 timeout = &timeout_stack; 2916 } 2917 2918 ret = i915_mutex_lock_interruptible(dev); 2919 if (ret) 2920 return ret; 2921 2922 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->bo_handle)); 2923 if (&obj->base == NULL) { 2924 mutex_unlock(&dev->struct_mutex); 2925 return -ENOENT; 2926 } 2927 2928 /* Need to make sure the object gets inactive eventually. */ 2929 ret = i915_gem_object_flush_active(obj); 2930 if (ret) 2931 goto out; 2932 2933 if (obj->active) { 2934 seqno = obj->last_read_seqno; 2935 ring = obj->ring; 2936 } 2937 2938 if (seqno == 0) 2939 goto out; 2940 2941 /* Do this after OLR check to make sure we make forward progress polling 2942 * on this IOCTL with a 0 timeout (like busy ioctl) 2943 */ 2944 if (!args->timeout_ns) { 2945 ret = -ETIME; 2946 goto out; 2947 } 2948 2949 drm_gem_object_unreference(&obj->base); 2950 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter); 2951 mutex_unlock(&dev->struct_mutex); 2952 2953 ret = __wait_seqno(ring, seqno, reset_counter, true, timeout, file->driver_priv); 2954 if (timeout) 2955 args->timeout_ns = timespec_to_ns(timeout); 2956 return ret; 2957 2958 out: 2959 drm_gem_object_unreference(&obj->base); 2960 mutex_unlock(&dev->struct_mutex); 2961 return ret; 2962 } 2963 2964 /** 2965 * i915_gem_object_sync - sync an object to a ring. 2966 * 2967 * @obj: object which may be in use on another ring. 2968 * @to: ring we wish to use the object on. May be NULL. 2969 * 2970 * This code is meant to abstract object synchronization with the GPU. 2971 * Calling with NULL implies synchronizing the object with the CPU 2972 * rather than a particular GPU ring. 2973 * 2974 * Returns 0 if successful, else propagates up the lower layer error.
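 *
 * A typical use within this file is preparing a buffer for scanout:
 * i915_gem_object_pin_to_display_plane() calls
 * i915_gem_object_sync(obj, pipelined), which either waits for outstanding
 * rendering on the other ring or, when semaphores are enabled, makes the
 * target ring wait on it.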
2975 */ 2976 int 2977 i915_gem_object_sync(struct drm_i915_gem_object *obj, 2978 struct intel_ring_buffer *to) 2979 { 2980 struct intel_ring_buffer *from = obj->ring; 2981 u32 seqno; 2982 int ret, idx; 2983 2984 if (from == NULL || to == from) 2985 return 0; 2986 2987 if (to == NULL || !i915_semaphore_is_enabled(obj->base.dev)) 2988 return i915_gem_object_wait_rendering(obj, false); 2989 2990 idx = intel_ring_sync_index(from, to); 2991 2992 seqno = obj->last_read_seqno; 2993 if (seqno <= from->sync_seqno[idx]) 2994 return 0; 2995 2996 ret = i915_gem_check_olr(obj->ring, seqno); 2997 if (ret) 2998 return ret; 2999 3000 trace_i915_gem_ring_sync_to(from, to, seqno); 3001 ret = to->sync_to(to, from, seqno); 3002 if (!ret) 3003 /* We use last_read_seqno because sync_to() 3004 * might have just caused seqno wrap under 3005 * the radar. 3006 */ 3007 from->sync_seqno[idx] = obj->last_read_seqno; 3008 3009 return ret; 3010 } 3011 3012 static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj) 3013 { 3014 u32 old_write_domain, old_read_domains; 3015 3016 /* Force a pagefault for domain tracking on next user access */ 3017 i915_gem_release_mmap(obj); 3018 3019 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) 3020 return; 3021 3022 /* Wait for any direct GTT access to complete */ 3023 mb(); 3024 3025 old_read_domains = obj->base.read_domains; 3026 old_write_domain = obj->base.write_domain; 3027 3028 obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT; 3029 obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT; 3030 3031 trace_i915_gem_object_change_domain(obj, 3032 old_read_domains, 3033 old_write_domain); 3034 } 3035 3036 int i915_vma_unbind(struct i915_vma *vma) 3037 { 3038 struct drm_i915_gem_object *obj = vma->obj; 3039 drm_i915_private_t *dev_priv = obj->base.dev->dev_private; 3040 int ret; 3041 3042 /* For now we only ever use 1 vma per object */ 3043 WARN_ON(!list_is_singular(&obj->vma_list)); 3044 3045 if (list_empty(&vma->vma_link)) 3046 return 0; 3047 3048 if (!drm_mm_node_allocated(&vma->node)) { 3049 i915_gem_vma_destroy(vma); 3050 3051 return 0; 3052 } 3053 3054 if (obj->pin_count) 3055 return -EBUSY; 3056 3057 BUG_ON(obj->pages == NULL); 3058 3059 ret = i915_gem_object_finish_gpu(obj); 3060 if (ret) 3061 return ret; 3062 /* Continue on if we fail due to EIO, the GPU is hung so we 3063 * should be safe and we need to cleanup or else we might 3064 * cause memory corruption through use-after-free. 3065 */ 3066 3067 i915_gem_object_finish_gtt(obj); 3068 3069 /* release the fence reg _after_ flushing */ 3070 ret = i915_gem_object_put_fence(obj); 3071 if (ret) 3072 return ret; 3073 3074 trace_i915_vma_unbind(vma); 3075 3076 if (obj->has_global_gtt_mapping) 3077 i915_gem_gtt_unbind_object(obj); 3078 if (obj->has_aliasing_ppgtt_mapping) { 3079 i915_ppgtt_unbind_object(dev_priv->mm.aliasing_ppgtt, obj); 3080 obj->has_aliasing_ppgtt_mapping = 0; 3081 } 3082 i915_gem_gtt_finish_object(obj); 3083 3084 list_del(&vma->mm_list); 3085 /* Avoid an unnecessary call to unbind on rebind. */ 3086 if (i915_is_ggtt(vma->vm)) 3087 obj->map_and_fenceable = true; 3088 3089 drm_mm_remove_node(&vma->node); 3090 i915_gem_vma_destroy(vma); 3091 3092 /* Since the unbound list is global, only move to that list if 3093 * no more VMAs exist. 
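 * (Given the single-vma-per-object assumption WARNed about at the top of
 * this function, the list is in practice always empty at this point.)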
*/ 3094 if (list_empty(&obj->vma_list)) 3095 list_move_tail(&obj->global_list, &dev_priv->mm.unbound_list); 3096 3097 /* And finally now the object is completely decoupled from this vma, 3098 * we can drop its hold on the backing storage and allow it to be 3099 * reaped by the shrinker. 3100 */ 3101 i915_gem_object_unpin_pages(obj); 3102 /* XXX Until we've hooked up the shrinking functions. */ 3103 i915_gem_object_put_pages(obj); 3104 3105 return 0; 3106 } 3107 3108 /** 3109 * Unbinds an object from the global GTT aperture. 3110 */ 3111 int 3112 i915_gem_object_ggtt_unbind(struct drm_i915_gem_object *obj) 3113 { 3114 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 3115 struct i915_address_space *ggtt = &dev_priv->gtt.base; 3116 3117 if (!i915_gem_obj_ggtt_bound(obj)) 3118 return 0; 3119 3120 if (obj->pin_count) 3121 return -EBUSY; 3122 3123 BUG_ON(obj->pages == NULL); 3124 3125 return i915_vma_unbind(i915_gem_obj_to_vma(obj, ggtt)); 3126 } 3127 3128 int i915_gpu_idle(struct drm_device *dev) 3129 { 3130 drm_i915_private_t *dev_priv = dev->dev_private; 3131 struct intel_ring_buffer *ring; 3132 int ret, i; 3133 3134 /* Flush everything onto the inactive list. */ 3135 for_each_ring(ring, dev_priv, i) { 3136 ret = i915_switch_context(ring, NULL, DEFAULT_CONTEXT_ID); 3137 if (ret) 3138 return ret; 3139 3140 ret = intel_ring_idle(ring); 3141 if (ret) 3142 return ret; 3143 } 3144 3145 return 0; 3146 } 3147 3148 static void i965_write_fence_reg(struct drm_device *dev, int reg, 3149 struct drm_i915_gem_object *obj) 3150 { 3151 drm_i915_private_t *dev_priv = dev->dev_private; 3152 int fence_reg; 3153 int fence_pitch_shift; 3154 3155 if (INTEL_INFO(dev)->gen >= 6) { 3156 fence_reg = FENCE_REG_SANDYBRIDGE_0; 3157 fence_pitch_shift = SANDYBRIDGE_FENCE_PITCH_SHIFT; 3158 } else { 3159 fence_reg = FENCE_REG_965_0; 3160 fence_pitch_shift = I965_FENCE_PITCH_SHIFT; 3161 } 3162 3163 fence_reg += reg * 8; 3164 3165 /* To w/a incoherency with non-atomic 64-bit register updates, 3166 * we split the 64-bit update into two 32-bit writes. In order 3167 * for a partial fence not to be evaluated between writes, we 3168 * precede the update with write to turn off the fence register, 3169 * and only enable the fence as the last step. 3170 * 3171 * For extra levels of paranoia, we make sure each step lands 3172 * before applying the next step. 3173 */ 3174 I915_WRITE(fence_reg, 0); 3175 POSTING_READ(fence_reg); 3176 3177 if (obj) { 3178 u32 size = i915_gem_obj_ggtt_size(obj); 3179 uint64_t val; 3180 3181 /* Adjust fence size to match tiled area */ 3182 if (obj->tiling_mode != I915_TILING_NONE) { 3183 uint32_t row_size = obj->stride * 3184 (obj->tiling_mode == I915_TILING_Y ? 
32 : 8); 3185 size = (size / row_size) * row_size; 3186 } 3187 3188 val = (uint64_t)((i915_gem_obj_ggtt_offset(obj) + size - 4096) & 3189 0xfffff000) << 32; 3190 val |= i915_gem_obj_ggtt_offset(obj) & 0xfffff000; 3191 val |= (uint64_t)((obj->stride / 128) - 1) << fence_pitch_shift; 3192 if (obj->tiling_mode == I915_TILING_Y) 3193 val |= 1 << I965_FENCE_TILING_Y_SHIFT; 3194 val |= I965_FENCE_REG_VALID; 3195 3196 I915_WRITE(fence_reg + 4, val >> 32); 3197 POSTING_READ(fence_reg + 4); 3198 3199 I915_WRITE(fence_reg + 0, val); 3200 POSTING_READ(fence_reg); 3201 } else { 3202 I915_WRITE(fence_reg + 4, 0); 3203 POSTING_READ(fence_reg + 4); 3204 } 3205 } 3206 3207 static void i915_write_fence_reg(struct drm_device *dev, int reg, 3208 struct drm_i915_gem_object *obj) 3209 { 3210 drm_i915_private_t *dev_priv = dev->dev_private; 3211 u32 val; 3212 3213 if (obj) { 3214 u32 size = i915_gem_obj_ggtt_size(obj); 3215 int pitch_val; 3216 int tile_width; 3217 3218 WARN((i915_gem_obj_ggtt_offset(obj) & ~I915_FENCE_START_MASK) || 3219 (size & -size) != size || 3220 (i915_gem_obj_ggtt_offset(obj) & (size - 1)), 3221 "object 0x%08lx [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n", 3222 i915_gem_obj_ggtt_offset(obj), obj->map_and_fenceable, size); 3223 3224 if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev)) 3225 tile_width = 128; 3226 else 3227 tile_width = 512; 3228 3229 /* Note: pitch better be a power of two tile widths */ 3230 pitch_val = obj->stride / tile_width; 3231 pitch_val = ffs(pitch_val) - 1; 3232 3233 val = i915_gem_obj_ggtt_offset(obj); 3234 if (obj->tiling_mode == I915_TILING_Y) 3235 val |= 1 << I830_FENCE_TILING_Y_SHIFT; 3236 val |= I915_FENCE_SIZE_BITS(size); 3237 val |= pitch_val << I830_FENCE_PITCH_SHIFT; 3238 val |= I830_FENCE_REG_VALID; 3239 } else 3240 val = 0; 3241 3242 if (reg < 8) 3243 reg = FENCE_REG_830_0 + reg * 4; 3244 else 3245 reg = FENCE_REG_945_8 + (reg - 8) * 4; 3246 3247 I915_WRITE(reg, val); 3248 POSTING_READ(reg); 3249 } 3250 3251 static void i830_write_fence_reg(struct drm_device *dev, int reg, 3252 struct drm_i915_gem_object *obj) 3253 { 3254 drm_i915_private_t *dev_priv = dev->dev_private; 3255 uint32_t val; 3256 3257 if (obj) { 3258 u32 size = i915_gem_obj_ggtt_size(obj); 3259 uint32_t pitch_val; 3260 3261 WARN((i915_gem_obj_ggtt_offset(obj) & ~I830_FENCE_START_MASK) || 3262 (size & -size) != size || 3263 (i915_gem_obj_ggtt_offset(obj) & (size - 1)), 3264 "object 0x%08lx not 512K or pot-size 0x%08x aligned\n", 3265 i915_gem_obj_ggtt_offset(obj), size); 3266 3267 pitch_val = obj->stride / 128; 3268 pitch_val = ffs(pitch_val) - 1; 3269 3270 val = i915_gem_obj_ggtt_offset(obj); 3271 if (obj->tiling_mode == I915_TILING_Y) 3272 val |= 1 << I830_FENCE_TILING_Y_SHIFT; 3273 val |= I830_FENCE_SIZE_BITS(size); 3274 val |= pitch_val << I830_FENCE_PITCH_SHIFT; 3275 val |= I830_FENCE_REG_VALID; 3276 } else 3277 val = 0; 3278 3279 I915_WRITE(FENCE_REG_830_0 + reg * 4, val); 3280 POSTING_READ(FENCE_REG_830_0 + reg * 4); 3281 } 3282 3283 inline static bool i915_gem_object_needs_mb(struct drm_i915_gem_object *obj) 3284 { 3285 return obj && obj->base.read_domains & I915_GEM_DOMAIN_GTT; 3286 } 3287 3288 static void i915_gem_write_fence(struct drm_device *dev, int reg, 3289 struct drm_i915_gem_object *obj) 3290 { 3291 struct drm_i915_private *dev_priv = dev->dev_private; 3292 3293 /* Ensure that all CPU reads are completed before installing a fence 3294 * and all writes before removing the fence. 
3295 */ 3296 if (i915_gem_object_needs_mb(dev_priv->fence_regs[reg].obj)) 3297 mb(); 3298 3299 WARN(obj && (!obj->stride || !obj->tiling_mode), 3300 "bogus fence setup with stride: 0x%x, tiling mode: %i\n", 3301 obj->stride, obj->tiling_mode); 3302 3303 switch (INTEL_INFO(dev)->gen) { 3304 case 8: 3305 case 7: 3306 case 6: 3307 case 5: 3308 case 4: i965_write_fence_reg(dev, reg, obj); break; 3309 case 3: i915_write_fence_reg(dev, reg, obj); break; 3310 case 2: i830_write_fence_reg(dev, reg, obj); break; 3311 default: BUG(); 3312 } 3313 3314 /* And similarly be paranoid that no direct access to this region 3315 * is reordered to before the fence is installed. 3316 */ 3317 if (i915_gem_object_needs_mb(obj)) 3318 mb(); 3319 } 3320 3321 static inline int fence_number(struct drm_i915_private *dev_priv, 3322 struct drm_i915_fence_reg *fence) 3323 { 3324 return fence - dev_priv->fence_regs; 3325 } 3326 3327 static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj, 3328 struct drm_i915_fence_reg *fence, 3329 bool enable) 3330 { 3331 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 3332 int reg = fence_number(dev_priv, fence); 3333 3334 i915_gem_write_fence(obj->base.dev, reg, enable ? obj : NULL); 3335 3336 if (enable) { 3337 obj->fence_reg = reg; 3338 fence->obj = obj; 3339 list_move_tail(&fence->lru_list, &dev_priv->mm.fence_list); 3340 } else { 3341 obj->fence_reg = I915_FENCE_REG_NONE; 3342 fence->obj = NULL; 3343 list_del_init(&fence->lru_list); 3344 } 3345 obj->fence_dirty = false; 3346 } 3347 3348 static int 3349 i915_gem_object_wait_fence(struct drm_i915_gem_object *obj) 3350 { 3351 if (obj->last_fenced_seqno) { 3352 int ret = i915_wait_seqno(obj->ring, obj->last_fenced_seqno); 3353 if (ret) 3354 return ret; 3355 3356 obj->last_fenced_seqno = 0; 3357 } 3358 3359 obj->fenced_gpu_access = false; 3360 return 0; 3361 } 3362 3363 int 3364 i915_gem_object_put_fence(struct drm_i915_gem_object *obj) 3365 { 3366 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 3367 struct drm_i915_fence_reg *fence; 3368 int ret; 3369 3370 ret = i915_gem_object_wait_fence(obj); 3371 if (ret) 3372 return ret; 3373 3374 if (obj->fence_reg == I915_FENCE_REG_NONE) 3375 return 0; 3376 3377 fence = &dev_priv->fence_regs[obj->fence_reg]; 3378 3379 i915_gem_object_fence_lost(obj); 3380 i915_gem_object_update_fence(obj, fence, false); 3381 3382 return 0; 3383 } 3384 3385 static struct drm_i915_fence_reg * 3386 i915_find_fence_reg(struct drm_device *dev) 3387 { 3388 struct drm_i915_private *dev_priv = dev->dev_private; 3389 struct drm_i915_fence_reg *reg, *avail; 3390 int i; 3391 3392 /* First try to find a free reg */ 3393 avail = NULL; 3394 for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) { 3395 reg = &dev_priv->fence_regs[i]; 3396 if (!reg->obj) 3397 return reg; 3398 3399 if (!reg->pin_count) 3400 avail = reg; 3401 } 3402 3403 if (avail == NULL) 3404 goto deadlock; 3405 3406 /* None available, try to steal one or wait for a user to finish */ 3407 list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) { 3408 if (reg->pin_count) 3409 continue; 3410 3411 return reg; 3412 } 3413 3414 deadlock: 3415 /* Wait for completion of pending flips which consume fences */ 3416 if (intel_has_pending_fb_unpin(dev)) 3417 return ERR_PTR(-EAGAIN); 3418 3419 return ERR_PTR(-EDEADLK); 3420 } 3421 3422 /** 3423 * i915_gem_object_get_fence - set up fencing for an object 3424 * @obj: object to map through a fence reg 3425 * 3426 * When mapping objects through the GTT, 
userspace wants to be able to write 3427 * to them without having to worry about swizzling if the object is tiled. 3428 * This function walks the fence regs looking for a free one for @obj, 3429 * stealing one if it can't find any. 3430 * 3431 * It then sets up the reg based on the object's properties: address, pitch 3432 * and tiling format. 3433 * 3434 * For an untiled surface, this removes any existing fence. 3435 */ 3436 int 3437 i915_gem_object_get_fence(struct drm_i915_gem_object *obj) 3438 { 3439 struct drm_device *dev = obj->base.dev; 3440 struct drm_i915_private *dev_priv = dev->dev_private; 3441 bool enable = obj->tiling_mode != I915_TILING_NONE; 3442 struct drm_i915_fence_reg *reg; 3443 int ret; 3444 3445 /* Have we updated the tiling parameters upon the object and so 3446 * will need to serialise the write to the associated fence register? 3447 */ 3448 if (obj->fence_dirty) { 3449 ret = i915_gem_object_wait_fence(obj); 3450 if (ret) 3451 return ret; 3452 } 3453 3454 /* Just update our place in the LRU if our fence is getting reused. */ 3455 if (obj->fence_reg != I915_FENCE_REG_NONE) { 3456 reg = &dev_priv->fence_regs[obj->fence_reg]; 3457 if (!obj->fence_dirty) { 3458 list_move_tail(&reg->lru_list, 3459 &dev_priv->mm.fence_list); 3460 return 0; 3461 } 3462 } else if (enable) { 3463 reg = i915_find_fence_reg(dev); 3464 if (IS_ERR(reg)) 3465 return PTR_ERR(reg); 3466 3467 if (reg->obj) { 3468 struct drm_i915_gem_object *old = reg->obj; 3469 3470 ret = i915_gem_object_wait_fence(old); 3471 if (ret) 3472 return ret; 3473 3474 i915_gem_object_fence_lost(old); 3475 } 3476 } else 3477 return 0; 3478 3479 i915_gem_object_update_fence(obj, reg, enable); 3480 3481 return 0; 3482 } 3483 3484 static bool i915_gem_valid_gtt_space(struct drm_device *dev, 3485 struct drm_mm_node *gtt_space, 3486 unsigned long cache_level) 3487 { 3488 struct drm_mm_node *other; 3489 3490 /* On non-LLC machines we have to be careful when putting differing 3491 * types of snoopable memory together to avoid the prefetcher 3492 * crossing memory domains and dying.
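 * The cache level of each node is tracked as its drm_mm colour (see
 * i915_gem_object_bind_to_vm() and i915_gem_object_set_cache_level(), which
 * keep node.color in step with obj->cache_level), so the check below only
 * needs to compare a node against its immediate neighbours, and a neighbour
 * only matters when no hole separates the two.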
3493 */ 3494 if (HAS_LLC(dev)) 3495 return true; 3496 3497 if (!drm_mm_node_allocated(gtt_space)) 3498 return true; 3499 3500 if (list_empty(&gtt_space->node_list)) 3501 return true; 3502 3503 other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list); 3504 if (other->allocated && !other->hole_follows && other->color != cache_level) 3505 return false; 3506 3507 other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list); 3508 if (other->allocated && !gtt_space->hole_follows && other->color != cache_level) 3509 return false; 3510 3511 return true; 3512 } 3513 3514 static void i915_gem_verify_gtt(struct drm_device *dev) 3515 { 3516 #if WATCH_GTT 3517 struct drm_i915_private *dev_priv = dev->dev_private; 3518 struct drm_i915_gem_object *obj; 3519 int err = 0; 3520 3521 list_for_each_entry(obj, &dev_priv->mm.gtt_list, global_list) { 3522 if (obj->gtt_space == NULL) { 3523 printk(KERN_ERR "object found on GTT list with no space reserved\n"); 3524 err++; 3525 continue; 3526 } 3527 3528 if (obj->cache_level != obj->gtt_space->color) { 3529 printk(KERN_ERR "object reserved space [%08lx, %08lx] with wrong color, cache_level=%x, color=%lx\n", 3530 i915_gem_obj_ggtt_offset(obj), 3531 i915_gem_obj_ggtt_offset(obj) + i915_gem_obj_ggtt_size(obj), 3532 obj->cache_level, 3533 obj->gtt_space->color); 3534 err++; 3535 continue; 3536 } 3537 3538 if (!i915_gem_valid_gtt_space(dev, 3539 obj->gtt_space, 3540 obj->cache_level)) { 3541 printk(KERN_ERR "invalid GTT space found at [%08lx, %08lx] - color=%x\n", 3542 i915_gem_obj_ggtt_offset(obj), 3543 i915_gem_obj_ggtt_offset(obj) + i915_gem_obj_ggtt_size(obj), 3544 obj->cache_level); 3545 err++; 3546 continue; 3547 } 3548 } 3549 3550 WARN_ON(err); 3551 #endif 3552 } 3553 3554 /** 3555 * Finds free space in the GTT aperture and binds the object there. 3556 */ 3557 static int 3558 i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, 3559 struct i915_address_space *vm, 3560 unsigned alignment, 3561 bool map_and_fenceable, 3562 bool nonblocking) 3563 { 3564 struct drm_device *dev = obj->base.dev; 3565 drm_i915_private_t *dev_priv = dev->dev_private; 3566 u32 size, fence_size, fence_alignment, unfenced_alignment; 3567 size_t gtt_max = 3568 map_and_fenceable ? dev_priv->gtt.mappable_end : vm->total; 3569 struct i915_vma *vma; 3570 int ret; 3571 3572 fence_size = i915_gem_get_gtt_size(dev, 3573 obj->base.size, 3574 obj->tiling_mode); 3575 fence_alignment = i915_gem_get_gtt_alignment(dev, 3576 obj->base.size, 3577 obj->tiling_mode, true); 3578 unfenced_alignment = 3579 i915_gem_get_gtt_alignment(dev, 3580 obj->base.size, 3581 obj->tiling_mode, false); 3582 3583 if (alignment == 0) 3584 alignment = map_and_fenceable ? fence_alignment : 3585 unfenced_alignment; 3586 if (map_and_fenceable && alignment & (fence_alignment - 1)) { 3587 DRM_ERROR("Invalid object alignment requested %u\n", alignment); 3588 return -EINVAL; 3589 } 3590 3591 size = map_and_fenceable ? fence_size : obj->base.size; 3592 3593 /* If the object is bigger than the entire aperture, reject it early 3594 * before evicting everything in a vain attempt to find space. 3595 */ 3596 if (obj->base.size > gtt_max) { 3597 DRM_ERROR("Attempting to bind an object larger than the aperture: object=%zd > %s aperture=%zu\n", 3598 obj->base.size, 3599 map_and_fenceable ?
"mappable" : "total", 3600 gtt_max); 3601 return -E2BIG; 3602 } 3603 3604 ret = i915_gem_object_get_pages(obj); 3605 if (ret) 3606 return ret; 3607 3608 i915_gem_object_pin_pages(obj); 3609 3610 BUG_ON(!i915_is_ggtt(vm)); 3611 3612 vma = i915_gem_obj_lookup_or_create_vma(obj, vm); 3613 if (IS_ERR(vma)) { 3614 ret = PTR_ERR(vma); 3615 goto err_unpin; 3616 } 3617 3618 /* For now we only ever use 1 vma per object */ 3619 WARN_ON(!list_is_singular(&obj->vma_list)); 3620 3621 search_free: 3622 ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node, 3623 size, alignment, 3624 obj->cache_level, 0, gtt_max, 3625 DRM_MM_SEARCH_DEFAULT); 3626 if (ret) { 3627 ret = i915_gem_evict_something(dev, vm, size, alignment, 3628 obj->cache_level, 3629 map_and_fenceable, 3630 nonblocking); 3631 if (ret == 0) 3632 goto search_free; 3633 3634 goto err_free_vma; 3635 } 3636 if (WARN_ON(!i915_gem_valid_gtt_space(dev, &vma->node, 3637 obj->cache_level))) { 3638 ret = -EINVAL; 3639 goto err_remove_node; 3640 } 3641 3642 ret = i915_gem_gtt_prepare_object(obj); 3643 if (ret) 3644 goto err_remove_node; 3645 3646 list_move_tail(&obj->global_list, &dev_priv->mm.bound_list); 3647 list_add_tail(&vma->mm_list, &vm->inactive_list); 3648 3649 if (i915_is_ggtt(vm)) { 3650 bool mappable, fenceable; 3651 3652 fenceable = (vma->node.size == fence_size && 3653 (vma->node.start & (fence_alignment - 1)) == 0); 3654 3655 mappable = (vma->node.start + obj->base.size <= 3656 dev_priv->gtt.mappable_end); 3657 3658 obj->map_and_fenceable = mappable && fenceable; 3659 } 3660 3661 WARN_ON(map_and_fenceable && !obj->map_and_fenceable); 3662 3663 trace_i915_vma_bind(vma, map_and_fenceable); 3664 i915_gem_verify_gtt(dev); 3665 return 0; 3666 3667 err_remove_node: 3668 drm_mm_remove_node(&vma->node); 3669 err_free_vma: 3670 i915_gem_vma_destroy(vma); 3671 err_unpin: 3672 i915_gem_object_unpin_pages(obj); 3673 /* XXX Until we've hooked up the shrinking functions. */ 3674 i915_gem_object_put_pages(obj); 3675 return ret; 3676 } 3677 3678 bool 3679 i915_gem_clflush_object(struct drm_i915_gem_object *obj, 3680 bool force) 3681 { 3682 /* If we don't have a page list set up, then we're not pinned 3683 * to GPU, and we can ignore the cache flush because it'll happen 3684 * again at bind time. 3685 */ 3686 if (obj->pages == NULL) 3687 return false; 3688 3689 /* 3690 * Stolen memory is always coherent with the GPU as it is explicitly 3691 * marked as wc by the system, or the system is cache-coherent. 3692 */ 3693 if (obj->stolen) 3694 return false; 3695 3696 /* If the GPU is snooping the contents of the CPU cache, 3697 * we do not need to manually clear the CPU cache lines. However, 3698 * the caches are only snooped when the render cache is 3699 * flushed/invalidated. As we always have to emit invalidations 3700 * and flushes when moving into and out of the RENDER domain, correct 3701 * snooping behaviour occurs naturally as the result of our domain 3702 * tracking. 3703 */ 3704 if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) 3705 return false; 3706 3707 trace_i915_gem_object_clflush(obj); 3708 #if 0 3709 drm_clflush_sg(obj->pages); 3710 #else 3711 { 3712 int page_count = obj->base.size >> PAGE_SHIFT; 3713 int i; 3714 3715 for (i = 0; i < page_count; i++) 3716 pmap_flush_page(VM_PAGE_TO_PHYS(obj->pages[i])); 3717 } 3718 #endif 3719 return true; 3720 } 3721 3722 /** Flushes the GTT write domain for the object if it's dirty. 
*/ 3723 static void 3724 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) 3725 { 3726 uint32_t old_write_domain; 3727 3728 if (obj->base.write_domain != I915_GEM_DOMAIN_GTT) 3729 return; 3730 3731 /* No actual flushing is required for the GTT write domain. Writes 3732 * to it immediately go to main memory as far as we know, so there's 3733 * no chipset flush. It also doesn't land in render cache. 3734 * 3735 * However, we do have to enforce the order so that all writes through 3736 * the GTT land before any writes to the device, such as updates to 3737 * the GATT itself. 3738 */ 3739 wmb(); 3740 3741 old_write_domain = obj->base.write_domain; 3742 obj->base.write_domain = 0; 3743 3744 trace_i915_gem_object_change_domain(obj, 3745 obj->base.read_domains, 3746 old_write_domain); 3747 } 3748 3749 /** Flushes the CPU write domain for the object if it's dirty. */ 3750 static void 3751 i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj, 3752 bool force) 3753 { 3754 uint32_t old_write_domain; 3755 3756 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) 3757 return; 3758 3759 if (i915_gem_clflush_object(obj, force)) 3760 i915_gem_chipset_flush(obj->base.dev); 3761 3762 old_write_domain = obj->base.write_domain; 3763 obj->base.write_domain = 0; 3764 3765 trace_i915_gem_object_change_domain(obj, 3766 obj->base.read_domains, 3767 old_write_domain); 3768 } 3769 3770 /** 3771 * Moves a single object to the GTT read, and possibly write domain. 3772 * 3773 * This function returns when the move is complete, including waiting on 3774 * flushes to occur. 3775 */ 3776 int 3777 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) 3778 { 3779 drm_i915_private_t *dev_priv = obj->base.dev->dev_private; 3780 uint32_t old_write_domain, old_read_domains; 3781 int ret; 3782 3783 /* Not valid to be called on unbound objects. */ 3784 if (!i915_gem_obj_bound_any(obj)) 3785 return -EINVAL; 3786 3787 if (obj->base.write_domain == I915_GEM_DOMAIN_GTT) 3788 return 0; 3789 3790 ret = i915_gem_object_wait_rendering(obj, !write); 3791 if (ret) 3792 return ret; 3793 3794 i915_gem_object_flush_cpu_write_domain(obj, false); 3795 3796 /* Serialise direct access to this object with the barriers for 3797 * coherent writes from the GPU, by effectively invalidating the 3798 * GTT domain upon first access. 3799 */ 3800 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) 3801 mb(); 3802 3803 old_write_domain = obj->base.write_domain; 3804 old_read_domains = obj->base.read_domains; 3805 3806 /* It should now be out of any other write domains, and we can update 3807 * the domain values for our changes. 
3808 */ 3809 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0); 3810 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 3811 if (write) { 3812 obj->base.read_domains = I915_GEM_DOMAIN_GTT; 3813 obj->base.write_domain = I915_GEM_DOMAIN_GTT; 3814 obj->dirty = 1; 3815 } 3816 3817 trace_i915_gem_object_change_domain(obj, 3818 old_read_domains, 3819 old_write_domain); 3820 3821 /* And bump the LRU for this access */ 3822 if (i915_gem_object_is_inactive(obj)) { 3823 struct i915_vma *vma = i915_gem_obj_to_ggtt(obj); 3824 if (vma) 3825 list_move_tail(&vma->mm_list, 3826 &dev_priv->gtt.base.inactive_list); 3827 3828 } 3829 3830 return 0; 3831 } 3832 3833 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, 3834 enum i915_cache_level cache_level) 3835 { 3836 struct drm_device *dev = obj->base.dev; 3837 drm_i915_private_t *dev_priv = dev->dev_private; 3838 struct i915_vma *vma, *next; 3839 int ret; 3840 3841 if (obj->cache_level == cache_level) 3842 return 0; 3843 3844 if (obj->pin_count) { 3845 DRM_DEBUG("can not change the cache level of pinned objects\n"); 3846 return -EBUSY; 3847 } 3848 3849 list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) { 3850 if (!i915_gem_valid_gtt_space(dev, &vma->node, cache_level)) { 3851 ret = i915_vma_unbind(vma); 3852 if (ret) 3853 return ret; 3854 3855 break; 3856 } 3857 } 3858 3859 if (i915_gem_obj_bound_any(obj)) { 3860 ret = i915_gem_object_finish_gpu(obj); 3861 if (ret) 3862 return ret; 3863 3864 i915_gem_object_finish_gtt(obj); 3865 3866 /* Before SandyBridge, you could not use tiling or fence 3867 * registers with snooped memory, so relinquish any fences 3868 * currently pointing to our region in the aperture. 3869 */ 3870 if (INTEL_INFO(dev)->gen < 6) { 3871 ret = i915_gem_object_put_fence(obj); 3872 if (ret) 3873 return ret; 3874 } 3875 3876 if (obj->has_global_gtt_mapping) 3877 i915_gem_gtt_bind_object(obj, cache_level); 3878 if (obj->has_aliasing_ppgtt_mapping) 3879 i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt, 3880 obj, cache_level); 3881 } 3882 3883 list_for_each_entry(vma, &obj->vma_list, vma_link) 3884 vma->node.color = cache_level; 3885 obj->cache_level = cache_level; 3886 3887 if (cpu_write_needs_clflush(obj)) { 3888 u32 old_read_domains, old_write_domain; 3889 3890 /* If we're coming from LLC cached, then we haven't 3891 * actually been tracking whether the data is in the 3892 * CPU cache or not, since we only allow one bit set 3893 * in obj->write_domain and have been skipping the clflushes. 3894 * Just set it to the CPU cache for now. 
3895 */ 3896 WARN_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU); 3897 3898 old_read_domains = obj->base.read_domains; 3899 old_write_domain = obj->base.write_domain; 3900 3901 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 3902 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 3903 3904 trace_i915_gem_object_change_domain(obj, 3905 old_read_domains, 3906 old_write_domain); 3907 } 3908 3909 i915_gem_verify_gtt(dev); 3910 return 0; 3911 } 3912 3913 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, 3914 struct drm_file *file) 3915 { 3916 struct drm_i915_gem_caching *args = data; 3917 struct drm_i915_gem_object *obj; 3918 int ret; 3919 3920 ret = i915_mutex_lock_interruptible(dev); 3921 if (ret) 3922 return ret; 3923 3924 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 3925 if (&obj->base == NULL) { 3926 ret = -ENOENT; 3927 goto unlock; 3928 } 3929 3930 switch (obj->cache_level) { 3931 case I915_CACHE_LLC: 3932 case I915_CACHE_L3_LLC: 3933 args->caching = I915_CACHING_CACHED; 3934 break; 3935 3936 case I915_CACHE_WT: 3937 args->caching = I915_CACHING_DISPLAY; 3938 break; 3939 3940 default: 3941 args->caching = I915_CACHING_NONE; 3942 break; 3943 } 3944 3945 drm_gem_object_unreference(&obj->base); 3946 unlock: 3947 mutex_unlock(&dev->struct_mutex); 3948 return ret; 3949 } 3950 3951 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, 3952 struct drm_file *file) 3953 { 3954 struct drm_i915_gem_caching *args = data; 3955 struct drm_i915_gem_object *obj; 3956 enum i915_cache_level level; 3957 int ret; 3958 3959 switch (args->caching) { 3960 case I915_CACHING_NONE: 3961 level = I915_CACHE_NONE; 3962 break; 3963 case I915_CACHING_CACHED: 3964 level = I915_CACHE_LLC; 3965 break; 3966 case I915_CACHING_DISPLAY: 3967 level = HAS_WT(dev) ? I915_CACHE_WT : I915_CACHE_NONE; 3968 break; 3969 default: 3970 return -EINVAL; 3971 } 3972 3973 ret = i915_mutex_lock_interruptible(dev); 3974 if (ret) 3975 return ret; 3976 3977 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 3978 if (&obj->base == NULL) { 3979 ret = -ENOENT; 3980 goto unlock; 3981 } 3982 3983 ret = i915_gem_object_set_cache_level(obj, level); 3984 3985 drm_gem_object_unreference(&obj->base); 3986 unlock: 3987 mutex_unlock(&dev->struct_mutex); 3988 return ret; 3989 } 3990 3991 static bool is_pin_display(struct drm_i915_gem_object *obj) 3992 { 3993 /* There are 3 sources that pin objects: 3994 * 1. The display engine (scanouts, sprites, cursors); 3995 * 2. Reservations for execbuffer; 3996 * 3. The user. 3997 * 3998 * We can ignore reservations as we hold the struct_mutex and 3999 * are only called outside of the reservation path. The user 4000 * can only increment pin_count once, and so if after 4001 * subtracting the potential reference by the user, any pin_count 4002 * remains, it must be due to another use by the display engine. 4003 */ 4004 return obj->pin_count - !!obj->user_pin_count; 4005 } 4006 4007 /* 4008 * Prepare buffer for display plane (scanout, cursors, etc). 4009 * Can be called from an uninterruptible phase (modesetting) and allows 4010 * any flushes to be pipelined (for pageflips). 
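 *
 * A rough sketch of the intended call pattern, using only helpers defined in
 * this file:
 *
 *	ret = i915_gem_object_pin_to_display_plane(obj, alignment, pipelined);
 *	if (ret == 0) {
 *		... scan out from obj ...
 *		i915_gem_object_unpin_from_display_plane(obj);
 *	}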
4011 */ 4012 int 4013 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, 4014 u32 alignment, 4015 struct intel_ring_buffer *pipelined) 4016 { 4017 u32 old_read_domains, old_write_domain; 4018 int ret; 4019 4020 if (pipelined != obj->ring) { 4021 ret = i915_gem_object_sync(obj, pipelined); 4022 if (ret) 4023 return ret; 4024 } 4025 4026 /* Mark the pin_display early so that we account for the 4027 * display coherency whilst setting up the cache domains. 4028 */ 4029 obj->pin_display = true; 4030 4031 /* The display engine is not coherent with the LLC cache on gen6. As 4032 * a result, we make sure that the pinning that is about to occur is 4033 * done with uncached PTEs. This is lowest common denominator for all 4034 * chipsets. 4035 * 4036 * However for gen6+, we could do better by using the GFDT bit instead 4037 * of uncaching, which would allow us to flush all the LLC-cached data 4038 * with that bit in the PTE to main memory with just one PIPE_CONTROL. 4039 */ 4040 ret = i915_gem_object_set_cache_level(obj, 4041 HAS_WT(obj->base.dev) ? I915_CACHE_WT : I915_CACHE_NONE); 4042 if (ret) 4043 goto err_unpin_display; 4044 4045 /* As the user may map the buffer once pinned in the display plane 4046 * (e.g. libkms for the bootup splash), we have to ensure that we 4047 * always use map_and_fenceable for all scanout buffers. 4048 */ 4049 ret = i915_gem_obj_ggtt_pin(obj, alignment, true, false); 4050 if (ret) 4051 goto err_unpin_display; 4052 4053 i915_gem_object_flush_cpu_write_domain(obj, true); 4054 4055 old_write_domain = obj->base.write_domain; 4056 old_read_domains = obj->base.read_domains; 4057 4058 /* It should now be out of any other write domains, and we can update 4059 * the domain values for our changes. 4060 */ 4061 obj->base.write_domain = 0; 4062 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 4063 4064 trace_i915_gem_object_change_domain(obj, 4065 old_read_domains, 4066 old_write_domain); 4067 4068 return 0; 4069 4070 err_unpin_display: 4071 obj->pin_display = is_pin_display(obj); 4072 return ret; 4073 } 4074 4075 void 4076 i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj) 4077 { 4078 i915_gem_object_unpin(obj); 4079 obj->pin_display = is_pin_display(obj); 4080 } 4081 4082 int 4083 i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj) 4084 { 4085 int ret; 4086 4087 if ((obj->base.read_domains & I915_GEM_GPU_DOMAINS) == 0) 4088 return 0; 4089 4090 ret = i915_gem_object_wait_rendering(obj, false); 4091 if (ret) 4092 return ret; 4093 4094 /* Ensure that we invalidate the GPU's caches and TLBs. */ 4095 obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS; 4096 return 0; 4097 } 4098 4099 /** 4100 * Moves a single object to the CPU read, and possibly write domain. 4101 * 4102 * This function returns when the move is complete, including waiting on 4103 * flushes to occur. 4104 */ 4105 int 4106 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) 4107 { 4108 uint32_t old_write_domain, old_read_domains; 4109 int ret; 4110 4111 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) 4112 return 0; 4113 4114 ret = i915_gem_object_wait_rendering(obj, !write); 4115 if (ret) 4116 return ret; 4117 4118 i915_gem_object_flush_gtt_write_domain(obj); 4119 4120 old_write_domain = obj->base.write_domain; 4121 old_read_domains = obj->base.read_domains; 4122 4123 /* Flush the CPU cache if it's still invalid. 
*/ 4124 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) { 4125 i915_gem_clflush_object(obj, false); 4126 4127 obj->base.read_domains |= I915_GEM_DOMAIN_CPU; 4128 } 4129 4130 /* It should now be out of any other write domains, and we can update 4131 * the domain values for our changes. 4132 */ 4133 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0); 4134 4135 /* If we're writing through the CPU, then the GPU read domains will 4136 * need to be invalidated at next use. 4137 */ 4138 if (write) { 4139 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 4140 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 4141 } 4142 4143 trace_i915_gem_object_change_domain(obj, 4144 old_read_domains, 4145 old_write_domain); 4146 4147 return 0; 4148 } 4149 4150 /* Throttle our rendering by waiting until the ring has completed our requests 4151 * emitted over 20 msec ago. 4152 * 4153 * Note that if we were to use the current jiffies each time around the loop, 4154 * we wouldn't escape the function with any frames outstanding if the time to 4155 * render a frame was over 20ms. 4156 * 4157 * This should get us reasonable parallelism between CPU and GPU but also 4158 * relatively low latency when blocking on a particular request to finish. 4159 */ 4160 static int 4161 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) 4162 { 4163 struct drm_i915_private *dev_priv = dev->dev_private; 4164 struct drm_i915_file_private *file_priv = file->driver_priv; 4165 unsigned long recent_enough = jiffies - msecs_to_jiffies(20); 4166 struct drm_i915_gem_request *request; 4167 struct intel_ring_buffer *ring = NULL; 4168 unsigned reset_counter; 4169 u32 seqno = 0; 4170 int ret; 4171 4172 ret = i915_gem_wait_for_error(&dev_priv->gpu_error); 4173 if (ret) 4174 return ret; 4175 4176 ret = i915_gem_check_wedge(&dev_priv->gpu_error, false); 4177 if (ret) 4178 return ret; 4179 4180 spin_lock(&file_priv->mm.lock); 4181 list_for_each_entry(request, &file_priv->mm.request_list, client_list) { 4182 if (time_after_eq(request->emitted_jiffies, recent_enough)) 4183 break; 4184 4185 ring = request->ring; 4186 seqno = request->seqno; 4187 } 4188 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter); 4189 spin_unlock(&file_priv->mm.lock); 4190 4191 if (seqno == 0) 4192 return 0; 4193 4194 ret = __wait_seqno(ring, seqno, reset_counter, true, NULL, NULL); 4195 if (ret == 0) 4196 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0); 4197 4198 return ret; 4199 } 4200 4201 int 4202 i915_gem_object_pin(struct drm_i915_gem_object *obj, 4203 struct i915_address_space *vm, 4204 uint32_t alignment, 4205 bool map_and_fenceable, 4206 bool nonblocking) 4207 { 4208 struct i915_vma *vma; 4209 int ret; 4210 4211 if (WARN_ON(obj->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT)) 4212 return -EBUSY; 4213 4214 WARN_ON(map_and_fenceable && !i915_is_ggtt(vm)); 4215 4216 vma = i915_gem_obj_to_vma(obj, vm); 4217 4218 if (vma) { 4219 if ((alignment && 4220 vma->node.start & (alignment - 1)) || 4221 (map_and_fenceable && !obj->map_and_fenceable)) { 4222 WARN(obj->pin_count, 4223 "bo is already pinned with incorrect alignment:" 4224 " offset=%lx, req.alignment=%x, req.map_and_fenceable=%d," 4225 " obj->map_and_fenceable=%d\n", 4226 i915_gem_obj_offset(obj, vm), alignment, 4227 map_and_fenceable, 4228 obj->map_and_fenceable); 4229 ret = i915_vma_unbind(vma); 4230 if (ret) 4231 return ret; 4232 } 4233 } 4234 4235 if (!i915_gem_obj_bound(obj, vm)) { 4236 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 4237 4238 
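/* Editor's note: nothing is bound in this address space yet, so reserve
 * space for the object now.  When no aliasing PPGTT is in use the global
 * GTT entries are written immediately; otherwise the GGTT binding further
 * below is only added for map_and_fenceable requests. */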
ret = i915_gem_object_bind_to_vm(obj, vm, alignment, 4239 map_and_fenceable, 4240 nonblocking); 4241 if (ret) 4242 return ret; 4243 4244 if (!dev_priv->mm.aliasing_ppgtt) 4245 i915_gem_gtt_bind_object(obj, obj->cache_level); 4246 } 4247 4248 if (!obj->has_global_gtt_mapping && map_and_fenceable) 4249 i915_gem_gtt_bind_object(obj, obj->cache_level); 4250 4251 obj->pin_count++; 4252 obj->pin_mappable |= map_and_fenceable; 4253 4254 return 0; 4255 } 4256 4257 void 4258 i915_gem_object_unpin(struct drm_i915_gem_object *obj) 4259 { 4260 BUG_ON(obj->pin_count == 0); 4261 BUG_ON(!i915_gem_obj_bound_any(obj)); 4262 4263 if (--obj->pin_count == 0) 4264 obj->pin_mappable = false; 4265 } 4266 4267 int 4268 i915_gem_pin_ioctl(struct drm_device *dev, void *data, 4269 struct drm_file *file) 4270 { 4271 struct drm_i915_gem_pin *args = data; 4272 struct drm_i915_gem_object *obj; 4273 int ret; 4274 4275 ret = i915_mutex_lock_interruptible(dev); 4276 if (ret) 4277 return ret; 4278 4279 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 4280 if (&obj->base == NULL) { 4281 ret = -ENOENT; 4282 goto unlock; 4283 } 4284 4285 if (obj->madv != I915_MADV_WILLNEED) { 4286 DRM_ERROR("Attempting to pin a purgeable buffer\n"); 4287 ret = -EINVAL; 4288 goto out; 4289 } 4290 4291 if (obj->pin_filp != NULL && obj->pin_filp != file) { 4292 DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n", 4293 args->handle); 4294 ret = -EINVAL; 4295 goto out; 4296 } 4297 4298 if (obj->user_pin_count == ULONG_MAX) { 4299 ret = -EBUSY; 4300 goto out; 4301 } 4302 4303 if (obj->user_pin_count == 0) { 4304 ret = i915_gem_obj_ggtt_pin(obj, args->alignment, true, false); 4305 if (ret) 4306 goto out; 4307 } 4308 4309 obj->user_pin_count++; 4310 obj->pin_filp = file; 4311 4312 args->offset = i915_gem_obj_ggtt_offset(obj); 4313 out: 4314 drm_gem_object_unreference(&obj->base); 4315 unlock: 4316 mutex_unlock(&dev->struct_mutex); 4317 return ret; 4318 } 4319 4320 int 4321 i915_gem_unpin_ioctl(struct drm_device *dev, void *data, 4322 struct drm_file *file) 4323 { 4324 struct drm_i915_gem_pin *args = data; 4325 struct drm_i915_gem_object *obj; 4326 int ret; 4327 4328 ret = i915_mutex_lock_interruptible(dev); 4329 if (ret) 4330 return ret; 4331 4332 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 4333 if (&obj->base == NULL) { 4334 ret = -ENOENT; 4335 goto unlock; 4336 } 4337 4338 if (obj->pin_filp != file) { 4339 DRM_ERROR("Not pinned by caller in i915_gem_unpin_ioctl(): %d\n", 4340 args->handle); 4341 ret = -EINVAL; 4342 goto out; 4343 } 4344 obj->user_pin_count--; 4345 if (obj->user_pin_count == 0) { 4346 obj->pin_filp = NULL; 4347 i915_gem_object_unpin(obj); 4348 } 4349 4350 out: 4351 drm_gem_object_unreference(&obj->base); 4352 unlock: 4353 mutex_unlock(&dev->struct_mutex); 4354 return ret; 4355 } 4356 4357 int 4358 i915_gem_busy_ioctl(struct drm_device *dev, void *data, 4359 struct drm_file *file) 4360 { 4361 struct drm_i915_gem_busy *args = data; 4362 struct drm_i915_gem_object *obj; 4363 int ret; 4364 4365 ret = i915_mutex_lock_interruptible(dev); 4366 if (ret) 4367 return ret; 4368 4369 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 4370 if (&obj->base == NULL) { 4371 ret = -ENOENT; 4372 goto unlock; 4373 } 4374 4375 /* Count all active objects as busy, even if they are currently not used 4376 * by the gpu. Users of this interface expect objects to eventually 4377 * become non-busy without any further actions, therefore emit any 4378 * necessary flushes here.
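 *
 * Editor's note: the low bit of args->busy reports whether the object is
 * still active; the ring that last used the object is encoded in the
 * upper bits via intel_ring_flag() << 16 below.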
4379 */ 4380 ret = i915_gem_object_flush_active(obj); 4381 4382 args->busy = obj->active; 4383 if (obj->ring) { 4384 BUILD_BUG_ON(I915_NUM_RINGS > 16); 4385 args->busy |= intel_ring_flag(obj->ring) << 16; 4386 } 4387 4388 drm_gem_object_unreference(&obj->base); 4389 unlock: 4390 mutex_unlock(&dev->struct_mutex); 4391 return ret; 4392 } 4393 4394 int 4395 i915_gem_throttle_ioctl(struct drm_device *dev, void *data, 4396 struct drm_file *file_priv) 4397 { 4398 return i915_gem_ring_throttle(dev, file_priv); 4399 } 4400 4401 int 4402 i915_gem_madvise_ioctl(struct drm_device *dev, void *data, 4403 struct drm_file *file_priv) 4404 { 4405 struct drm_i915_gem_madvise *args = data; 4406 struct drm_i915_gem_object *obj; 4407 int ret; 4408 4409 switch (args->madv) { 4410 case I915_MADV_DONTNEED: 4411 case I915_MADV_WILLNEED: 4412 break; 4413 default: 4414 return -EINVAL; 4415 } 4416 4417 ret = i915_mutex_lock_interruptible(dev); 4418 if (ret) 4419 return ret; 4420 4421 obj = to_intel_bo(drm_gem_object_lookup(dev, file_priv, args->handle)); 4422 if (&obj->base == NULL) { 4423 ret = -ENOENT; 4424 goto unlock; 4425 } 4426 4427 if (obj->pin_count) { 4428 ret = -EINVAL; 4429 goto out; 4430 } 4431 4432 if (obj->madv != __I915_MADV_PURGED) 4433 obj->madv = args->madv; 4434 4435 /* if the object is no longer attached, discard its backing storage */ 4436 if (i915_gem_object_is_purgeable(obj) && obj->pages == NULL) 4437 i915_gem_object_truncate(obj); 4438 4439 args->retained = obj->madv != __I915_MADV_PURGED; 4440 4441 out: 4442 drm_gem_object_unreference(&obj->base); 4443 unlock: 4444 mutex_unlock(&dev->struct_mutex); 4445 return ret; 4446 } 4447 4448 void i915_gem_object_init(struct drm_i915_gem_object *obj, 4449 const struct drm_i915_gem_object_ops *ops) 4450 { 4451 INIT_LIST_HEAD(&obj->global_list); 4452 INIT_LIST_HEAD(&obj->ring_list); 4453 INIT_LIST_HEAD(&obj->obj_exec_link); 4454 INIT_LIST_HEAD(&obj->vma_list); 4455 4456 obj->ops = ops; 4457 4458 obj->fence_reg = I915_FENCE_REG_NONE; 4459 obj->madv = I915_MADV_WILLNEED; 4460 /* Avoid an unnecessary call to unbind on the first bind. */ 4461 obj->map_and_fenceable = true; 4462 4463 i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size); 4464 } 4465 4466 static const struct drm_i915_gem_object_ops i915_gem_object_ops = { 4467 .get_pages = i915_gem_object_get_pages_gtt, 4468 .put_pages = i915_gem_object_put_pages_gtt, 4469 }; 4470 4471 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev, 4472 size_t size) 4473 { 4474 struct drm_i915_gem_object *obj; 4475 #ifdef __linux__ 4476 struct address_space *mapping; 4477 gfp_t mask; 4478 #endif 4479 4480 obj = i915_gem_object_alloc(dev); 4481 if (obj == NULL) 4482 return NULL; 4483 4484 if (drm_gem_object_init(dev, &obj->base, size) != 0) { 4485 i915_gem_object_free(obj); 4486 return NULL; 4487 } 4488 4489 #ifdef __linux__ 4490 mask = GFP_HIGHUSER | __GFP_RECLAIMABLE; 4491 if (IS_CRESTLINE(dev) || IS_BROADWATER(dev)) { 4492 /* 965gm cannot relocate objects above 4GiB. */ 4493 mask &= ~__GFP_HIGHMEM; 4494 mask |= __GFP_DMA32; 4495 } 4496 4497 mapping = file_inode(obj->base.filp)->i_mapping; 4498 mapping_set_gfp_mask(mapping, mask); 4499 #endif 4500 4501 i915_gem_object_init(obj, &i915_gem_object_ops); 4502 4503 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 4504 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 4505 4506 if (HAS_LLC(dev)) { 4507 /* On some devices, we can have the GPU use the LLC (the CPU 4508 * cache) for about a 10% performance improvement 4509 * compared to uncached. 
Graphics requests other than 4510 * display scanout are coherent with the CPU in 4511 * accessing this cache. This means in this mode we 4512 * don't need to clflush on the CPU side, and on the 4513 * GPU side we only need to flush internal caches to 4514 * get data visible to the CPU. 4515 * 4516 * However, we maintain the display planes as UC, and so 4517 * need to rebind when first used as such. 4518 */ 4519 obj->cache_level = I915_CACHE_LLC; 4520 } else 4521 obj->cache_level = I915_CACHE_NONE; 4522 4523 trace_i915_gem_object_create(obj); 4524 4525 return obj; 4526 } 4527 4528 void i915_gem_free_object(struct drm_gem_object *gem_obj) 4529 { 4530 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); 4531 struct drm_device *dev = obj->base.dev; 4532 drm_i915_private_t *dev_priv = dev->dev_private; 4533 struct i915_vma *vma, *next; 4534 4535 intel_runtime_pm_get(dev_priv); 4536 4537 trace_i915_gem_object_destroy(obj); 4538 4539 if (obj->phys_obj) 4540 i915_gem_detach_phys_object(dev, obj); 4541 4542 obj->pin_count = 0; 4543 /* NB: 0 or 1 elements */ 4544 WARN_ON(!list_empty(&obj->vma_list) && 4545 !list_is_singular(&obj->vma_list)); 4546 list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) { 4547 int ret = i915_vma_unbind(vma); 4548 if (WARN_ON(ret == -ERESTARTSYS)) { 4549 bool was_interruptible; 4550 4551 was_interruptible = dev_priv->mm.interruptible; 4552 dev_priv->mm.interruptible = false; 4553 4554 WARN_ON(i915_vma_unbind(vma)); 4555 4556 dev_priv->mm.interruptible = was_interruptible; 4557 } 4558 } 4559 4560 /* Stolen objects don't hold a ref, but do hold pin count. Fix that up 4561 * before progressing. */ 4562 if (obj->stolen) 4563 i915_gem_object_unpin_pages(obj); 4564 4565 if (WARN_ON(obj->pages_pin_count)) 4566 obj->pages_pin_count = 0; 4567 i915_gem_object_put_pages(obj); 4568 i915_gem_object_free_mmap_offset(obj); 4569 i915_gem_object_release_stolen(obj); 4570 4571 BUG_ON(obj->pages); 4572 4573 #ifdef notyet 4574 if (obj->base.import_attach) 4575 drm_prime_gem_destroy(&obj->base, NULL); 4576 #endif 4577 4578 drm_gem_object_release(&obj->base); 4579 i915_gem_info_remove_obj(dev_priv, obj->base.size); 4580 4581 kfree(obj->bit_17); 4582 i915_gem_object_free(obj); 4583 4584 intel_runtime_pm_put(dev_priv); 4585 } 4586 4587 struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj, 4588 struct i915_address_space *vm) 4589 { 4590 struct i915_vma *vma; 4591 list_for_each_entry(vma, &obj->vma_list, vma_link) 4592 if (vma->vm == vm) 4593 return vma; 4594 4595 return NULL; 4596 } 4597 4598 static struct i915_vma *__i915_gem_vma_create(struct drm_i915_gem_object *obj, 4599 struct i915_address_space *vm) 4600 { 4601 struct i915_vma *vma = kzalloc(sizeof(*vma), GFP_KERNEL); 4602 if (vma == NULL) 4603 return ERR_PTR(-ENOMEM); 4604 4605 INIT_LIST_HEAD(&vma->vma_link); 4606 INIT_LIST_HEAD(&vma->mm_list); 4607 INIT_LIST_HEAD(&vma->exec_list); 4608 vma->vm = vm; 4609 vma->obj = obj; 4610 4611 /* Keep GGTT vmas first to make debug easier */ 4612 if (i915_is_ggtt(vm)) 4613 list_add(&vma->vma_link, &obj->vma_list); 4614 else 4615 list_add_tail(&vma->vma_link, &obj->vma_list); 4616 4617 return vma; 4618 } 4619 4620 struct i915_vma * 4621 i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj, 4622 struct i915_address_space *vm) 4623 { 4624 struct i915_vma *vma; 4625 4626 vma = i915_gem_obj_to_vma(obj, vm); 4627 if (!vma) 4628 vma = __i915_gem_vma_create(obj, vm); 4629 4630 return vma; 4631 } 4632 4633 void i915_gem_vma_destroy(struct i915_vma *vma) 4634 { 4635 
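/* Editor's note: callers must have unbound the vma first (its drm_mm node
 * may no longer be allocated).  A vma still on an execbuffer reservation
 * list is left in place as a placeholder rather than freed here. */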
WARN_ON(vma->node.allocated); 4636 4637 /* Keep the vma as a placeholder in the execbuffer reservation lists */ 4638 if (!list_empty(&vma->exec_list)) 4639 return; 4640 4641 list_del(&vma->vma_link); 4642 4643 kfree(vma); 4644 } 4645 4646 int 4647 i915_gem_suspend(struct drm_device *dev) 4648 { 4649 drm_i915_private_t *dev_priv = dev->dev_private; 4650 int ret = 0; 4651 4652 mutex_lock(&dev->struct_mutex); 4653 if (dev_priv->ums.mm_suspended) 4654 goto err; 4655 4656 ret = i915_gpu_idle(dev); 4657 if (ret) 4658 goto err; 4659 4660 i915_gem_retire_requests(dev); 4661 4662 /* Under UMS, be paranoid and evict. */ 4663 if (!drm_core_check_feature(dev, DRIVER_MODESET)) 4664 i915_gem_evict_everything(dev); 4665 4666 i915_kernel_lost_context(dev); 4667 i915_gem_cleanup_ringbuffer(dev); 4668 4669 /* Hack! Don't let anybody do execbuf while we don't control the chip. 4670 * We need to replace this with a semaphore, or something. 4671 * And not confound ums.mm_suspended! 4672 */ 4673 dev_priv->ums.mm_suspended = !drm_core_check_feature(dev, 4674 DRIVER_MODESET); 4675 mutex_unlock(&dev->struct_mutex); 4676 4677 del_timer_sync(&dev_priv->gpu_error.hangcheck_timer); 4678 cancel_delayed_work_sync(&dev_priv->mm.retire_work); 4679 cancel_delayed_work_sync(&dev_priv->mm.idle_work); 4680 4681 return 0; 4682 4683 err: 4684 mutex_unlock(&dev->struct_mutex); 4685 return ret; 4686 } 4687 4688 int i915_gem_l3_remap(struct intel_ring_buffer *ring, int slice) 4689 { 4690 struct drm_device *dev = ring->dev; 4691 drm_i915_private_t *dev_priv = dev->dev_private; 4692 u32 reg_base = GEN7_L3LOG_BASE + (slice * 0x200); 4693 u32 *remap_info = dev_priv->l3_parity.remap_info[slice]; 4694 int i, ret; 4695 4696 if (!HAS_L3_DPF(dev) || !remap_info) 4697 return 0; 4698 4699 ret = intel_ring_begin(ring, GEN7_L3LOG_SIZE / 4 * 3); 4700 if (ret) 4701 return ret; 4702 4703 /* 4704 * Note: We do not worry about the concurrent register cacheline hang 4705 * here because no other code should access these registers other than 4706 * at initialization time. 
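 *
 * Editor's note: each remap entry below is emitted as MI_LOAD_REGISTER_IMM(1)
 * followed by the register offset and its value, i.e. three dwords per
 * entry, matching the GEN7_L3LOG_SIZE / 4 * 3 dwords reserved with
 * intel_ring_begin() above.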
4707 */ 4708 for (i = 0; i < GEN7_L3LOG_SIZE; i += 4) { 4709 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); 4710 intel_ring_emit(ring, reg_base + i); 4711 intel_ring_emit(ring, remap_info[i/4]); 4712 } 4713 4714 intel_ring_advance(ring); 4715 4716 return ret; 4717 } 4718 4719 void i915_gem_init_swizzling(struct drm_device *dev) 4720 { 4721 drm_i915_private_t *dev_priv = dev->dev_private; 4722 4723 if (INTEL_INFO(dev)->gen < 5 || 4724 dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE) 4725 return; 4726 4727 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) | 4728 DISP_TILE_SURFACE_SWIZZLING); 4729 4730 if (IS_GEN5(dev)) 4731 return; 4732 4733 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL); 4734 if (IS_GEN6(dev)) 4735 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB)); 4736 else if (IS_GEN7(dev)) 4737 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB)); 4738 else if (IS_GEN8(dev)) 4739 I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW)); 4740 else 4741 BUG(); 4742 } 4743 4744 static bool 4745 intel_enable_blt(struct drm_device *dev) 4746 { 4747 if (!HAS_BLT(dev)) 4748 return false; 4749 4750 #ifdef notyet 4751 /* The blitter was dysfunctional on early prototypes */ 4752 if (IS_GEN6(dev) && dev->pdev->revision < 8) { 4753 DRM_INFO("BLT not supported on this pre-production hardware;" 4754 " graphics performance will be degraded.\n"); 4755 return false; 4756 } 4757 #endif 4758 4759 return true; 4760 } 4761 4762 static int i915_gem_init_rings(struct drm_device *dev) 4763 { 4764 struct drm_i915_private *dev_priv = dev->dev_private; 4765 int ret; 4766 4767 ret = intel_init_render_ring_buffer(dev); 4768 if (ret) 4769 return ret; 4770 4771 if (HAS_BSD(dev)) { 4772 ret = intel_init_bsd_ring_buffer(dev); 4773 if (ret) 4774 goto cleanup_render_ring; 4775 } 4776 4777 if (intel_enable_blt(dev)) { 4778 ret = intel_init_blt_ring_buffer(dev); 4779 if (ret) 4780 goto cleanup_bsd_ring; 4781 } 4782 4783 if (HAS_VEBOX(dev)) { 4784 ret = intel_init_vebox_ring_buffer(dev); 4785 if (ret) 4786 goto cleanup_blt_ring; 4787 } 4788 4789 4790 ret = i915_gem_set_seqno(dev, ((u32)~0 - 0x1000)); 4791 if (ret) 4792 goto cleanup_vebox_ring; 4793 4794 return 0; 4795 4796 cleanup_vebox_ring: 4797 intel_cleanup_ring_buffer(&dev_priv->ring[VECS]); 4798 cleanup_blt_ring: 4799 intel_cleanup_ring_buffer(&dev_priv->ring[BCS]); 4800 cleanup_bsd_ring: 4801 intel_cleanup_ring_buffer(&dev_priv->ring[VCS]); 4802 cleanup_render_ring: 4803 intel_cleanup_ring_buffer(&dev_priv->ring[RCS]); 4804 4805 return ret; 4806 } 4807 4808 int 4809 i915_gem_init_hw(struct drm_device *dev) 4810 { 4811 drm_i915_private_t *dev_priv = dev->dev_private; 4812 int ret, i; 4813 4814 #ifdef notyet 4815 if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt()) 4816 return -EIO; 4817 #endif 4818 4819 if (dev_priv->ellc_size) 4820 I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf)); 4821 4822 if (IS_HASWELL(dev)) 4823 I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev) ? 
4824 LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED); 4825 4826 if (HAS_PCH_NOP(dev)) { 4827 u32 temp = I915_READ(GEN7_MSG_CTL); 4828 temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK); 4829 I915_WRITE(GEN7_MSG_CTL, temp); 4830 } 4831 4832 i915_gem_init_swizzling(dev); 4833 4834 ret = i915_gem_init_rings(dev); 4835 if (ret) 4836 return ret; 4837 4838 for (i = 0; i < NUM_L3_SLICES(dev); i++) 4839 i915_gem_l3_remap(&dev_priv->ring[RCS], i); 4840 4841 /* 4842 * XXX: There was some w/a described somewhere suggesting loading 4843 * contexts before PPGTT. 4844 */ 4845 ret = i915_gem_context_init(dev); 4846 if (ret) { 4847 i915_gem_cleanup_ringbuffer(dev); 4848 DRM_ERROR("Context initialization failed %d\n", ret); 4849 return ret; 4850 } 4851 4852 if (dev_priv->mm.aliasing_ppgtt) { 4853 ret = dev_priv->mm.aliasing_ppgtt->enable(dev); 4854 if (ret) { 4855 i915_gem_cleanup_aliasing_ppgtt(dev); 4856 DRM_INFO("PPGTT enable failed. This is not fatal, but unexpected\n"); 4857 } 4858 } 4859 4860 return 0; 4861 } 4862 4863 int i915_gem_init(struct drm_device *dev) 4864 { 4865 struct drm_i915_private *dev_priv = dev->dev_private; 4866 int ret; 4867 4868 mutex_lock(&dev->struct_mutex); 4869 4870 if (IS_VALLEYVIEW(dev)) { 4871 /* VLVA0 (potential hack), BIOS isn't actually waking us */ 4872 I915_WRITE(VLV_GTLC_WAKE_CTRL, 1); 4873 if (wait_for((I915_READ(VLV_GTLC_PW_STATUS) & 1) == 1, 10)) 4874 DRM_DEBUG_DRIVER("allow wake ack timed out\n"); 4875 } 4876 4877 i915_gem_init_global_gtt(dev); 4878 4879 ret = i915_gem_init_hw(dev); 4880 mutex_unlock(&dev->struct_mutex); 4881 if (ret) { 4882 i915_gem_cleanup_aliasing_ppgtt(dev); 4883 return ret; 4884 } 4885 4886 #ifdef __linux__ 4887 /* Allow hardware batchbuffers unless told otherwise, but not for KMS. */ 4888 if (!drm_core_check_feature(dev, DRIVER_MODESET)) 4889 dev_priv->dri1.allow_batchbuffer = 1; 4890 #endif 4891 return 0; 4892 } 4893 4894 void 4895 i915_gem_cleanup_ringbuffer(struct drm_device *dev) 4896 { 4897 drm_i915_private_t *dev_priv = dev->dev_private; 4898 struct intel_ring_buffer *ring; 4899 int i; 4900 4901 for_each_ring(ring, dev_priv, i) 4902 intel_cleanup_ring_buffer(ring); 4903 } 4904 4905 int 4906 i915_gem_entervt_ioctl(struct drm_device *dev, void *data, 4907 struct drm_file *file_priv) 4908 { 4909 struct drm_i915_private *dev_priv = dev->dev_private; 4910 int ret; 4911 4912 if (drm_core_check_feature(dev, DRIVER_MODESET)) 4913 return 0; 4914 4915 if (i915_reset_in_progress(&dev_priv->gpu_error)) { 4916 DRM_ERROR("Reenabling wedged hardware, good luck\n"); 4917 atomic_set(&dev_priv->gpu_error.reset_counter, 0); 4918 } 4919 4920 mutex_lock(&dev->struct_mutex); 4921 dev_priv->ums.mm_suspended = 0; 4922 4923 ret = i915_gem_init_hw(dev); 4924 if (ret != 0) { 4925 mutex_unlock(&dev->struct_mutex); 4926 return ret; 4927 } 4928 4929 BUG_ON(!list_empty(&dev_priv->gtt.base.active_list)); 4930 mutex_unlock(&dev->struct_mutex); 4931 4932 ret = drm_irq_install(dev); 4933 if (ret) 4934 goto cleanup_ringbuffer; 4935 4936 return 0; 4937 4938 cleanup_ringbuffer: 4939 mutex_lock(&dev->struct_mutex); 4940 i915_gem_cleanup_ringbuffer(dev); 4941 dev_priv->ums.mm_suspended = 1; 4942 mutex_unlock(&dev->struct_mutex); 4943 4944 return ret; 4945 } 4946 4947 int 4948 i915_gem_leavevt_ioctl(struct drm_device *dev, void *data, 4949 struct drm_file *file_priv) 4950 { 4951 if (drm_core_check_feature(dev, DRIVER_MODESET)) 4952 return 0; 4953 4954 drm_irq_uninstall(dev); 4955 4956 return i915_gem_suspend(dev); 4957 } 4958 4959 void 4960 i915_gem_lastclose(struct 
drm_device *dev) 4961 { 4962 int ret; 4963 4964 if (drm_core_check_feature(dev, DRIVER_MODESET)) 4965 return; 4966 4967 ret = i915_gem_suspend(dev); 4968 if (ret) 4969 DRM_ERROR("failed to idle hardware: %d\n", ret); 4970 } 4971 4972 static void 4973 init_ring_lists(struct intel_ring_buffer *ring) 4974 { 4975 INIT_LIST_HEAD(&ring->active_list); 4976 INIT_LIST_HEAD(&ring->request_list); 4977 } 4978 4979 static void i915_init_vm(struct drm_i915_private *dev_priv, 4980 struct i915_address_space *vm) 4981 { 4982 vm->dev = dev_priv->dev; 4983 INIT_LIST_HEAD(&vm->active_list); 4984 INIT_LIST_HEAD(&vm->inactive_list); 4985 INIT_LIST_HEAD(&vm->global_link); 4986 list_add(&vm->global_link, &dev_priv->vm_list); 4987 } 4988 4989 void 4990 i915_gem_load(struct drm_device *dev) 4991 { 4992 drm_i915_private_t *dev_priv = dev->dev_private; 4993 int i; 4994 4995 #ifdef __linux__ 4996 dev_priv->slab = 4997 kmem_cache_create("i915_gem_object", 4998 sizeof(struct drm_i915_gem_object), 0, 4999 SLAB_HWCACHE_ALIGN, 5000 NULL); 5001 #endif 5002 5003 INIT_LIST_HEAD(&dev_priv->vm_list); 5004 i915_init_vm(dev_priv, &dev_priv->gtt.base); 5005 5006 INIT_LIST_HEAD(&dev_priv->context_list); 5007 INIT_LIST_HEAD(&dev_priv->mm.unbound_list); 5008 INIT_LIST_HEAD(&dev_priv->mm.bound_list); 5009 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 5010 for (i = 0; i < I915_NUM_RINGS; i++) 5011 init_ring_lists(&dev_priv->ring[i]); 5012 for (i = 0; i < I915_MAX_NUM_FENCES; i++) 5013 INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list); 5014 INIT_DELAYED_WORK(&dev_priv->mm.retire_work, 5015 i915_gem_retire_work_handler); 5016 INIT_DELAYED_WORK(&dev_priv->mm.idle_work, 5017 i915_gem_idle_work_handler); 5018 init_waitqueue_head(&dev_priv->gpu_error.reset_queue); 5019 5020 /* On GEN3 we really need to make sure the ARB C3 LP bit is set */ 5021 if (IS_GEN3(dev)) { 5022 I915_WRITE(MI_ARB_STATE, 5023 _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE)); 5024 } 5025 5026 dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL; 5027 5028 /* Old X drivers will take 0-2 for front, back, depth buffers */ 5029 if (!drm_core_check_feature(dev, DRIVER_MODESET)) 5030 dev_priv->fence_reg_start = 3; 5031 5032 if (INTEL_INFO(dev)->gen >= 7 && !IS_VALLEYVIEW(dev)) 5033 dev_priv->num_fence_regs = 32; 5034 else if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev)) 5035 dev_priv->num_fence_regs = 16; 5036 else 5037 dev_priv->num_fence_regs = 8; 5038 5039 /* Initialize fence registers to zero */ 5040 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 5041 i915_gem_restore_fences(dev); 5042 5043 i915_gem_detect_bit_6_swizzle(dev); 5044 init_waitqueue_head(&dev_priv->pending_flip_queue); 5045 5046 dev_priv->mm.interruptible = true; 5047 5048 #ifdef notyet 5049 dev_priv->mm.inactive_shrinker.scan_objects = i915_gem_inactive_scan; 5050 dev_priv->mm.inactive_shrinker.count_objects = i915_gem_inactive_count; 5051 dev_priv->mm.inactive_shrinker.seeks = DEFAULT_SEEKS; 5052 register_shrinker(&dev_priv->mm.inactive_shrinker); 5053 #endif 5054 } 5055 5056 /* 5057 * Create a physically contiguous memory object for this object 5058 * e.g. 
for cursor + overlay regs 5059 */ 5060 static int i915_gem_init_phys_object(struct drm_device *dev, 5061 int id, int size, int align) 5062 { 5063 drm_i915_private_t *dev_priv = dev->dev_private; 5064 struct drm_i915_gem_phys_object *phys_obj; 5065 int ret; 5066 5067 if (dev_priv->mm.phys_objs[id - 1] || !size) 5068 return 0; 5069 5070 phys_obj = kzalloc(sizeof(*phys_obj), GFP_KERNEL); 5071 if (!phys_obj) 5072 return -ENOMEM; 5073 5074 phys_obj->id = id; 5075 5076 phys_obj->handle = drm_dmamem_alloc(dev->dmat, size, align, 1, size, BUS_DMA_NOCACHE, 0); 5077 if (!phys_obj->handle) { 5078 ret = -ENOMEM; 5079 goto kfree_obj; 5080 } 5081 5082 dev_priv->mm.phys_objs[id - 1] = phys_obj; 5083 5084 return 0; 5085 kfree_obj: 5086 kfree(phys_obj); 5087 return ret; 5088 } 5089 5090 #ifdef notyet 5091 static void i915_gem_free_phys_object(struct drm_device *dev, int id) 5092 { 5093 drm_i915_private_t *dev_priv = dev->dev_private; 5094 struct drm_i915_gem_phys_object *phys_obj; 5095 5096 if (!dev_priv->mm.phys_objs[id - 1]) 5097 return; 5098 5099 phys_obj = dev_priv->mm.phys_objs[id - 1]; 5100 if (phys_obj->cur_obj) { 5101 i915_gem_detach_phys_object(dev, phys_obj->cur_obj); 5102 } 5103 5104 #ifdef CONFIG_X86 5105 set_memory_wb((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE); 5106 #endif 5107 drm_pci_free(dev, phys_obj->handle); 5108 kfree(phys_obj); 5109 dev_priv->mm.phys_objs[id - 1] = NULL; 5110 } 5111 5112 void i915_gem_free_all_phys_object(struct drm_device *dev) 5113 { 5114 int i; 5115 5116 for (i = I915_GEM_PHYS_CURSOR_0; i <= I915_MAX_PHYS_OBJECT; i++) 5117 i915_gem_free_phys_object(dev, i); 5118 } 5119 #endif /* notyet */ 5120 5121 void i915_gem_detach_phys_object(struct drm_device *dev, 5122 struct drm_i915_gem_object *obj) 5123 { 5124 char *vaddr; 5125 int i; 5126 int page_count; 5127 5128 if (!obj->phys_obj) 5129 return; 5130 vaddr = obj->phys_obj->handle->kva; 5131 5132 page_count = obj->base.size / PAGE_SIZE; 5133 for (i = 0; i < page_count; i++) { 5134 #ifdef notyet 5135 struct page *page = shmem_read_mapping_page(mapping, i); 5136 if (!IS_ERR(page)) { 5137 char *dst = kmap_atomic(page); 5138 memcpy(dst, vaddr + i*PAGE_SIZE, PAGE_SIZE); 5139 kunmap_atomic(dst); 5140 5141 drm_clflush_pages(&page, 1); 5142 5143 set_page_dirty(page); 5144 mark_page_accessed(page); 5145 page_cache_release(page); 5146 } 5147 #endif 5148 } 5149 i915_gem_chipset_flush(dev); 5150 5151 obj->phys_obj->cur_obj = NULL; 5152 obj->phys_obj = NULL; 5153 } 5154 5155 int 5156 i915_gem_attach_phys_object(struct drm_device *dev, 5157 struct drm_i915_gem_object *obj, 5158 int id, 5159 int align) 5160 { 5161 drm_i915_private_t *dev_priv = dev->dev_private; 5162 int ret = 0; 5163 int page_count; 5164 int i; 5165 5166 if (id > I915_MAX_PHYS_OBJECT) 5167 return -EINVAL; 5168 5169 if (obj->phys_obj) { 5170 if (obj->phys_obj->id == id) 5171 return 0; 5172 i915_gem_detach_phys_object(dev, obj); 5173 } 5174 5175 /* create a new object */ 5176 if (!dev_priv->mm.phys_objs[id - 1]) { 5177 ret = i915_gem_init_phys_object(dev, id, 5178 obj->base.size, align); 5179 if (ret) { 5180 DRM_ERROR("failed to init phys object %d size: %zu\n", 5181 id, obj->base.size); 5182 return ret; 5183 } 5184 } 5185 5186 /* bind to the object */ 5187 obj->phys_obj = dev_priv->mm.phys_objs[id - 1]; 5188 obj->phys_obj->cur_obj = obj; 5189 5190 page_count = obj->base.size / PAGE_SIZE; 5191 5192 for (i = 0; i < page_count; i++) { 5193 #ifdef notyet 5194 struct page *page; 5195 char *dst, *src; 5196 5197 page = 
shmem_read_mapping_page(mapping, i); 5198 if (IS_ERR(page)) 5199 return PTR_ERR(page); 5200 5201 src = kmap_atomic(page); 5202 dst = obj->phys_obj->handle->kva + (i * PAGE_SIZE); 5203 memcpy(dst, src, PAGE_SIZE); 5204 kunmap_atomic(src); 5205 5206 mark_page_accessed(page); 5207 page_cache_release(page); 5208 #endif 5209 } 5210 5211 return 0; 5212 } 5213 5214 static int 5215 i915_gem_phys_pwrite(struct drm_device *dev, 5216 struct drm_i915_gem_object *obj, 5217 struct drm_i915_gem_pwrite *args, 5218 struct drm_file *file_priv) 5219 { 5220 void *vaddr = obj->phys_obj->handle->kva + args->offset; 5221 char __user *user_data = to_user_ptr(args->data_ptr); 5222 5223 if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) { 5224 unsigned long unwritten; 5225 5226 /* The physical object once assigned is fixed for the lifetime 5227 * of the obj, so we can safely drop the lock and continue 5228 * to access vaddr. 5229 */ 5230 mutex_unlock(&dev->struct_mutex); 5231 unwritten = copy_from_user(vaddr, user_data, args->size); 5232 mutex_lock(&dev->struct_mutex); 5233 if (unwritten) 5234 return -EFAULT; 5235 } 5236 5237 i915_gem_chipset_flush(dev); 5238 return 0; 5239 } 5240 5241 void i915_gem_release(struct drm_device *dev, struct drm_file *file) 5242 { 5243 struct drm_i915_file_private *file_priv = file->driver_priv; 5244 5245 cancel_delayed_work_sync(&file_priv->mm.idle_work); 5246 5247 /* Clean up our request list when the client is going away, so that 5248 * later retire_requests won't dereference our soon-to-be-gone 5249 * file_priv. 5250 */ 5251 spin_lock(&file_priv->mm.lock); 5252 while (!list_empty(&file_priv->mm.request_list)) { 5253 struct drm_i915_gem_request *request; 5254 5255 request = list_first_entry(&file_priv->mm.request_list, 5256 struct drm_i915_gem_request, 5257 client_list); 5258 list_del(&request->client_list); 5259 request->file_priv = NULL; 5260 } 5261 spin_unlock(&file_priv->mm.lock); 5262 } 5263 5264 static void 5265 i915_gem_file_idle_work_handler(struct work_struct *work) 5266 { 5267 struct drm_i915_file_private *file_priv = 5268 container_of(work, typeof(*file_priv), mm.idle_work.work); 5269 5270 atomic_set(&file_priv->rps_wait_boost, false); 5271 } 5272 5273 int i915_gem_open(struct drm_device *dev, struct drm_file *file) 5274 { 5275 struct drm_i915_file_private *file_priv; 5276 5277 DRM_DEBUG_DRIVER("\n"); 5278 5279 file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL); 5280 if (!file_priv) 5281 return -ENOMEM; 5282 5283 file->driver_priv = file_priv; 5284 file_priv->dev_priv = dev->dev_private; 5285 5286 mtx_init(&file_priv->mm.lock, IPL_NONE); 5287 INIT_LIST_HEAD(&file_priv->mm.request_list); 5288 INIT_DELAYED_WORK(&file_priv->mm.idle_work, 5289 i915_gem_file_idle_work_handler); 5290 5291 idr_init(&file_priv->context_idr); 5292 5293 return 0; 5294 } 5295 5296 #ifdef notyet 5297 static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task) 5298 { 5299 if (!mutex_is_locked(mutex)) 5300 return false; 5301 5302 #if defined(CONFIG_SMP) && !defined(CONFIG_DEBUG_MUTEXES) 5303 return mutex->owner == task; 5304 #else 5305 /* Since UP may be pre-empted, we cannot assume that we own the lock */ 5306 return false; 5307 #endif 5308 } 5309 5310 static unsigned long 5311 i915_gem_inactive_count(struct shrinker *shrinker, struct shrink_control *sc) 5312 { 5313 struct drm_i915_private *dev_priv = 5314 container_of(shrinker, 5315 struct drm_i915_private, 5316 mm.inactive_shrinker); 5317 struct drm_device *dev = dev_priv->dev; 5318 struct drm_i915_gem_object 
*obj; 5319 bool unlock = true; 5320 unsigned long count; 5321 5322 if (!mutex_trylock(&dev->struct_mutex)) { 5323 if (!mutex_is_locked_by(&dev->struct_mutex, current)) 5324 return 0; 5325 5326 if (dev_priv->mm.shrinker_no_lock_stealing) 5327 return 0; 5328 5329 unlock = false; 5330 } 5331 5332 count = 0; 5333 list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list) 5334 if (obj->pages_pin_count == 0) 5335 count += obj->base.size >> PAGE_SHIFT; 5336 5337 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { 5338 if (obj->active) 5339 continue; 5340 5341 if (obj->pin_count == 0 && obj->pages_pin_count == 0) 5342 count += obj->base.size >> PAGE_SHIFT; 5343 } 5344 5345 if (unlock) 5346 mutex_unlock(&dev->struct_mutex); 5347 5348 return count; 5349 } 5350 #endif 5351 5352 /* All the new VM stuff */ 5353 unsigned long i915_gem_obj_offset(struct drm_i915_gem_object *o, 5354 struct i915_address_space *vm) 5355 { 5356 struct drm_i915_private *dev_priv = o->base.dev->dev_private; 5357 struct i915_vma *vma; 5358 5359 if (vm == &dev_priv->mm.aliasing_ppgtt->base) 5360 vm = &dev_priv->gtt.base; 5361 5362 BUG_ON(list_empty(&o->vma_list)); 5363 list_for_each_entry(vma, &o->vma_list, vma_link) { 5364 if (vma->vm == vm) 5365 return vma->node.start; 5366 5367 } 5368 return -1; 5369 } 5370 5371 bool i915_gem_obj_bound(struct drm_i915_gem_object *o, 5372 struct i915_address_space *vm) 5373 { 5374 struct i915_vma *vma; 5375 5376 list_for_each_entry(vma, &o->vma_list, vma_link) 5377 if (vma->vm == vm && drm_mm_node_allocated(&vma->node)) 5378 return true; 5379 5380 return false; 5381 } 5382 5383 bool i915_gem_obj_bound_any(struct drm_i915_gem_object *o) 5384 { 5385 struct i915_vma *vma; 5386 5387 list_for_each_entry(vma, &o->vma_list, vma_link) 5388 if (drm_mm_node_allocated(&vma->node)) 5389 return true; 5390 5391 return false; 5392 } 5393 5394 unsigned long i915_gem_obj_size(struct drm_i915_gem_object *o, 5395 struct i915_address_space *vm) 5396 { 5397 struct drm_i915_private *dev_priv = o->base.dev->dev_private; 5398 struct i915_vma *vma; 5399 5400 if (vm == &dev_priv->mm.aliasing_ppgtt->base) 5401 vm = &dev_priv->gtt.base; 5402 5403 BUG_ON(list_empty(&o->vma_list)); 5404 5405 list_for_each_entry(vma, &o->vma_list, vma_link) 5406 if (vma->vm == vm) 5407 return vma->node.size; 5408 5409 return 0; 5410 } 5411 5412 #ifdef notyet 5413 static unsigned long 5414 i915_gem_inactive_scan(struct shrinker *shrinker, struct shrink_control *sc) 5415 { 5416 struct drm_i915_private *dev_priv = 5417 container_of(shrinker, 5418 struct drm_i915_private, 5419 mm.inactive_shrinker); 5420 struct drm_device *dev = dev_priv->dev; 5421 unsigned long freed; 5422 bool unlock = true; 5423 5424 if (!mutex_trylock(&dev->struct_mutex)) { 5425 if (!mutex_is_locked_by(&dev->struct_mutex, current)) 5426 return SHRINK_STOP; 5427 5428 if (dev_priv->mm.shrinker_no_lock_stealing) 5429 return SHRINK_STOP; 5430 5431 unlock = false; 5432 } 5433 5434 freed = i915_gem_purge(dev_priv, sc->nr_to_scan); 5435 if (freed < sc->nr_to_scan) 5436 freed += __i915_gem_shrink(dev_priv, 5437 sc->nr_to_scan - freed, 5438 false); 5439 if (freed < sc->nr_to_scan) 5440 freed += i915_gem_shrink_all(dev_priv); 5441 5442 if (unlock) 5443 mutex_unlock(&dev->struct_mutex); 5444 5445 return freed; 5446 } 5447 #endif /* notyet */ 5448 5449 struct i915_vma *i915_gem_obj_to_ggtt(struct drm_i915_gem_object *obj) 5450 { 5451 struct i915_vma *vma; 5452 5453 if (WARN_ON(list_empty(&obj->vma_list))) 5454 return NULL; 5455 5456 vma = 
list_first_entry(&obj->vma_list, typeof(*vma), vma_link); 5457 if (WARN_ON(vma->vm != obj_to_ggtt(obj))) 5458 return NULL; 5459 5460 return vma; 5461 } 5462
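/*
 * Editor's sketch (illustrative only, never compiled): one way a caller
 * inside the driver could use the scanout helpers above.  The function
 * name example_pin_scanout() and the 4096-byte alignment are invented for
 * this example; everything it calls is defined or used earlier in this
 * file.  A real caller would later drop the pin with
 * i915_gem_object_unpin_from_display_plane() and release its reference.
 */
#if 0
static int
example_pin_scanout(struct drm_device *dev, size_t size,
    unsigned long *gtt_offset)
{
	struct drm_i915_gem_object *obj;
	int ret;

	obj = i915_gem_alloc_object(dev, size);
	if (obj == NULL)
		return -ENOMEM;

	mutex_lock(&dev->struct_mutex);
	/* Moves the object to an uncached/WT cache level and pins it
	 * map_and_fenceable in the GGTT, as scanout requires. */
	ret = i915_gem_object_pin_to_display_plane(obj, 4096, NULL);
	if (ret == 0)
		*gtt_offset = i915_gem_obj_ggtt_offset(obj);
	else
		drm_gem_object_unreference(&obj->base);
	mutex_unlock(&dev->struct_mutex);

	return ret;
}
#endif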