/*
 * Copyright © 2008 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 * Copyright (c) 2011 The FreeBSD Foundation
 * All rights reserved.
 *
 * This software was developed by Konstantin Belousov under sponsorship from
 * the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

#include <sys/resourcevar.h>
#include <sys/sfbuf.h>

#include <drm/drmP.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "intel_drv.h"
#include <linux/shmem_fs.h>
#include <linux/completion.h>
#include <linux/highmem.h>
#include <linux/jiffies.h>
#include <linux/time.h>

static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
static __must_check int i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
						    unsigned alignment,
						    bool map_and_fenceable,
						    bool nonblocking);
static int i915_gem_phys_pwrite(struct drm_device *dev,
				struct drm_i915_gem_object *obj,
				struct drm_i915_gem_pwrite *args,
				struct drm_file *file);

static void i915_gem_write_fence(struct drm_device *dev, int reg,
				 struct drm_i915_gem_object *obj);
static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
					 struct drm_i915_fence_reg *fence,
					 bool enable);

static long i915_gem_purge(struct drm_i915_private *dev_priv, long target);

static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj);
static void i915_gem_object_truncate(struct drm_i915_gem_object *obj);

static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
{
	if (obj->tiling_mode)
		i915_gem_release_mmap(obj);

	/* As we do not have an associated fence register, we will force
	 * a tiling change if we ever need to acquire one.
	 */
	obj->fence_dirty = false;
	obj->fence_reg = I915_FENCE_REG_NONE;
}

static bool i915_gem_object_is_inactive(struct drm_i915_gem_object *obj);
static void i915_gem_lowmem(void *arg);

/* some bookkeeping */
static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
				  size_t size)
{
	dev_priv->mm.object_count++;
	dev_priv->mm.object_memory += size;
}

static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
				     size_t size)
{
	dev_priv->mm.object_count--;
	dev_priv->mm.object_memory -= size;
}

static int
i915_gem_wait_for_error(struct i915_gpu_error *error)
{
	int ret;

#define EXIT_COND (!i915_reset_in_progress(error) || \
		   i915_terminally_wedged(error))
	if (EXIT_COND)
		return 0;

	/*
	 * Only wait 10 seconds for the gpu reset to complete to avoid hanging
	 * userspace. If it takes that long something really bad is going on and
	 * we should simply try to bail out and fail as gracefully as possible.
	 */
	ret = wait_event_interruptible_timeout(error->reset_queue,
					       EXIT_COND,
					       10*HZ);
	if (ret == 0) {
		DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
		return -EIO;
	} else if (ret < 0) {
		return ret;
	}
#undef EXIT_COND

	return 0;
}

int i915_mutex_lock_interruptible(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret;

	ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
	if (ret)
		return ret;

	ret = lockmgr(&dev->struct_mutex, LK_EXCLUSIVE|LK_SLEEPFAIL);
	if (ret)
		return -EINTR;

	WARN_ON(i915_verify_lists(dev));
	return 0;
}

static inline bool
i915_gem_object_is_inactive(struct drm_i915_gem_object *obj)
{
	return !obj->active;
}

int
i915_gem_init_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_gem_init *args = data;

	if (drm_core_check_feature(dev, DRIVER_MODESET))
		return -ENODEV;

	if (args->gtt_start >= args->gtt_end ||
	    (args->gtt_end | args->gtt_start) & (PAGE_SIZE - 1))
		return -EINVAL;

	/* GEM with user mode setting was never supported on ilk and later. */
	if (INTEL_INFO(dev)->gen >= 5)
		return -ENODEV;

	mutex_lock(&dev->struct_mutex);
	i915_gem_setup_global_gtt(dev, args->gtt_start, args->gtt_end,
				  args->gtt_end);
	mutex_unlock(&dev->struct_mutex);

	return 0;
}

int
i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
			    struct drm_file *file)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_get_aperture *args = data;
	struct drm_i915_gem_object *obj;
	size_t pinned;

	pinned = 0;
	mutex_lock(&dev->struct_mutex);
	list_for_each_entry(obj, &dev_priv->mm.bound_list, gtt_list)
		if (obj->pin_count)
			pinned += obj->gtt_space->size;
	mutex_unlock(&dev->struct_mutex);

	args->aper_size = dev_priv->gtt.total;
	args->aper_available_size = args->aper_size - pinned;

	return 0;
}

static int
i915_gem_create(struct drm_file *file,
		struct drm_device *dev,
		uint64_t size,
		uint32_t *handle_p)
{
	struct drm_i915_gem_object *obj;
	int ret;
	u32 handle;

	size = roundup(size, PAGE_SIZE);
	if (size == 0)
		return -EINVAL;

	/* Allocate the new object */
	obj = i915_gem_alloc_object(dev, size);
	if (obj == NULL)
		return -ENOMEM;

	handle = 0;
	ret = drm_gem_handle_create(file, &obj->base, &handle);
	if (ret) {
		drm_gem_object_release(&obj->base);
		i915_gem_info_remove_obj(dev->dev_private, obj->base.size);
		drm_free(obj, M_DRM);
		return (-ret);
	}

	/* drop reference from allocate - handle holds it now */
	drm_gem_object_unreference(&obj->base);
	*handle_p = handle;
	return 0;
}

int
i915_gem_dumb_create(struct drm_file *file,
		     struct drm_device *dev,
		     struct drm_mode_create_dumb *args)
{

	/* have to work out size/pitch and return them */
	args->pitch = roundup2(args->width * ((args->bpp + 7) / 8), 64);
	args->size = args->pitch * args->height;
	return i915_gem_create(file, dev,
			       args->size, &args->handle);
}

int i915_gem_dumb_destroy(struct drm_file *file,
			  struct drm_device *dev,
			  uint32_t handle)
{

	return drm_gem_handle_delete(file, handle);
}

/**
 * Creates a new mm object and returns a handle to it.
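 *
 * The requested size is rounded up to the page size by i915_gem_create(),
 * so e.g. a 1-byte request is backed by a full page-sized object; userspace
 * only ever sees the handle.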
 */
int
i915_gem_create_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_gem_create *args = data;

	return i915_gem_create(file, dev,
			       args->size, &args->handle);
}

static inline void vm_page_reference(vm_page_t m)
{
	vm_page_flag_set(m, PG_REFERENCED);
}

static int
i915_gem_shmem_pread(struct drm_device *dev,
		     struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pread *args,
		     struct drm_file *file)
{
	vm_object_t vm_obj;
	vm_page_t m;
	struct sf_buf *sf;
	vm_offset_t mkva;
	vm_pindex_t obj_pi;
	int cnt, do_bit17_swizzling, length, obj_po, ret, swizzled_po;

	do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);

	obj->dirty = 1;
	vm_obj = obj->base.vm_obj;
	ret = 0;

	VM_OBJECT_LOCK(vm_obj);
	vm_object_pip_add(vm_obj, 1);
	while (args->size > 0) {
		obj_pi = OFF_TO_IDX(args->offset);
		obj_po = args->offset & PAGE_MASK;

		m = shmem_read_mapping_page(vm_obj, obj_pi);
		VM_OBJECT_UNLOCK(vm_obj);

		sf = sf_buf_alloc(m);
		mkva = sf_buf_kva(sf);
		length = min(args->size, PAGE_SIZE - obj_po);
		while (length > 0) {
			if (do_bit17_swizzling &&
			    (VM_PAGE_TO_PHYS(m) & (1 << 17)) != 0) {
				cnt = roundup2(obj_po + 1, 64);
				cnt = min(cnt - obj_po, length);
				swizzled_po = obj_po ^ 64;
			} else {
				cnt = length;
				swizzled_po = obj_po;
			}
			ret = -copyout_nofault(
			    (char *)mkva + swizzled_po,
			    (void *)(uintptr_t)args->data_ptr, cnt);
			if (ret != 0)
				break;
			args->data_ptr += cnt;
			args->size -= cnt;
			length -= cnt;
			args->offset += cnt;
			obj_po += cnt;
		}
		sf_buf_free(sf);
		VM_OBJECT_LOCK(vm_obj);
		vm_page_reference(m);
		vm_page_busy_wait(m, FALSE, "i915gem");
		vm_page_unwire(m, 1);
		vm_page_wakeup(m);

		if (ret != 0)
			break;
	}
	vm_object_pip_wakeup(vm_obj);
	VM_OBJECT_UNLOCK(vm_obj);

	return (ret);
}

/**
 * Reads data from the object referenced by handle.
 *
 * On error, the contents of *data are undefined.
 */
int
i915_gem_pread_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file)
{
	struct drm_i915_gem_pread *args = data;
	struct drm_i915_gem_object *obj;
	int ret = 0;

	if (args->size == 0)
		return 0;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Bounds check source. */
	if (args->offset > obj->base.size ||
	    args->size > obj->base.size - args->offset) {
		ret = -EINVAL;
		goto out;
	}

	ret = i915_gem_shmem_pread(dev, obj, args, file);
out:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

#if 0
/* This is the fast write path which cannot handle
 * page faults in the source data
 */

static inline int
fast_user_write(struct io_mapping *mapping,
		loff_t page_base, int page_offset,
		char __user *user_data,
		int length)
{
	void __iomem *vaddr_atomic;
	void *vaddr;
	unsigned long unwritten;

	vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
	/* We can use the cpu mem copy function because this is X86.
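	 * (i.e. an ordinary CPU copy works on the WC-mapped aperture here;
	 * no special I/O accessors are needed).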
	 */
	vaddr = (void __force*)vaddr_atomic + page_offset;
	unwritten = __copy_from_user_inatomic_nocache(vaddr,
						      user_data, length);
	io_mapping_unmap_atomic(vaddr_atomic);
	return unwritten;
}

/**
 * This is the fast pwrite path, where we copy the data directly from the
 * user into the GTT, uncached.
 */
static int
i915_gem_gtt_pwrite_fast(struct drm_device *dev,
			 struct drm_i915_gem_object *obj,
			 struct drm_i915_gem_pwrite *args,
			 struct drm_file *file)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	ssize_t remain;
	loff_t offset, page_base;
	char __user *user_data;
	int page_offset, page_length, ret;

	ret = i915_gem_object_pin(obj, 0, true, true);
	if (ret)
		goto out;

	ret = i915_gem_object_set_to_gtt_domain(obj, true);
	if (ret)
		goto out_unpin;

	ret = i915_gem_object_put_fence(obj);
	if (ret)
		goto out_unpin;

	user_data = to_user_ptr(args->data_ptr);
	remain = args->size;

	offset = obj->gtt_offset + args->offset;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		page_base = offset & PAGE_MASK;
		page_offset = offset_in_page(offset);
		page_length = remain;
		if ((page_offset + remain) > PAGE_SIZE)
			page_length = PAGE_SIZE - page_offset;

		/* If we get a fault while copying data, then (presumably) our
		 * source page isn't available.  Return the error and we'll
		 * retry in the slow path.
		 */
		if (fast_user_write(dev_priv->gtt.mappable, page_base,
				    page_offset, user_data, page_length)) {
			ret = -EFAULT;
			goto out_unpin;
		}

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

out_unpin:
	i915_gem_object_unpin(obj);
out:
	return ret;
}
#endif

static int
i915_gem_gtt_write(struct drm_device *dev, struct drm_i915_gem_object *obj,
    uint64_t data_ptr, uint64_t size, uint64_t offset, struct drm_file *file)
{
	vm_offset_t mkva;
	int ret;

	/*
	 * Pass the unaligned physical address and size to pmap_mapdev_attr()
	 * so it can properly calculate whether an extra page needs to be
	 * mapped or not to cover the requested range.  The function will
	 * add the page offset into the returned mkva for us.
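	 * E.g. an unaligned offset of 0x1ff0 with size 0x20 spans two pages;
	 * passing the raw values lets it map both and return kva + 0xff0.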
	 */
	mkva = (vm_offset_t)pmap_mapdev_attr(dev->agp->base + obj->gtt_offset +
	    offset, size, PAT_WRITE_COMBINING);
	ret = -copyin_nofault((void *)(uintptr_t)data_ptr, (char *)mkva, size);
	pmap_unmapdev(mkva, size);
	return ret;
}

static int
i915_gem_shmem_pwrite(struct drm_device *dev,
		      struct drm_i915_gem_object *obj,
		      struct drm_i915_gem_pwrite *args,
		      struct drm_file *file)
{
	vm_object_t vm_obj;
	vm_page_t m;
	struct sf_buf *sf;
	vm_offset_t mkva;
	vm_pindex_t obj_pi;
	int cnt, do_bit17_swizzling, length, obj_po, ret, swizzled_po;

	do_bit17_swizzling = 0;

	obj->dirty = 1;
	vm_obj = obj->base.vm_obj;
	ret = 0;

	VM_OBJECT_LOCK(vm_obj);
	vm_object_pip_add(vm_obj, 1);
	while (args->size > 0) {
		obj_pi = OFF_TO_IDX(args->offset);
		obj_po = args->offset & PAGE_MASK;

		m = shmem_read_mapping_page(vm_obj, obj_pi);
		VM_OBJECT_UNLOCK(vm_obj);

		sf = sf_buf_alloc(m);
		mkva = sf_buf_kva(sf);
		length = min(args->size, PAGE_SIZE - obj_po);
		while (length > 0) {
			if (do_bit17_swizzling &&
			    (VM_PAGE_TO_PHYS(m) & (1 << 17)) != 0) {
				cnt = roundup2(obj_po + 1, 64);
				cnt = min(cnt - obj_po, length);
				swizzled_po = obj_po ^ 64;
			} else {
				cnt = length;
				swizzled_po = obj_po;
			}
			ret = -copyin_nofault(
			    (void *)(uintptr_t)args->data_ptr,
			    (char *)mkva + swizzled_po, cnt);
			if (ret != 0)
				break;
			args->data_ptr += cnt;
			args->size -= cnt;
			length -= cnt;
			args->offset += cnt;
			obj_po += cnt;
		}
		sf_buf_free(sf);
		VM_OBJECT_LOCK(vm_obj);
		vm_page_dirty(m);
		vm_page_reference(m);
		vm_page_busy_wait(m, FALSE, "i915gem");
		vm_page_unwire(m, 1);
		vm_page_wakeup(m);

		if (ret != 0)
			break;
	}
	vm_object_pip_wakeup(vm_obj);
	VM_OBJECT_UNLOCK(vm_obj);

	return (ret);
}

/**
 * Writes data to the object referenced by handle.
 *
 * On error, the contents of the buffer that were to be modified are undefined.
 */
int
i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_gem_pwrite *args = data;
	struct drm_i915_gem_object *obj;
	vm_page_t *ma;
	vm_offset_t start, end;
	int npages, ret;

	if (args->size == 0)
		return 0;

	start = trunc_page(args->data_ptr);
	end = round_page(args->data_ptr + args->size);
	npages = howmany(end - start, PAGE_SIZE);
	ma = kmalloc(npages * sizeof(vm_page_t), M_DRM, M_WAITOK |
	    M_ZERO);
	npages = vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map,
	    (vm_offset_t)args->data_ptr, args->size,
	    VM_PROT_READ, ma, npages);
	if (npages == -1) {
		ret = -EFAULT;
		goto free_ma;
	}

	ret = i915_mutex_lock_interruptible(dev);
	if (ret != 0)
		goto unlocked;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Bounds check destination.
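	 * (Written as two comparisons against obj->base.size so that
	 * offset + size can never overflow before being checked.)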
	 */
	if (args->offset > obj->base.size ||
	    args->size > obj->base.size - args->offset) {
		ret = -EINVAL;
		goto out;
	}

	if (obj->phys_obj) {
		ret = i915_gem_phys_pwrite(dev, obj, args, file);
	} else if (obj->gtt_space &&
		   obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
		ret = i915_gem_object_pin(obj, 0, true, false);
		if (ret != 0)
			goto out;
		ret = i915_gem_object_set_to_gtt_domain(obj, true);
		if (ret != 0)
			goto out_unpin;
		ret = i915_gem_object_put_fence(obj);
		if (ret != 0)
			goto out_unpin;
		ret = i915_gem_gtt_write(dev, obj, args->data_ptr, args->size,
		    args->offset, file);
out_unpin:
		i915_gem_object_unpin(obj);
	} else {
		ret = i915_gem_object_set_to_cpu_domain(obj, true);
		if (ret != 0)
			goto out;
		ret = i915_gem_shmem_pwrite(dev, obj, args, file);
	}
out:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
unlocked:
	vm_page_unhold_pages(ma, npages);
free_ma:
	drm_free(ma, M_DRM);
	return ret;
}

int
i915_gem_check_wedge(struct i915_gpu_error *error,
		     bool interruptible)
{
	if (i915_reset_in_progress(error)) {
		/* Non-interruptible callers can't handle -EAGAIN, hence return
		 * -EIO unconditionally for these. */
		if (!interruptible)
			return -EIO;

		/* Recovery complete, but the reset failed ... */
		if (i915_terminally_wedged(error))
			return -EIO;

		return -EAGAIN;
	}

	return 0;
}

/*
 * Compare seqno against outstanding lazy request. Emit a request if they are
 * equal.
 */
static int
i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno)
{
	int ret;

	DRM_LOCK_ASSERT(ring->dev);

	ret = 0;
	if (seqno == ring->outstanding_lazy_request)
		ret = i915_add_request(ring, NULL, NULL);

	return ret;
}

/**
 * __wait_seqno - wait until execution of seqno has finished
 * @ring: the ring expected to report seqno
 * @seqno: duh!
 * @reset_counter: reset sequence associated with the given seqno
 * @interruptible: do an interruptible wait (normally yes)
 * @timeout: in - how long to wait (NULL forever); out - how much time remaining
 *
 * Note: It is of utmost importance that the passed in seqno and reset_counter
 * values have been read by the caller in an smp safe manner. Where read-side
 * locks are involved, it is sufficient to read the reset_counter before
 * unlocking the lock that protects the seqno. For lockless tricks, the
 * reset_counter _must_ be read before, and an appropriate smp_rmb must be
 * inserted.
 *
 * Returns 0 if the seqno was found within the allotted time. Else returns the
 * errno with remaining time filled in timeout argument.
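 *
 * A typical locked caller therefore reads the seqno and reset_counter,
 * drops struct_mutex, and only then waits, e.g.:
 *	reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
 *	mutex_unlock(&dev->struct_mutex);
 *	ret = __wait_seqno(ring, seqno, reset_counter, true, NULL);
 * as done by i915_gem_object_wait_rendering__nonblocking() below.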
 */
static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
			unsigned reset_counter,
			bool interruptible, struct timespec *timeout)
{
	drm_i915_private_t *dev_priv = ring->dev->dev_private;
	struct timespec before, now, wait_time={1,0};
	unsigned long timeout_jiffies;
	long end;
	bool wait_forever = true;
	int ret;

	if (i915_seqno_passed(ring->get_seqno(ring, true), seqno))
		return 0;

	if (timeout != NULL) {
		wait_time = *timeout;
		wait_forever = false;
	}

	timeout_jiffies = timespec_to_jiffies_timeout(&wait_time);

	if (WARN_ON(!ring->irq_get(ring)))
		return -ENODEV;

	/* Record current time in case interrupted by signal, or wedged */
	getrawmonotonic(&before);

#define EXIT_COND \
	(i915_seqno_passed(ring->get_seqno(ring, false), seqno) || \
	 i915_reset_in_progress(&dev_priv->gpu_error) || \
	 reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter))
	do {
		if (interruptible)
			end = wait_event_interruptible_timeout(ring->irq_queue,
							       EXIT_COND,
							       timeout_jiffies);
		else
			end = wait_event_timeout(ring->irq_queue, EXIT_COND,
						 timeout_jiffies);

		/* We need to check whether any gpu reset happened in between
		 * the caller grabbing the seqno and now ... */
		if (reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter))
			end = -EAGAIN;

		/* ... but upgrade the -EAGAIN to an -EIO if the gpu is truly
		 * gone. */
		ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
		if (ret)
			end = ret;
	} while (end == 0 && wait_forever);

	getrawmonotonic(&now);

	ring->irq_put(ring);
#undef EXIT_COND

	if (timeout) {
		struct timespec sleep_time = timespec_sub(now, before);
		*timeout = timespec_sub(*timeout, sleep_time);
		if (!timespec_valid(timeout)) /* i.e. negative time remains */
			set_normalized_timespec(timeout, 0, 0);
	}

	switch (end) {
	case -EIO:
	case -EAGAIN: /* Wedged */
	case -ERESTARTSYS: /* Signal */
		return (int)end;
	case 0: /* Timeout */
		return -ETIMEDOUT;	/* -ETIME on Linux */
	default: /* Completed */
		WARN_ON(end < 0); /* We're not aware of other errors */
		return 0;
	}
}

/**
 * Waits for a sequence number to be signaled, and cleans up the
 * request and object lists appropriately for that event.
 */
int
i915_wait_seqno(struct intel_ring_buffer *ring, uint32_t seqno)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	bool interruptible = dev_priv->mm.interruptible;
	int ret;

	DRM_LOCK_ASSERT(dev);
	BUG_ON(seqno == 0);

	ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
	if (ret)
		return ret;

	ret = i915_gem_check_olr(ring, seqno);
	if (ret)
		return ret;

	return __wait_seqno(ring, seqno,
			    atomic_read(&dev_priv->gpu_error.reset_counter),
			    interruptible, NULL);
}

/**
 * Ensures that all rendering to the object has completed and the object is
 * safe to unbind from the GTT or access from the CPU.
 */
static __must_check int
i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
			       bool readonly)
{
	struct intel_ring_buffer *ring = obj->ring;
	u32 seqno;
	int ret;

	seqno = readonly ?
		obj->last_write_seqno : obj->last_read_seqno;
	if (seqno == 0)
		return 0;

	ret = i915_wait_seqno(ring, seqno);
	if (ret)
		return ret;

	i915_gem_retire_requests_ring(ring);

	/* Manually manage the write flush as we may have not yet
	 * retired the buffer.
	 */
	if (obj->last_write_seqno &&
	    i915_seqno_passed(seqno, obj->last_write_seqno)) {
		obj->last_write_seqno = 0;
		obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
	}

	return 0;
}

/* A nonblocking variant of the above wait. This is a highly dangerous routine
 * as the object state may change during this call.
 */
static __must_check int
i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
					    bool readonly)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_ring_buffer *ring = obj->ring;
	unsigned reset_counter;
	u32 seqno;
	int ret;

	DRM_LOCK_ASSERT(dev);
	BUG_ON(!dev_priv->mm.interruptible);

	seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
	if (seqno == 0)
		return 0;

	ret = i915_gem_check_wedge(&dev_priv->gpu_error, true);
	if (ret)
		return ret;

	ret = i915_gem_check_olr(ring, seqno);
	if (ret)
		return ret;

	reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
	mutex_unlock(&dev->struct_mutex);
	ret = __wait_seqno(ring, seqno, reset_counter, true, NULL);
	mutex_lock(&dev->struct_mutex);

	i915_gem_retire_requests_ring(ring);

	/* Manually manage the write flush as we may have not yet
	 * retired the buffer.
	 */
	if (obj->last_write_seqno &&
	    i915_seqno_passed(seqno, obj->last_write_seqno)) {
		obj->last_write_seqno = 0;
		obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
	}

	return ret;
}

/**
 * Called when user space prepares to use an object with the CPU, either
 * through the mmap ioctl's mapping or a GTT mapping.
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file)
{
	struct drm_i915_gem_set_domain *args = data;
	struct drm_i915_gem_object *obj;
	uint32_t read_domains = args->read_domains;
	uint32_t write_domain = args->write_domain;
	int ret;

	/* Only handle setting domains to types used by the CPU. */
	if (write_domain & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	if (read_domains & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	/* Having something in the write domain implies it's in the read
	 * domain, and only that read domain.  Enforce that in the request.
	 */
	if (write_domain != 0 && read_domains != write_domain)
		return -EINVAL;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Try to flush the object off the GPU without holding the lock.
	 * We will repeat the flush holding the lock in the normal manner
	 * to catch cases where we are gazumped.
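	 * ("Gazumped": another thread re-dirtied the object on the GPU while
	 * we slept unlocked, so the locked set-to-domain call below may
	 * legitimately have to wait again.)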
	 */
	ret = i915_gem_object_wait_rendering__nonblocking(obj, !write_domain);
	if (ret)
		goto unref;

	if (read_domains & I915_GEM_DOMAIN_GTT) {
		ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);

		/* Silently promote "you're not bound, there was nothing to do"
		 * to success, since the client was just asking us to
		 * make sure everything was done.
		 */
		if (ret == -EINVAL)
			ret = 0;
	} else {
		ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
	}

unref:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/**
 * Called when user space has done writes to this buffer
 */
int
i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file)
{
	struct drm_i915_gem_sw_finish *args = data;
	struct drm_i915_gem_object *obj;
	int ret = 0;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;
	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Pinned buffers may be scanout, so flush the cache */
	if (obj->pin_count)
		i915_gem_object_flush_cpu_write_domain(obj);

	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/**
 * Maps the contents of an object, returning the address it is mapped
 * into.
 *
 * While the mapping holds a reference on the contents of the object, it doesn't
 * imply a ref on the object itself.
 */
int
i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_gem_mmap *args = data;
	struct drm_gem_object *obj;
	struct proc *p = curproc;
	vm_map_t map = &p->p_vmspace->vm_map;
	vm_offset_t addr;
	vm_size_t size;
	int error = 0, rv;

	obj = drm_gem_object_lookup(dev, file, args->handle);
	if (obj == NULL)
		return -ENOENT;

	if (args->size == 0)
		goto out;

	size = round_page(args->size);
	if (map->size + size > p->p_rlimit[RLIMIT_VMEM].rlim_cur) {
		error = ENOMEM;
		goto out;
	}

	addr = 0;
	vm_object_hold(obj->vm_obj);
	vm_object_reference_locked(obj->vm_obj);
	vm_object_drop(obj->vm_obj);
	rv = vm_map_find(map, obj->vm_obj, NULL,
			 args->offset, &addr, args->size,
			 PAGE_SIZE, /* align */
			 TRUE, /* fitit */
			 VM_MAPTYPE_NORMAL, /* maptype */
			 VM_PROT_READ | VM_PROT_WRITE, /* prot */
			 VM_PROT_READ | VM_PROT_WRITE, /* max */
			 MAP_SHARED /* cow */);
	if (rv != KERN_SUCCESS) {
		vm_object_deallocate(obj->vm_obj);
		error = -vm_mmap_to_errno(rv);
	} else {
		args->addr_ptr = (uint64_t)addr;
	}
out:
	drm_gem_object_unreference(obj);
	return (error);
}

/**
 * i915_gem_fault - fault a page into the GTT
 * vma: VMA in question
 * vmf: fault info
 *
 * The fault handler is set up by drm_gem_mmap() when an object is GTT mapped
 * from userspace.  The fault handler takes care of binding the object to
 * the GTT (if needed), allocating and programming a fence register (again,
 * only if needed based on whether the old reg is still valid or the object
 * is tiled) and inserting a new PTE into the faulting process.
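 *
 * Only the single faulting page is mapped per invocation (vm_insert_pfn()
 * inserts one pfn), so a large object is populated fault by fault.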
 *
 * Note that the faulting process may involve evicting existing objects
 * from the GTT and/or fence registers to make room.  So performance may
 * suffer if the GTT working set is large or there are few fence registers
 * left.
 */
#if 0
int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct drm_i915_gem_object *obj = to_intel_bo(vma->vm_private_data);
	struct drm_device *dev = obj->base.dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	pgoff_t page_offset;
	unsigned long pfn;
	int ret = 0;
	bool write = !!(vmf->flags & FAULT_FLAG_WRITE);

	/* We don't use vmf->pgoff since that has the fake offset */
	page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
		PAGE_SHIFT;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		goto out;

	trace_i915_gem_object_fault(obj, page_offset, true, write);

	/* Access to snoopable pages through the GTT is incoherent. */
	if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) {
		ret = -EINVAL;
		goto unlock;
	}

	/* Now bind it into the GTT if needed */
	ret = i915_gem_object_pin(obj, 0, true, false);
	if (ret)
		goto unlock;

	ret = i915_gem_object_set_to_gtt_domain(obj, write);
	if (ret)
		goto unpin;

	ret = i915_gem_object_get_fence(obj);
	if (ret)
		goto unpin;

	obj->fault_mappable = true;

	pfn = ((dev_priv->gtt.mappable_base + obj->gtt_offset) >> PAGE_SHIFT) +
		page_offset;

	/* Finally, remap it using the new GTT offset */
	ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);
unpin:
	i915_gem_object_unpin(obj);
unlock:
	mutex_unlock(&dev->struct_mutex);
out:
	switch (ret) {
	case -EIO:
		/* If this -EIO is due to a gpu hang, give the reset code a
		 * chance to clean up the mess. Otherwise return the proper
		 * SIGBUS. */
		if (i915_terminally_wedged(&dev_priv->gpu_error))
			return VM_FAULT_SIGBUS;
	case -EAGAIN:
		/* Give the error handler a chance to run and move the
		 * objects off the GPU active list. Next time we service the
		 * fault, we should be able to transition the page into the
		 * GTT without touching the GPU (and so avoid further
		 * EIO/EAGAIN). If the GPU is wedged, then there is no issue
		 * with coherency, just lost writes.
		 */
		set_need_resched();
	case 0:
	case -ERESTARTSYS:
	case -EINTR:
	case -EBUSY:
		/*
		 * EBUSY is ok: this just means that another thread
		 * already did the job.
		 */
		return VM_FAULT_NOPAGE;
	case -ENOMEM:
		return VM_FAULT_OOM;
	case -ENOSPC:
		return VM_FAULT_SIGBUS;
	default:
		WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret);
		return VM_FAULT_SIGBUS;
	}
}
#endif

/**
 * i915_gem_release_mmap - remove physical page mappings
 * @obj: obj in question
 *
 * Preserve the reservation of the mmapping with the DRM core code, but
 * relinquish ownership of the pages back to the system.
 *
 * It is vital that we remove the page mapping if we have mapped a tiled
 * object through the GTT and then lose the fence register due to
 * resource pressure. Similarly if the object has been moved out of the
 * aperture, then pages mapped into userspace must be revoked. Removing the
 * mapping will then trigger a page fault on the next user access, allowing
 * fixup by i915_gem_fault().
 */
void
i915_gem_release_mmap(struct drm_i915_gem_object *obj)
{
	vm_object_t devobj;
	vm_page_t m;
	int i, page_count;

	if (!obj->fault_mappable)
		return;

	devobj = cdev_pager_lookup(obj);
	if (devobj != NULL) {
		page_count = OFF_TO_IDX(obj->base.size);

		VM_OBJECT_LOCK(devobj);
		for (i = 0; i < page_count; i++) {
			m = vm_page_lookup_busy_wait(devobj, i, TRUE, "915unm");
			if (m == NULL)
				continue;
			cdev_pager_free_page(devobj, m);
		}
		VM_OBJECT_UNLOCK(devobj);
		vm_object_deallocate(devobj);
	}

	obj->fault_mappable = false;
}

uint32_t
i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
{
	uint32_t gtt_size;

	if (INTEL_INFO(dev)->gen >= 4 ||
	    tiling_mode == I915_TILING_NONE)
		return size;

	/* Previous chips need a power-of-two fence region when tiling */
	if (INTEL_INFO(dev)->gen == 3)
		gtt_size = 1024*1024;
	else
		gtt_size = 512*1024;

	while (gtt_size < size)
		gtt_size <<= 1;

	return gtt_size;
}

/**
 * i915_gem_get_gtt_alignment - return required GTT alignment for an object
 * @obj: object to check
 *
 * Return the required GTT alignment for an object, taking into account
 * potential fence register mapping.
 */
uint32_t
i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size,
			   int tiling_mode, bool fenced)
{

	/*
	 * Minimum alignment is 4k (GTT page size), but might be greater
	 * if a fence register is needed for the object.
	 */
	if (INTEL_INFO(dev)->gen >= 4 || (!fenced && IS_G33(dev)) ||
	    tiling_mode == I915_TILING_NONE)
		return 4096;

	/*
	 * Previous chips need to be aligned to the size of the smallest
	 * fence register that can contain the object.
	 */
	return i915_gem_get_gtt_size(dev, size, tiling_mode);
}

int
i915_gem_mmap_gtt(struct drm_file *file,
		  struct drm_device *dev,
		  uint32_t handle,
		  uint64_t *offset)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj;
	int ret;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	if (obj->base.size > dev_priv->gtt.mappable_end) {
		ret = -E2BIG;
		goto out;
	}

	if (obj->madv != I915_MADV_WILLNEED) {
		DRM_ERROR("Attempting to mmap a purgeable buffer\n");
		ret = -EINVAL;
		goto out;
	}

	ret = drm_gem_create_mmap_offset(&obj->base);
	if (ret)
		goto out;

	*offset = DRM_GEM_MAPPING_OFF(obj->base.map_list.key) |
	    DRM_GEM_MAPPING_KEY;
out:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/**
 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
 * @dev: DRM device
 * @data: GTT mapping ioctl data
 * @file: GEM object info
 *
 * Simply returns the fake offset to userspace so it can mmap it.
 * The mmap call will end up in drm_gem_mmap(), which will set things
 * up so we can get faults in the handler above.
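 *
 * The expected userspace sequence is roughly: this ioctl returns the fake
 * offset in args->offset, and an mmap(2) of the DRM fd at that offset then
 * establishes the GTT mapping which the fault handler services.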
 *
 * The fault handler will take care of binding the object into the GTT
 * (since it may have been evicted to make room for something), allocating
 * a fence register, and mapping the appropriate aperture address into
 * userspace.
 */
int
i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
			struct drm_file *file)
{
	struct drm_i915_gem_mmap_gtt *args = data;

	return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
}

/* Immediately discard the backing storage */
static void
i915_gem_object_truncate(struct drm_i915_gem_object *obj)
{
	vm_object_t vm_obj;

	vm_obj = obj->base.vm_obj;
	VM_OBJECT_LOCK(vm_obj);
	vm_object_page_remove(vm_obj, 0, 0, false);
	VM_OBJECT_UNLOCK(vm_obj);
	obj->madv = __I915_MADV_PURGED;
}

static inline int
i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj)
{
	return obj->madv == I915_MADV_DONTNEED;
}

static void
i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
{
	vm_page_t m;
	int page_count, i;

	BUG_ON(obj->madv == __I915_MADV_PURGED);

	if (obj->tiling_mode != I915_TILING_NONE)
		i915_gem_object_save_bit_17_swizzle(obj);
	if (obj->madv == I915_MADV_DONTNEED)
		obj->dirty = 0;
	page_count = obj->base.size / PAGE_SIZE;
	VM_OBJECT_LOCK(obj->base.vm_obj);
#if GEM_PARANOID_CHECK_GTT
	i915_gem_assert_pages_not_mapped(obj->base.dev, obj->pages, page_count);
#endif
	for (i = 0; i < page_count; i++) {
		m = obj->pages[i];
		if (obj->dirty)
			vm_page_dirty(m);
		if (obj->madv == I915_MADV_WILLNEED)
			vm_page_reference(m);
		vm_page_busy_wait(obj->pages[i], FALSE, "i915gem");
		vm_page_unwire(obj->pages[i], 1);
		vm_page_wakeup(obj->pages[i]);
	}
	VM_OBJECT_UNLOCK(obj->base.vm_obj);
	obj->dirty = 0;
	drm_free(obj->pages, M_DRM);
	obj->pages = NULL;
}

int
i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
{
	const struct drm_i915_gem_object_ops *ops = obj->ops;

	if (obj->pages == NULL)
		return 0;

	BUG_ON(obj->gtt_space);

	if (obj->pages_pin_count)
		return -EBUSY;

	/* ->put_pages might need to allocate memory for the bit17 swizzle
	 * array, hence protect them from being reaped by removing them from gtt
	 * lists early.
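	 * (Off the gtt_list the shrinker, which walks the bound and unbound
	 * lists, can no longer pick the object up mid-teardown.)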
	 */
	list_del(&obj->gtt_list);

	ops->put_pages(obj);
	obj->pages = NULL;

	if (i915_gem_object_is_purgeable(obj))
		i915_gem_object_truncate(obj);

	return 0;
}

static int
i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
	struct drm_device *dev;
	vm_object_t vm_obj;
	int page_count, i, j;
	struct vm_page *page;

	dev = obj->base.dev;
	KASSERT(obj->pages == NULL, ("Obj already has pages"));
	page_count = obj->base.size / PAGE_SIZE;
	obj->pages = kmalloc(page_count * sizeof(vm_page_t), M_DRM,
	    M_WAITOK);

	vm_obj = obj->base.vm_obj;
	VM_OBJECT_LOCK(vm_obj);

	for (i = 0; i < page_count; i++) {
		page = shmem_read_mapping_page(vm_obj, i);
		if (IS_ERR(page)) {
			i915_gem_purge(dev_priv, page_count);
			goto err_pages;
		}

		obj->pages[i] = page;
	}

	VM_OBJECT_UNLOCK(vm_obj);
	if (i915_gem_object_needs_bit17_swizzle(obj))
		i915_gem_object_do_bit_17_swizzle(obj);

	return 0;

err_pages:
	for (j = 0; j < i; j++) {
		page = obj->pages[j];
		vm_page_busy_wait(page, FALSE, "i915gem");
		vm_page_unwire(page, 0);
		vm_page_wakeup(page);
	}
	VM_OBJECT_UNLOCK(vm_obj);
	drm_free(obj->pages, M_DRM);
	obj->pages = NULL;
	return (-EIO);
}

/* Ensure that the associated pages are gathered from the backing storage
 * and pinned into our object. i915_gem_object_get_pages() may be called
 * multiple times before they are released by a single call to
 * i915_gem_object_put_pages() - once the pages are no longer referenced
 * either as a result of memory pressure (reaping pages under the shrinker)
 * or as the object is itself released.
 */
int
i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
	const struct drm_i915_gem_object_ops *ops = obj->ops;
	int ret;

	if (obj->pages)
		return 0;

	if (obj->madv != I915_MADV_WILLNEED) {
		DRM_ERROR("Attempting to obtain a purgeable object\n");
		return -EINVAL;
	}

	BUG_ON(obj->pages_pin_count);

	ret = ops->get_pages(obj);
	if (ret)
		return ret;

	list_add_tail(&obj->gtt_list, &dev_priv->mm.unbound_list);
	return 0;
}

void
i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
			       struct intel_ring_buffer *ring)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	u32 seqno = intel_ring_get_seqno(ring);

	BUG_ON(ring == NULL);
	obj->ring = ring;

	/* Add a reference if we're newly entering the active list. */
	if (!obj->active) {
		drm_gem_object_reference(&obj->base);
		obj->active = 1;
	}

	/* Move from whatever list we were on to the tail of execution.
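	 * (Both lists stay in submission order, so retirement can stop at
	 * the first entry whose seqno has not passed yet.)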
	 */
	list_move_tail(&obj->mm_list, &dev_priv->mm.active_list);
	list_move_tail(&obj->ring_list, &ring->active_list);

	obj->last_read_seqno = seqno;

	if (obj->fenced_gpu_access) {
		obj->last_fenced_seqno = seqno;

		/* Bump MRU to take account of the delayed flush */
		if (obj->fence_reg != I915_FENCE_REG_NONE) {
			struct drm_i915_fence_reg *reg;

			reg = &dev_priv->fence_regs[obj->fence_reg];
			list_move_tail(&reg->lru_list,
				       &dev_priv->mm.fence_list);
		}
	}
}

static void
i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;

	BUG_ON(obj->base.write_domain & ~I915_GEM_GPU_DOMAINS);
	BUG_ON(!obj->active);

	list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);

	list_del_init(&obj->ring_list);
	obj->ring = NULL;

	obj->last_read_seqno = 0;
	obj->last_write_seqno = 0;
	obj->base.write_domain = 0;

	obj->last_fenced_seqno = 0;
	obj->fenced_gpu_access = false;

	obj->active = 0;
	drm_gem_object_unreference(&obj->base);

	WARN_ON(i915_verify_lists(dev));
}

static int
i915_gem_init_seqno(struct drm_device *dev, u32 seqno)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_ring_buffer *ring;
	int ret, i, j;

	/* Carefully retire all requests without writing to the rings */
	for_each_ring(ring, dev_priv, i) {
		ret = intel_ring_idle(ring);
		if (ret)
			return ret;
	}
	i915_gem_retire_requests(dev);

	/* Finally reset hw state */
	for_each_ring(ring, dev_priv, i) {
		intel_ring_init_seqno(ring, seqno);

		for (j = 0; j < ARRAY_SIZE(ring->sync_seqno); j++)
			ring->sync_seqno[j] = 0;
	}

	return 0;
}

int i915_gem_set_seqno(struct drm_device *dev, u32 seqno)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret;

	if (seqno == 0)
		return -EINVAL;

	/* HWS page needs to be set less than what we
	 * will inject to ring
	 */
	ret = i915_gem_init_seqno(dev, seqno - 1);
	if (ret)
		return ret;

	/* Carefully set the last_seqno value so that wrap
	 * detection still works
	 */
	dev_priv->next_seqno = seqno;
	dev_priv->last_seqno = seqno - 1;
	if (dev_priv->last_seqno == 0)
		dev_priv->last_seqno--;

	return 0;
}

int
i915_gem_get_seqno(struct drm_device *dev, u32 *seqno)
{
	struct drm_i915_private *dev_priv = dev->dev_private;

	/* reserve 0 for non-seqno */
	if (dev_priv->next_seqno == 0) {
		int ret = i915_gem_init_seqno(dev, 0);
		if (ret)
			return ret;

		dev_priv->next_seqno = 1;
	}

	*seqno = dev_priv->last_seqno = dev_priv->next_seqno++;
	return 0;
}

int
i915_add_request(struct intel_ring_buffer *ring,
		 struct drm_file *file,
		 u32 *out_seqno)
{
	drm_i915_private_t *dev_priv = ring->dev->dev_private;
	struct drm_i915_gem_request *request;
	u32 request_ring_position;
	int was_empty;
	int ret;

	/*
	 * Emit any outstanding flushes - execbuf can fail to emit the flush
	 * after having emitted the batchbuffer command. Hence we need to fix
	 * things up similar to emitting the lazy request. The difference here
	 * is that the flush _must_ happen before the next request, no matter
	 * what.
	 */
	ret = intel_ring_flush_all_caches(ring);
	if (ret)
		return ret;

	request = kmalloc(sizeof(*request), M_DRM, M_WAITOK | M_ZERO);
	if (request == NULL)
		return -ENOMEM;

	/* Record the position of the start of the request so that
	 * should we detect the updated seqno part-way through the
	 * GPU processing the request, we never over-estimate the
	 * position of the head.
	 */
	request_ring_position = intel_ring_get_tail(ring);

	ret = ring->add_request(ring);
	if (ret) {
		kfree(request);
		return ret;
	}

	request->seqno = intel_ring_get_seqno(ring);
	request->ring = ring;
	request->tail = request_ring_position;
	request->emitted_jiffies = jiffies;
	was_empty = list_empty(&ring->request_list);
	list_add_tail(&request->list, &ring->request_list);
	request->file_priv = NULL;

	if (file) {
		struct drm_i915_file_private *file_priv = file->driver_priv;

		spin_lock(&file_priv->mm.lock);
		request->file_priv = file_priv;
		list_add_tail(&request->client_list,
			      &file_priv->mm.request_list);
		spin_unlock(&file_priv->mm.lock);
	}

	ring->outstanding_lazy_request = 0;

	if (!dev_priv->mm.suspended) {
		if (i915_enable_hangcheck) {
			mod_timer(&dev_priv->gpu_error.hangcheck_timer,
				  round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES));
		}
		if (was_empty) {
			queue_delayed_work(dev_priv->wq,
					   &dev_priv->mm.retire_work,
					   round_jiffies_up_relative(hz));
			intel_mark_busy(dev_priv->dev);
		}
	}

	if (out_seqno)
		*out_seqno = request->seqno;
	return 0;
}

static inline void
i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
{
	struct drm_i915_file_private *file_priv = request->file_priv;

	if (!file_priv)
		return;

	spin_lock(&file_priv->mm.lock);
	if (request->file_priv) {
		list_del(&request->client_list);
		request->file_priv = NULL;
	}
	spin_unlock(&file_priv->mm.lock);
}

static void i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv,
				      struct intel_ring_buffer *ring)
{
	while (!list_empty(&ring->request_list)) {
		struct drm_i915_gem_request *request;

		request = list_first_entry(&ring->request_list,
					   struct drm_i915_gem_request,
					   list);

		list_del(&request->list);
		i915_gem_request_remove_from_client(request);
		drm_free(request, M_DRM);
	}

	while (!list_empty(&ring->active_list)) {
		struct drm_i915_gem_object *obj;

		obj = list_first_entry(&ring->active_list,
				       struct drm_i915_gem_object,
				       ring_list);

		i915_gem_object_move_to_inactive(obj);
	}
}

void i915_gem_restore_fences(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int i;

	for (i = 0; i < dev_priv->num_fence_regs; i++) {
		struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
		i915_gem_write_fence(dev, i, reg->obj);
	}
}

void i915_gem_reset(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj;
	struct intel_ring_buffer *ring;
	int i;

	for_each_ring(ring, dev_priv, i)
		i915_gem_reset_ring_lists(dev_priv, ring);

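	/* All per-ring requests are gone at this point; what remains is the
	 * per-object domain state and the fence registers handled below.
	 */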
	/* Move everything out of the GPU domains to ensure we do any
	 * necessary invalidation upon reuse.
	 */
	list_for_each_entry(obj,
			    &dev_priv->mm.inactive_list,
			    mm_list)
	{
		obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS;
	}

	i915_gem_restore_fences(dev);
}

/**
 * This function clears the request list as sequence numbers are passed.
 */
void
i915_gem_retire_requests_ring(struct intel_ring_buffer *ring)
{
	uint32_t seqno;

	if (list_empty(&ring->request_list))
		return;

	WARN_ON(i915_verify_lists(ring->dev));

	seqno = ring->get_seqno(ring, true);

	while (!list_empty(&ring->request_list)) {
		struct drm_i915_gem_request *request;

		request = list_first_entry(&ring->request_list,
					   struct drm_i915_gem_request,
					   list);

		if (!i915_seqno_passed(seqno, request->seqno))
			break;

		/* We know the GPU must have read the request to have
		 * sent us the seqno + interrupt, so use the position
		 * of tail of the request to update the last known position
		 * of the GPU head.
		 */
		ring->last_retired_head = request->tail;

		list_del(&request->list);
		i915_gem_request_remove_from_client(request);
		kfree(request);
	}

	/* Move any buffers on the active list that are no longer referenced
	 * by the ringbuffer to the flushing/inactive lists as appropriate.
	 */
	while (!list_empty(&ring->active_list)) {
		struct drm_i915_gem_object *obj;

		obj = list_first_entry(&ring->active_list,
				       struct drm_i915_gem_object,
				       ring_list);

		if (!i915_seqno_passed(seqno, obj->last_read_seqno))
			break;

		i915_gem_object_move_to_inactive(obj);
	}

	if (unlikely(ring->trace_irq_seqno &&
		     i915_seqno_passed(seqno, ring->trace_irq_seqno))) {
		ring->irq_put(ring);
		ring->trace_irq_seqno = 0;
	}

}

void
i915_gem_retire_requests(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct intel_ring_buffer *ring;
	int i;

	for_each_ring(ring, dev_priv, i)
		i915_gem_retire_requests_ring(ring);
}

static long
__i915_gem_shrink(struct drm_i915_private *dev_priv, long target,
		  bool purgeable_only)
{
	struct drm_i915_gem_object *obj, *next;
	long count = 0;

	list_for_each_entry_safe(obj, next,
				 &dev_priv->mm.unbound_list,
				 gtt_list) {
#if 0
		if ((i915_gem_object_is_purgeable(obj) || !purgeable_only) &&
		    i915_gem_object_put_pages(obj) == 0) {
			count += obj->base.size >> PAGE_SHIFT;
			if (count >= target)
				return count;
		}
#endif
	}

	list_for_each_entry_safe(obj, next,
				 &dev_priv->mm.inactive_list,
				 mm_list) {
#if 0
		if ((i915_gem_object_is_purgeable(obj) || !purgeable_only) &&
		    i915_gem_object_unbind(obj) == 0 &&
		    i915_gem_object_put_pages(obj) == 0) {
			count += obj->base.size >> PAGE_SHIFT;
			if (count >= target)
				return count;
		}
#endif
	}

	return count;
}

static long
i915_gem_purge(struct drm_i915_private *dev_priv, long target)
{
	return __i915_gem_shrink(dev_priv, target, true);
}

static void
i915_gem_retire_work_handler(struct work_struct *work)
{
	drm_i915_private_t *dev_priv;
	struct drm_device *dev;
	struct intel_ring_buffer *ring;
	bool idle;
	int i;

	dev_priv = container_of(work, drm_i915_private_t,
				mm.retire_work.work);
	dev = dev_priv->dev;

	/* Come back later if the device is busy... */
	if (lockmgr(&dev->struct_mutex, LK_EXCLUSIVE|LK_NOWAIT)) {
		queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work,
				   round_jiffies_up_relative(hz));
		return;
	}

	i915_gem_retire_requests(dev);

	/* Send a periodic flush down the ring so we don't hold onto GEM
	 * objects indefinitely.
	 */
	idle = true;
	for_each_ring(ring, dev_priv, i) {
		if (ring->gpu_caches_dirty)
			i915_add_request(ring, NULL, NULL);

		idle &= list_empty(&ring->request_list);
	}

	if (!dev_priv->mm.suspended && !idle)
		queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work,
				   round_jiffies_up_relative(hz));
	if (idle)
		intel_mark_idle(dev);

	mutex_unlock(&dev->struct_mutex);
}

/**
 * Ensures that an object will eventually get non-busy by flushing any required
 * write domains, emitting any outstanding lazy request and retiring any
 * completed requests.
 */
static int
i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
{
	int ret;

	if (obj->active) {
		ret = i915_gem_check_olr(obj->ring, obj->last_read_seqno);
		if (ret)
			return ret;

		i915_gem_retire_requests_ring(obj->ring);
	}

	return 0;
}

/**
 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
 * @DRM_IOCTL_ARGS: standard ioctl arguments
 *
 * Returns 0 if successful, else an error is returned with the remaining time in
 * the timeout parameter.
 *  -ETIME: object is still busy after timeout
 *  -ERESTARTSYS: signal interrupted the wait
 *  -ENOENT: object doesn't exist
 * Also possible, but rare:
 *  -EAGAIN: GPU wedged
 *  -ENOMEM: damn
 *  -ENODEV: Internal IRQ fail
 *  -E?: The add request failed
 *
 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
 * non-zero timeout parameter the wait ioctl will wait for the given number of
 * nanoseconds on an object becoming unbusy. Since the wait itself does so
 * without holding struct_mutex the object may become re-busied before this
 * function completes. A similar but shorter race condition exists in the busy
 * ioctl.
 */
int
i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_gem_wait *args = data;
	struct drm_i915_gem_object *obj;
	struct intel_ring_buffer *ring = NULL;
	struct timespec timeout_stack, *timeout = NULL;
	unsigned reset_counter;
	u32 seqno = 0;
	int ret = 0;

	if (args->timeout_ns >= 0) {
		timeout_stack = ns_to_timespec(args->timeout_ns);
		timeout = &timeout_stack;
	}

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->bo_handle));
	if (&obj->base == NULL) {
		mutex_unlock(&dev->struct_mutex);
		return -ENOENT;
	}

	/* Need to make sure the object gets inactive eventually.
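	 * (flush_active emits any outstanding lazy request, so the seqno we
	 * may wait on below is guaranteed to actually reach the ring.)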
*/ 1992 ret = i915_gem_object_flush_active(obj); 1993 if (ret) 1994 goto out; 1995 1996 if (obj->active) { 1997 seqno = obj->last_read_seqno; 1998 ring = obj->ring; 1999 } 2000 2001 if (seqno == 0) 2002 goto out; 2003 2004 /* Do this after OLR check to make sure we make forward progress polling 2005 * on this IOCTL with a 0 timeout (like busy ioctl) 2006 */ 2007 if (!args->timeout_ns) { 2008 ret = -ETIMEDOUT; 2009 goto out; 2010 } 2011 2012 drm_gem_object_unreference(&obj->base); 2013 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter); 2014 mutex_unlock(&dev->struct_mutex); 2015 2016 ret = __wait_seqno(ring, seqno, reset_counter, true, timeout); 2017 if (timeout) 2018 args->timeout_ns = timespec_to_ns(timeout); 2019 return ret; 2020 2021 out: 2022 drm_gem_object_unreference(&obj->base); 2023 mutex_unlock(&dev->struct_mutex); 2024 return ret; 2025 } 2026 2027 /** 2028 * i915_gem_object_sync - sync an object to a ring. 2029 * 2030 * @obj: object which may be in use on another ring. 2031 * @to: ring we wish to use the object on. May be NULL. 2032 * 2033 * This code is meant to abstract object synchronization with the GPU. 2034 * Calling with NULL implies synchronizing the object with the CPU 2035 * rather than a particular GPU ring. 2036 * 2037 * Returns 0 if successful, else propagates up the lower layer error. 2038 */ 2039 int 2040 i915_gem_object_sync(struct drm_i915_gem_object *obj, 2041 struct intel_ring_buffer *to) 2042 { 2043 struct intel_ring_buffer *from = obj->ring; 2044 u32 seqno; 2045 int ret, idx; 2046 2047 if (from == NULL || to == from) 2048 return 0; 2049 2050 if (to == NULL || !i915_semaphore_is_enabled(obj->base.dev)) 2051 return i915_gem_object_wait_rendering(obj, false); 2052 2053 idx = intel_ring_sync_index(from, to); 2054 2055 seqno = obj->last_read_seqno; 2056 if (seqno <= from->sync_seqno[idx]) 2057 return 0; 2058 2059 ret = i915_gem_check_olr(obj->ring, seqno); 2060 if (ret) 2061 return ret; 2062 2063 ret = to->sync_to(to, from, seqno); 2064 if (!ret) 2065 /* We use last_read_seqno because sync_to() 2066 * might have just caused seqno wrap under 2067 * the radar. 2068 */ 2069 from->sync_seqno[idx] = obj->last_read_seqno; 2070 2071 return ret; 2072 } 2073 2074 static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj) 2075 { 2076 u32 old_write_domain, old_read_domains; 2077 2078 /* Force a pagefault for domain tracking on next user access */ 2079 i915_gem_release_mmap(obj); 2080 2081 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) 2082 return; 2083 2084 /* Wait for any direct GTT access to complete */ 2085 cpu_mfence(); 2086 2087 old_read_domains = obj->base.read_domains; 2088 old_write_domain = obj->base.write_domain; 2089 2090 obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT; 2091 obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT; 2092 2093 } 2094 2095 /** 2096 * Unbinds an object from the GTT aperture. 2097 */ 2098 int 2099 i915_gem_object_unbind(struct drm_i915_gem_object *obj) 2100 { 2101 drm_i915_private_t *dev_priv = obj->base.dev->dev_private; 2102 int ret; 2103 2104 if (obj->gtt_space == NULL) 2105 return 0; 2106 2107 if (obj->pin_count) 2108 return -EBUSY; 2109 2110 BUG_ON(obj->pages == NULL); 2111 2112 ret = i915_gem_object_finish_gpu(obj); 2113 if (ret) 2114 return ret; 2115 /* Continue on if we fail due to EIO, the GPU is hung so we 2116 * should be safe and we need to cleanup or else we might 2117 * cause memory corruption through use-after-free. 
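	 * "Cleanup" here means tearing down the GTT mapping and fence state
	 * below before the backing pages are released; skipping it would
	 * leave stale PTEs pointing at freed pages.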
2118 */ 2119 2120 i915_gem_object_finish_gtt(obj); 2121 2122 /* Move the object to the CPU domain to ensure that 2123 * any possible CPU writes while it's not in the GTT 2124 * are flushed when we go to remap it. 2125 */ 2126 if (ret == 0) 2127 ret = i915_gem_object_set_to_cpu_domain(obj, 1); 2128 if (ret == -ERESTARTSYS) 2129 return ret; 2130 if (ret) { 2131 /* In the event of a disaster, abandon all caches and 2132 * hope for the best. 2133 */ 2134 i915_gem_clflush_object(obj); 2135 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU; 2136 } 2137 2138 /* release the fence reg _after_ flushing */ 2139 ret = i915_gem_object_put_fence(obj); 2140 if (ret) 2141 return ret; 2142 2143 if (obj->has_global_gtt_mapping) 2144 i915_gem_gtt_unbind_object(obj); 2145 if (obj->has_aliasing_ppgtt_mapping) { 2146 i915_ppgtt_unbind_object(dev_priv->mm.aliasing_ppgtt, obj); 2147 obj->has_aliasing_ppgtt_mapping = 0; 2148 } 2149 i915_gem_gtt_finish_object(obj); 2150 2151 i915_gem_object_put_pages_gtt(obj); 2152 2153 list_del_init(&obj->gtt_list); 2154 list_del_init(&obj->mm_list); 2155 /* Avoid an unnecessary call to unbind on rebind. */ 2156 obj->map_and_fenceable = true; 2157 2158 drm_mm_put_block(obj->gtt_space); 2159 obj->gtt_space = NULL; 2160 obj->gtt_offset = 0; 2161 2162 if (i915_gem_object_is_purgeable(obj)) 2163 i915_gem_object_truncate(obj); 2164 2165 return ret; 2166 } 2167 2168 int i915_gpu_idle(struct drm_device *dev) 2169 { 2170 drm_i915_private_t *dev_priv = dev->dev_private; 2171 struct intel_ring_buffer *ring; 2172 int ret, i; 2173 2174 /* Flush everything onto the inactive list. */ 2175 for_each_ring(ring, dev_priv, i) { 2176 ret = i915_switch_context(ring, NULL, DEFAULT_CONTEXT_ID); 2177 if (ret) 2178 return ret; 2179 2180 ret = intel_ring_idle(ring); 2181 if (ret) 2182 return ret; 2183 } 2184 2185 return 0; 2186 } 2187 2188 static void i965_write_fence_reg(struct drm_device *dev, int reg, 2189 struct drm_i915_gem_object *obj) 2190 { 2191 drm_i915_private_t *dev_priv = dev->dev_private; 2192 int fence_reg; 2193 int fence_pitch_shift; 2194 uint64_t val; 2195 2196 if (INTEL_INFO(dev)->gen >= 6) { 2197 fence_reg = FENCE_REG_SANDYBRIDGE_0; 2198 fence_pitch_shift = SANDYBRIDGE_FENCE_PITCH_SHIFT; 2199 } else { 2200 fence_reg = FENCE_REG_965_0; 2201 fence_pitch_shift = I965_FENCE_PITCH_SHIFT; 2202 } 2203 2204 if (obj) { 2205 u32 size = obj->gtt_space->size; 2206 2207 val = (uint64_t)((obj->gtt_offset + size - 4096) & 2208 0xfffff000) << 32; 2209 val |= obj->gtt_offset & 0xfffff000; 2210 val |= (uint64_t)((obj->stride / 128) - 1) << fence_pitch_shift; 2211 if (obj->tiling_mode == I915_TILING_Y) 2212 val |= 1 << I965_FENCE_TILING_Y_SHIFT; 2213 val |= I965_FENCE_REG_VALID; 2214 } else 2215 val = 0; 2216 2217 fence_reg += reg * 8; 2218 I915_WRITE64(fence_reg, val); 2219 POSTING_READ(fence_reg); 2220 } 2221 2222 static void i915_write_fence_reg(struct drm_device *dev, int reg, 2223 struct drm_i915_gem_object *obj) 2224 { 2225 drm_i915_private_t *dev_priv = dev->dev_private; 2226 u32 val; 2227 2228 if (obj) { 2229 u32 size = obj->gtt_space->size; 2230 int pitch_val; 2231 int tile_width; 2232 2233 WARN((obj->gtt_offset & ~I915_FENCE_START_MASK) || 2234 (size & -size) != size || 2235 (obj->gtt_offset & (size - 1)), 2236 "object 0x%08x [fenceable? 
%d] not 1M or pot-size (0x%08x) aligned\n", 2237 obj->gtt_offset, obj->map_and_fenceable, size); 2238 2239 if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev)) 2240 tile_width = 128; 2241 else 2242 tile_width = 512; 2243 2244 /* Note: pitch better be a power of two tile widths */ 2245 pitch_val = obj->stride / tile_width; 2246 pitch_val = ffs(pitch_val) - 1; 2247 2248 val = obj->gtt_offset; 2249 if (obj->tiling_mode == I915_TILING_Y) 2250 val |= 1 << I830_FENCE_TILING_Y_SHIFT; 2251 val |= I915_FENCE_SIZE_BITS(size); 2252 val |= pitch_val << I830_FENCE_PITCH_SHIFT; 2253 val |= I830_FENCE_REG_VALID; 2254 } else 2255 val = 0; 2256 2257 if (reg < 8) 2258 reg = FENCE_REG_830_0 + reg * 4; 2259 else 2260 reg = FENCE_REG_945_8 + (reg - 8) * 4; 2261 2262 I915_WRITE(reg, val); 2263 POSTING_READ(reg); 2264 } 2265 2266 static void i830_write_fence_reg(struct drm_device *dev, int reg, 2267 struct drm_i915_gem_object *obj) 2268 { 2269 drm_i915_private_t *dev_priv = dev->dev_private; 2270 uint32_t val; 2271 2272 if (obj) { 2273 u32 size = obj->gtt_space->size; 2274 uint32_t pitch_val; 2275 2276 WARN((obj->gtt_offset & ~I830_FENCE_START_MASK) || 2277 (size & -size) != size || 2278 (obj->gtt_offset & (size - 1)), 2279 "object 0x%08x not 512K or pot-size 0x%08x aligned\n", 2280 obj->gtt_offset, size); 2281 2282 pitch_val = obj->stride / 128; 2283 pitch_val = ffs(pitch_val) - 1; 2284 2285 val = obj->gtt_offset; 2286 if (obj->tiling_mode == I915_TILING_Y) 2287 val |= 1 << I830_FENCE_TILING_Y_SHIFT; 2288 val |= I830_FENCE_SIZE_BITS(size); 2289 val |= pitch_val << I830_FENCE_PITCH_SHIFT; 2290 val |= I830_FENCE_REG_VALID; 2291 } else 2292 val = 0; 2293 2294 I915_WRITE(FENCE_REG_830_0 + reg * 4, val); 2295 POSTING_READ(FENCE_REG_830_0 + reg * 4); 2296 } 2297 2298 inline static bool i915_gem_object_needs_mb(struct drm_i915_gem_object *obj) 2299 { 2300 return obj && obj->base.read_domains & I915_GEM_DOMAIN_GTT; 2301 } 2302 2303 static void i915_gem_write_fence(struct drm_device *dev, int reg, 2304 struct drm_i915_gem_object *obj) 2305 { 2306 struct drm_i915_private *dev_priv = dev->dev_private; 2307 2308 /* Ensure that all CPU reads are completed before installing a fence 2309 * and all writes before removing the fence. 2310 */ 2311 if (i915_gem_object_needs_mb(dev_priv->fence_regs[reg].obj)) 2312 cpu_mfence(); 2313 2314 switch (INTEL_INFO(dev)->gen) { 2315 case 7: 2316 case 6: 2317 case 5: 2318 case 4: i965_write_fence_reg(dev, reg, obj); break; 2319 case 3: i915_write_fence_reg(dev, reg, obj); break; 2320 case 2: i830_write_fence_reg(dev, reg, obj); break; 2321 default: BUG(); 2322 } 2323 2324 /* And similarly be paranoid that no direct access to this region 2325 * is reordered to before the fence is installed. 2326 */ 2327 if (i915_gem_object_needs_mb(obj)) 2328 cpu_mfence(); 2329 } 2330 2331 static inline int fence_number(struct drm_i915_private *dev_priv, 2332 struct drm_i915_fence_reg *fence) 2333 { 2334 return fence - dev_priv->fence_regs; 2335 } 2336 2337 static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj, 2338 struct drm_i915_fence_reg *fence, 2339 bool enable) 2340 { 2341 struct drm_device *dev = obj->base.dev; 2342 struct drm_i915_private *dev_priv = dev->dev_private; 2343 int fence_reg = fence_number(dev_priv, fence); 2344 2345 /* In order to fully serialize access to the fenced region and 2346 * the update to the fence register we need to take extreme 2347 * measures on SNB+. 
In theory, the write to the fence register 2348 * flushes all memory transactions before, and coupled with the 2349 * mb() placed around the register write we serialise all memory 2350 * operations with respect to the changes in the tiler. Yet, on 2351 * SNB+ we need to take a step further and emit an explicit wbinvd() 2352 * on each processor in order to manually flush all memory 2353 * transactions before updating the fence register. 2354 */ 2355 if (HAS_LLC(obj->base.dev)) 2356 cpu_wbinvd_on_all_cpus(); 2357 i915_gem_write_fence(dev, fence_reg, enable ? obj : NULL); 2358 2359 if (enable) { 2360 obj->fence_reg = fence_reg; 2361 fence->obj = obj; 2362 list_move_tail(&fence->lru_list, &dev_priv->mm.fence_list); 2363 } else { 2364 obj->fence_reg = I915_FENCE_REG_NONE; 2365 fence->obj = NULL; 2366 list_del_init(&fence->lru_list); 2367 } 2368 } 2369 2370 static int 2371 i915_gem_object_wait_fence(struct drm_i915_gem_object *obj) 2372 { 2373 if (obj->last_fenced_seqno) { 2374 int ret = i915_wait_seqno(obj->ring, obj->last_fenced_seqno); 2375 if (ret) 2376 return ret; 2377 2378 obj->last_fenced_seqno = 0; 2379 } 2380 2381 obj->fenced_gpu_access = false; 2382 return 0; 2383 } 2384 2385 int 2386 i915_gem_object_put_fence(struct drm_i915_gem_object *obj) 2387 { 2388 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2389 struct drm_i915_fence_reg *fence; 2390 int ret; 2391 2392 ret = i915_gem_object_wait_fence(obj); 2393 if (ret) 2394 return ret; 2395 2396 if (obj->fence_reg == I915_FENCE_REG_NONE) 2397 return 0; 2398 2399 fence = &dev_priv->fence_regs[obj->fence_reg]; 2400 2401 i915_gem_object_fence_lost(obj); 2402 i915_gem_object_update_fence(obj, fence, false); 2403 2404 return 0; 2405 } 2406 2407 static struct drm_i915_fence_reg * 2408 i915_find_fence_reg(struct drm_device *dev) 2409 { 2410 struct drm_i915_private *dev_priv = dev->dev_private; 2411 struct drm_i915_fence_reg *reg, *avail; 2412 int i; 2413 2414 /* First try to find a free reg */ 2415 avail = NULL; 2416 for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) { 2417 reg = &dev_priv->fence_regs[i]; 2418 if (!reg->obj) 2419 return reg; 2420 2421 if (!reg->pin_count) 2422 avail = reg; 2423 } 2424 2425 if (avail == NULL) 2426 return NULL; 2427 2428 /* None available, try to steal one or wait for a user to finish */ 2429 list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) { 2430 if (reg->pin_count) 2431 continue; 2432 2433 return reg; 2434 } 2435 2436 return NULL; 2437 } 2438 2439 /** 2440 * i915_gem_object_get_fence - set up fencing for an object 2441 * @obj: object to map through a fence reg 2442 * 2443 * When mapping objects through the GTT, userspace wants to be able to write 2444 * to them without having to worry about swizzling if the object is tiled. 2445 * This function walks the fence regs looking for a free one for @obj, 2446 * stealing one if it can't find any. 2447 * 2448 * It then sets up the reg based on the object's properties: address, pitch 2449 * and tiling format. 2450 * 2451 * For an untiled surface, this removes any existing fence. 2452 */ 2453 int 2454 i915_gem_object_get_fence(struct drm_i915_gem_object *obj) 2455 { 2456 struct drm_device *dev = obj->base.dev; 2457 struct drm_i915_private *dev_priv = dev->dev_private; 2458 bool enable = obj->tiling_mode != I915_TILING_NONE; 2459 struct drm_i915_fence_reg *reg; 2460 int ret; 2461 2462 /* Have we updated the tiling parameters upon the object and so 2463 * will need to serialise the write to the associated fence register? 
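	 * If so, the old fence contents may still be in use by the GPU, so
	 * wait for the last fenced access to retire before the register is
	 * rewritten below.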
	 */
	if (obj->fence_dirty) {
		ret = i915_gem_object_wait_fence(obj);
		if (ret)
			return ret;
	}

	/* Just update our place in the LRU if our fence is getting reused. */
	if (obj->fence_reg != I915_FENCE_REG_NONE) {
		reg = &dev_priv->fence_regs[obj->fence_reg];
		if (!obj->fence_dirty) {
			list_move_tail(&reg->lru_list,
				       &dev_priv->mm.fence_list);
			return 0;
		}
	} else if (enable) {
		reg = i915_find_fence_reg(dev);
		if (reg == NULL)
			return -EDEADLK;

		if (reg->obj) {
			struct drm_i915_gem_object *old = reg->obj;

			ret = i915_gem_object_wait_fence(old);
			if (ret)
				return ret;

			i915_gem_object_fence_lost(old);
		}
	} else
		return 0;

	i915_gem_object_update_fence(obj, reg, enable);
	obj->fence_dirty = false;

	return 0;
}

static bool i915_gem_valid_gtt_space(struct drm_device *dev,
				     struct drm_mm_node *gtt_space,
				     unsigned long cache_level)
{
	struct drm_mm_node *other;

	/* On non-LLC machines we have to be careful when putting differing
	 * types of snoopable memory together to avoid the prefetcher
	 * crossing memory domains and dying.
	 */
	if (HAS_LLC(dev))
		return true;

	if (gtt_space == NULL)
		return true;

	if (list_empty(&gtt_space->node_list))
		return true;

	other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list);
	if (other->allocated && !other->hole_follows && other->color != cache_level)
		return false;

	other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list);
	if (other->allocated && !gtt_space->hole_follows && other->color != cache_level)
		return false;

	return true;
}

static void i915_gem_verify_gtt(struct drm_device *dev)
{
#if WATCH_GTT
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj;
	int err = 0;

	list_for_each_entry(obj, &dev_priv->mm.gtt_list, gtt_list) {
		if (obj->gtt_space == NULL) {
			printk(KERN_ERR "object found on GTT list with no space reserved\n");
			err++;
			continue;
		}

		if (obj->cache_level != obj->gtt_space->color) {
			printk(KERN_ERR "object reserved space [%08lx, %08lx] with wrong color, cache_level=%x, color=%lx\n",
			       obj->gtt_space->start,
			       obj->gtt_space->start + obj->gtt_space->size,
			       obj->cache_level,
			       obj->gtt_space->color);
			err++;
			continue;
		}

		if (!i915_gem_valid_gtt_space(dev,
					      obj->gtt_space,
					      obj->cache_level)) {
			printk(KERN_ERR "invalid GTT space found at [%08lx, %08lx] - color=%x\n",
			       obj->gtt_space->start,
			       obj->gtt_space->start + obj->gtt_space->size,
			       obj->cache_level);
			err++;
			continue;
		}
	}

	WARN_ON(err);
#endif
}

/**
 * Finds free space in the GTT aperture and binds the object there.
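 * A map_and_fenceable placement must sit in the CPU-mappable half of the
 * aperture and use the fenceable size/alignment so a fence register can
 * cover it; otherwise the weaker unfenced alignment suffices. If no hole
 * fits, we evict and retry.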
2574 */ 2575 static int 2576 i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj, 2577 unsigned alignment, 2578 bool map_and_fenceable, 2579 bool nonblocking) 2580 { 2581 struct drm_device *dev = obj->base.dev; 2582 drm_i915_private_t *dev_priv = dev->dev_private; 2583 struct drm_mm_node *free_space; 2584 uint32_t size, fence_size, fence_alignment, unfenced_alignment; 2585 bool mappable, fenceable; 2586 int ret; 2587 2588 fence_size = i915_gem_get_gtt_size(dev, 2589 obj->base.size, 2590 obj->tiling_mode); 2591 fence_alignment = i915_gem_get_gtt_alignment(dev, 2592 obj->base.size, 2593 obj->tiling_mode, true); 2594 unfenced_alignment = 2595 i915_gem_get_gtt_alignment(dev, 2596 obj->base.size, 2597 obj->tiling_mode, false); 2598 2599 if (alignment == 0) 2600 alignment = map_and_fenceable ? fence_alignment : 2601 unfenced_alignment; 2602 if (map_and_fenceable && alignment & (fence_alignment - 1)) { 2603 DRM_ERROR("Invalid object alignment requested %u\n", alignment); 2604 return -EINVAL; 2605 } 2606 2607 size = map_and_fenceable ? fence_size : obj->base.size; 2608 2609 /* If the object is bigger than the entire aperture, reject it early 2610 * before evicting everything in a vain attempt to find space. 2611 */ 2612 if (obj->base.size > 2613 (map_and_fenceable ? dev_priv->gtt.mappable_end : dev_priv->gtt.total)) { 2614 DRM_ERROR("Attempting to bind an object larger than the aperture\n"); 2615 return -E2BIG; 2616 } 2617 2618 search_free: 2619 if (map_and_fenceable) 2620 free_space = 2621 drm_mm_search_free_in_range_color(&dev_priv->mm.gtt_space, 2622 size, alignment, obj->cache_level, 2623 0, dev_priv->gtt.mappable_end, 2624 false); 2625 else 2626 free_space = drm_mm_search_free_color(&dev_priv->mm.gtt_space, 2627 size, alignment, obj->cache_level, 2628 false); 2629 if (free_space != NULL) { 2630 if (map_and_fenceable) 2631 obj->gtt_space = 2632 drm_mm_get_block_range_generic(free_space, 2633 size, alignment, obj->cache_level, 2634 0, dev_priv->gtt.mappable_end, 2635 false); 2636 else 2637 obj->gtt_space = 2638 drm_mm_get_block_generic(free_space, 2639 size, alignment, obj->cache_level, 2640 false); 2641 } 2642 if (obj->gtt_space == NULL) { 2643 ret = i915_gem_evict_something(dev, size, alignment, 2644 obj->cache_level, 2645 map_and_fenceable, 2646 nonblocking); 2647 if (ret) 2648 return ret; 2649 2650 goto search_free; 2651 } 2652 2653 /* 2654 * NOTE: i915_gem_object_get_pages_gtt() cannot 2655 * return ENOMEM, since we used VM_ALLOC_RETRY. 
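	 * It can still fail for other reasons, though, which is why the
	 * error path below hands back the drm_mm block we just claimed.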
	 */
	ret = i915_gem_object_get_pages_gtt(obj);
	if (ret != 0) {
		drm_mm_put_block(obj->gtt_space);
		obj->gtt_space = NULL;
		return ret;
	}

	/* i915_gem_gtt_bind_object() returns void; binding the pages into
	 * the GTT cannot fail at this point.
	 */
	i915_gem_gtt_bind_object(obj, obj->cache_level);

	list_add_tail(&obj->gtt_list, &dev_priv->mm.bound_list);
	list_add_tail(&obj->mm_list, &dev_priv->mm.inactive_list);

	obj->gtt_offset = obj->gtt_space->start;

	fenceable =
		obj->gtt_space->size == fence_size &&
		(obj->gtt_space->start & (fence_alignment - 1)) == 0;

	mappable =
		obj->gtt_offset + obj->base.size <= dev_priv->gtt.mappable_end;

	obj->map_and_fenceable = mappable && fenceable;

	i915_gem_verify_gtt(dev);
	return 0;
}

void
i915_gem_clflush_object(struct drm_i915_gem_object *obj)
{

	/* If we don't have a page list set up, then we're not pinned
	 * to GPU, and we can ignore the cache flush because it'll happen
	 * again at bind time.
	 */
	if (obj->pages == NULL)
		return;

	/*
	 * Stolen memory is always coherent with the GPU as it is explicitly
	 * marked as wc by the system, or the system is cache-coherent.
	 */
	if (obj->stolen)
		return;

	/* If the GPU is snooping the contents of the CPU cache,
	 * we do not need to manually clear the CPU cache lines. However,
	 * the caches are only snooped when the render cache is
	 * flushed/invalidated. As we always have to emit invalidations
	 * and flushes when moving into and out of the RENDER domain, correct
	 * snooping behaviour occurs naturally as the result of our domain
	 * tracking.
	 */
	if (obj->cache_level != I915_CACHE_NONE)
		return;

	drm_clflush_pages(obj->pages, obj->base.size / PAGE_SIZE);
}

/** Flushes the GTT write domain for the object if it's dirty. */
static void
i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
{
	uint32_t old_write_domain;

	if (obj->base.write_domain != I915_GEM_DOMAIN_GTT)
		return;

	/* No actual flushing is required for the GTT write domain. Writes
	 * to it immediately go to main memory as far as we know, so there's
	 * no chipset flush. It also doesn't land in render cache.
	 *
	 * However, we do have to enforce the order so that all writes through
	 * the GTT land before any writes to the device, such as updates to
	 * the GATT itself.
	 */
	cpu_sfence();

	old_write_domain = obj->base.write_domain;
	obj->base.write_domain = 0;
}

/** Flushes the CPU write domain for the object if it's dirty. */
static void
i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
{
	uint32_t old_write_domain;

	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
		return;

	i915_gem_clflush_object(obj);
	i915_gem_chipset_flush(obj->base.dev);
	old_write_domain = obj->base.write_domain;
	obj->base.write_domain = 0;
}

/**
 * Moves a single object to the GTT read, and possibly write domain.
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
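 *
 * Not valid on unbound objects (those get -EINVAL); a write access also
 * marks the object dirty so its backing pages are written back when the
 * object is eventually evicted.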
2767 */ 2768 int 2769 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) 2770 { 2771 drm_i915_private_t *dev_priv = obj->base.dev->dev_private; 2772 uint32_t old_write_domain, old_read_domains; 2773 int ret; 2774 2775 /* Not valid to be called on unbound objects. */ 2776 if (obj->gtt_space == NULL) 2777 return -EINVAL; 2778 2779 if (obj->base.write_domain == I915_GEM_DOMAIN_GTT) 2780 return 0; 2781 2782 ret = i915_gem_object_wait_rendering(obj, !write); 2783 if (ret) 2784 return ret; 2785 2786 i915_gem_object_flush_cpu_write_domain(obj); 2787 2788 /* Serialise direct access to this object with the barriers for 2789 * coherent writes from the GPU, by effectively invalidating the 2790 * GTT domain upon first access. 2791 */ 2792 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) 2793 cpu_mfence(); 2794 2795 old_write_domain = obj->base.write_domain; 2796 old_read_domains = obj->base.read_domains; 2797 2798 /* It should now be out of any other write domains, and we can update 2799 * the domain values for our changes. 2800 */ 2801 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0); 2802 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 2803 if (write) { 2804 obj->base.read_domains = I915_GEM_DOMAIN_GTT; 2805 obj->base.write_domain = I915_GEM_DOMAIN_GTT; 2806 obj->dirty = 1; 2807 } 2808 2809 /* And bump the LRU for this access */ 2810 if (i915_gem_object_is_inactive(obj)) 2811 list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list); 2812 2813 return 0; 2814 } 2815 2816 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, 2817 enum i915_cache_level cache_level) 2818 { 2819 struct drm_device *dev = obj->base.dev; 2820 drm_i915_private_t *dev_priv = dev->dev_private; 2821 int ret; 2822 2823 if (obj->cache_level == cache_level) 2824 return 0; 2825 2826 if (obj->pin_count) { 2827 DRM_DEBUG("can not change the cache level of pinned objects\n"); 2828 return -EBUSY; 2829 } 2830 2831 if (!i915_gem_valid_gtt_space(dev, obj->gtt_space, cache_level)) { 2832 ret = i915_gem_object_unbind(obj); 2833 if (ret) 2834 return ret; 2835 } 2836 2837 if (obj->gtt_space) { 2838 ret = i915_gem_object_finish_gpu(obj); 2839 if (ret) 2840 return ret; 2841 2842 i915_gem_object_finish_gtt(obj); 2843 2844 /* Before SandyBridge, you could not use tiling or fence 2845 * registers with snooped memory, so relinquish any fences 2846 * currently pointing to our region in the aperture. 2847 */ 2848 if (INTEL_INFO(dev)->gen < 6) { 2849 ret = i915_gem_object_put_fence(obj); 2850 if (ret) 2851 return ret; 2852 } 2853 2854 if (obj->has_global_gtt_mapping) 2855 i915_gem_gtt_bind_object(obj, cache_level); 2856 if (obj->has_aliasing_ppgtt_mapping) 2857 i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt, 2858 obj, cache_level); 2859 2860 obj->gtt_space->color = cache_level; 2861 } 2862 2863 if (cache_level == I915_CACHE_NONE) { 2864 u32 old_read_domains, old_write_domain; 2865 2866 /* If we're coming from LLC cached, then we haven't 2867 * actually been tracking whether the data is in the 2868 * CPU cache or not, since we only allow one bit set 2869 * in obj->write_domain and have been skipping the clflushes. 2870 * Just set it to the CPU cache for now. 
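		 * The WARN_ONs below assert exactly that: nothing outside
		 * the CPU domain bits should remain set at this point.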
2871 */ 2872 WARN_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU); 2873 WARN_ON(obj->base.read_domains & ~I915_GEM_DOMAIN_CPU); 2874 2875 old_read_domains = obj->base.read_domains; 2876 old_write_domain = obj->base.write_domain; 2877 2878 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 2879 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 2880 2881 } 2882 2883 obj->cache_level = cache_level; 2884 i915_gem_verify_gtt(dev); 2885 return 0; 2886 } 2887 2888 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, 2889 struct drm_file *file) 2890 { 2891 struct drm_i915_gem_caching *args = data; 2892 struct drm_i915_gem_object *obj; 2893 int ret; 2894 2895 ret = i915_mutex_lock_interruptible(dev); 2896 if (ret) 2897 return ret; 2898 2899 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 2900 if (&obj->base == NULL) { 2901 ret = -ENOENT; 2902 goto unlock; 2903 } 2904 2905 args->caching = obj->cache_level != I915_CACHE_NONE; 2906 2907 drm_gem_object_unreference(&obj->base); 2908 unlock: 2909 mutex_unlock(&dev->struct_mutex); 2910 return ret; 2911 } 2912 2913 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, 2914 struct drm_file *file) 2915 { 2916 struct drm_i915_gem_caching *args = data; 2917 struct drm_i915_gem_object *obj; 2918 enum i915_cache_level level; 2919 int ret; 2920 2921 switch (args->caching) { 2922 case I915_CACHING_NONE: 2923 level = I915_CACHE_NONE; 2924 break; 2925 case I915_CACHING_CACHED: 2926 level = I915_CACHE_LLC; 2927 break; 2928 default: 2929 return -EINVAL; 2930 } 2931 2932 ret = i915_mutex_lock_interruptible(dev); 2933 if (ret) 2934 return ret; 2935 2936 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 2937 if (&obj->base == NULL) { 2938 ret = -ENOENT; 2939 goto unlock; 2940 } 2941 2942 ret = i915_gem_object_set_cache_level(obj, level); 2943 2944 drm_gem_object_unreference(&obj->base); 2945 unlock: 2946 mutex_unlock(&dev->struct_mutex); 2947 return ret; 2948 } 2949 2950 /* 2951 * Prepare buffer for display plane (scanout, cursors, etc). 2952 * Can be called from an uninterruptible phase (modesetting) and allows 2953 * any flushes to be pipelined (for pageflips). 2954 */ 2955 int 2956 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, 2957 u32 alignment, 2958 struct intel_ring_buffer *pipelined) 2959 { 2960 u32 old_read_domains, old_write_domain; 2961 int ret; 2962 2963 if (pipelined != obj->ring) { 2964 ret = i915_gem_object_sync(obj, pipelined); 2965 if (ret) 2966 return ret; 2967 } 2968 2969 /* The display engine is not coherent with the LLC cache on gen6. As 2970 * a result, we make sure that the pinning that is about to occur is 2971 * done with uncached PTEs. This is lowest common denominator for all 2972 * chipsets. 2973 * 2974 * However for gen6+, we could do better by using the GFDT bit instead 2975 * of uncaching, which would allow us to flush all the LLC-cached data 2976 * with that bit in the PTE to main memory with just one PIPE_CONTROL. 2977 */ 2978 ret = i915_gem_object_set_cache_level(obj, I915_CACHE_NONE); 2979 if (ret) 2980 return ret; 2981 2982 /* As the user may map the buffer once pinned in the display plane 2983 * (e.g. libkms for the bootup splash), we have to ensure that we 2984 * always use map_and_fenceable for all scanout buffers. 
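	 * Hence the pin below passes map_and_fenceable = true, whatever
	 * alignment the caller asked for.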
2985 */ 2986 ret = i915_gem_object_pin(obj, alignment, true, false); 2987 if (ret) 2988 return ret; 2989 2990 i915_gem_object_flush_cpu_write_domain(obj); 2991 2992 old_write_domain = obj->base.write_domain; 2993 old_read_domains = obj->base.read_domains; 2994 2995 /* It should now be out of any other write domains, and we can update 2996 * the domain values for our changes. 2997 */ 2998 obj->base.write_domain = 0; 2999 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 3000 3001 return 0; 3002 } 3003 3004 int 3005 i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj) 3006 { 3007 int ret; 3008 3009 if ((obj->base.read_domains & I915_GEM_GPU_DOMAINS) == 0) 3010 return 0; 3011 3012 ret = i915_gem_object_wait_rendering(obj, false); 3013 if (ret) 3014 return ret; 3015 3016 /* Ensure that we invalidate the GPU's caches and TLBs. */ 3017 obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS; 3018 return 0; 3019 } 3020 3021 /** 3022 * Moves a single object to the CPU read, and possibly write domain. 3023 * 3024 * This function returns when the move is complete, including waiting on 3025 * flushes to occur. 3026 */ 3027 int 3028 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) 3029 { 3030 uint32_t old_write_domain, old_read_domains; 3031 int ret; 3032 3033 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) 3034 return 0; 3035 3036 ret = i915_gem_object_wait_rendering(obj, !write); 3037 if (ret) 3038 return ret; 3039 3040 i915_gem_object_flush_gtt_write_domain(obj); 3041 3042 old_write_domain = obj->base.write_domain; 3043 old_read_domains = obj->base.read_domains; 3044 3045 /* Flush the CPU cache if it's still invalid. */ 3046 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) { 3047 i915_gem_clflush_object(obj); 3048 3049 obj->base.read_domains |= I915_GEM_DOMAIN_CPU; 3050 } 3051 3052 /* It should now be out of any other write domains, and we can update 3053 * the domain values for our changes. 3054 */ 3055 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0); 3056 3057 /* If we're writing through the CPU, then the GPU read domains will 3058 * need to be invalidated at next use. 3059 */ 3060 if (write) { 3061 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 3062 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 3063 } 3064 3065 return 0; 3066 } 3067 3068 /* Throttle our rendering by waiting until the ring has completed our requests 3069 * emitted over 20 msec ago. 3070 * 3071 * Note that if we were to use the current jiffies each time around the loop, 3072 * we wouldn't escape the function with any frames outstanding if the time to 3073 * render a frame was over 20ms. 3074 * 3075 * This should get us reasonable parallelism between CPU and GPU but also 3076 * relatively low latency when blocking on a particular request to finish. 
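 *
 * As a rough worked example: a client pacing itself at 60Hz emits a
 * request roughly every 16ms, so a 20ms window keeps about one frame in
 * flight before the client is made to wait, while a client emitting a
 * request every 1ms could pile up ~20 outstanding requests first.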
3077 */ 3078 static int 3079 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) 3080 { 3081 struct drm_i915_private *dev_priv = dev->dev_private; 3082 struct drm_i915_file_private *file_priv = file->driver_priv; 3083 unsigned long recent_enough = jiffies - msecs_to_jiffies(20); 3084 struct drm_i915_gem_request *request; 3085 struct intel_ring_buffer *ring = NULL; 3086 unsigned reset_counter; 3087 u32 seqno = 0; 3088 int ret; 3089 3090 ret = i915_gem_wait_for_error(&dev_priv->gpu_error); 3091 if (ret) 3092 return ret; 3093 3094 ret = i915_gem_check_wedge(&dev_priv->gpu_error, false); 3095 if (ret) 3096 return ret; 3097 3098 spin_lock(&file_priv->mm.lock); 3099 list_for_each_entry(request, &file_priv->mm.request_list, client_list) { 3100 if (time_after_eq(request->emitted_jiffies, recent_enough)) 3101 break; 3102 3103 ring = request->ring; 3104 seqno = request->seqno; 3105 } 3106 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter); 3107 spin_unlock(&file_priv->mm.lock); 3108 3109 if (seqno == 0) 3110 return 0; 3111 3112 ret = __wait_seqno(ring, seqno, reset_counter, true, NULL); 3113 if (ret == 0) 3114 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0); 3115 3116 return ret; 3117 } 3118 3119 int 3120 i915_gem_object_pin(struct drm_i915_gem_object *obj, 3121 uint32_t alignment, 3122 bool map_and_fenceable, 3123 bool nonblocking) 3124 { 3125 int ret; 3126 3127 if (WARN_ON(obj->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT)) 3128 return -EBUSY; 3129 3130 if (obj->gtt_space != NULL) { 3131 if ((alignment && obj->gtt_offset & (alignment - 1)) || 3132 (map_and_fenceable && !obj->map_and_fenceable)) { 3133 WARN(obj->pin_count, 3134 "bo is already pinned with incorrect alignment:" 3135 " offset=%x, req.alignment=%x, req.map_and_fenceable=%d," 3136 " obj->map_and_fenceable=%d\n", 3137 obj->gtt_offset, alignment, 3138 map_and_fenceable, 3139 obj->map_and_fenceable); 3140 ret = i915_gem_object_unbind(obj); 3141 if (ret) 3142 return ret; 3143 } 3144 } 3145 3146 if (obj->gtt_space == NULL) { 3147 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 3148 3149 ret = i915_gem_object_bind_to_gtt(obj, alignment, 3150 map_and_fenceable, 3151 nonblocking); 3152 if (ret) 3153 return ret; 3154 3155 if (!dev_priv->mm.aliasing_ppgtt) 3156 i915_gem_gtt_bind_object(obj, obj->cache_level); 3157 } 3158 3159 if (!obj->has_global_gtt_mapping && map_and_fenceable) 3160 i915_gem_gtt_bind_object(obj, obj->cache_level); 3161 3162 obj->pin_count++; 3163 obj->pin_mappable |= map_and_fenceable; 3164 3165 return 0; 3166 } 3167 3168 void 3169 i915_gem_object_unpin(struct drm_i915_gem_object *obj) 3170 { 3171 BUG_ON(obj->pin_count == 0); 3172 BUG_ON(obj->gtt_space == NULL); 3173 3174 if (--obj->pin_count == 0) 3175 obj->pin_mappable = false; 3176 } 3177 3178 int 3179 i915_gem_pin_ioctl(struct drm_device *dev, void *data, 3180 struct drm_file *file) 3181 { 3182 struct drm_i915_gem_pin *args = data; 3183 struct drm_i915_gem_object *obj; 3184 int ret; 3185 3186 ret = i915_mutex_lock_interruptible(dev); 3187 if (ret) 3188 return ret; 3189 3190 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 3191 if (&obj->base == NULL) { 3192 ret = -ENOENT; 3193 goto unlock; 3194 } 3195 3196 if (obj->madv != I915_MADV_WILLNEED) { 3197 DRM_ERROR("Attempting to pin a purgeable buffer\n"); 3198 ret = -EINVAL; 3199 goto out; 3200 } 3201 3202 if (obj->pin_filp != NULL && obj->pin_filp != file) { 3203 DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n", 3204 args->handle); 3205 
ret = -EINVAL; 3206 goto out; 3207 } 3208 3209 if (obj->user_pin_count == 0) { 3210 ret = i915_gem_object_pin(obj, args->alignment, true, false); 3211 if (ret) 3212 goto out; 3213 } 3214 3215 obj->user_pin_count++; 3216 obj->pin_filp = file; 3217 3218 /* XXX - flush the CPU caches for pinned objects 3219 * as the X server doesn't manage domains yet 3220 */ 3221 i915_gem_object_flush_cpu_write_domain(obj); 3222 args->offset = obj->gtt_offset; 3223 out: 3224 drm_gem_object_unreference(&obj->base); 3225 unlock: 3226 mutex_unlock(&dev->struct_mutex); 3227 return ret; 3228 } 3229 3230 int 3231 i915_gem_unpin_ioctl(struct drm_device *dev, void *data, 3232 struct drm_file *file) 3233 { 3234 struct drm_i915_gem_pin *args = data; 3235 struct drm_i915_gem_object *obj; 3236 int ret; 3237 3238 ret = i915_mutex_lock_interruptible(dev); 3239 if (ret) 3240 return ret; 3241 3242 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 3243 if (&obj->base == NULL) { 3244 ret = -ENOENT; 3245 goto unlock; 3246 } 3247 3248 if (obj->pin_filp != file) { 3249 DRM_ERROR("Not pinned by caller in i915_gem_pin_ioctl(): %d\n", 3250 args->handle); 3251 ret = -EINVAL; 3252 goto out; 3253 } 3254 obj->user_pin_count--; 3255 if (obj->user_pin_count == 0) { 3256 obj->pin_filp = NULL; 3257 i915_gem_object_unpin(obj); 3258 } 3259 3260 out: 3261 drm_gem_object_unreference(&obj->base); 3262 unlock: 3263 mutex_unlock(&dev->struct_mutex); 3264 return ret; 3265 } 3266 3267 int 3268 i915_gem_busy_ioctl(struct drm_device *dev, void *data, 3269 struct drm_file *file) 3270 { 3271 struct drm_i915_gem_busy *args = data; 3272 struct drm_i915_gem_object *obj; 3273 int ret; 3274 3275 ret = i915_mutex_lock_interruptible(dev); 3276 if (ret) 3277 return ret; 3278 3279 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 3280 if (&obj->base == NULL) { 3281 ret = -ENOENT; 3282 goto unlock; 3283 } 3284 3285 /* Count all active objects as busy, even if they are currently not used 3286 * by the gpu. Users of this interface expect objects to eventually 3287 * become non-busy without any further actions, therefore emit any 3288 * necessary flushes here. 
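	 * The low bit of args->busy reports obj->active itself; which ring
	 * the object is busy on is encoded in the upper half of the word
	 * via intel_ring_flag() << 16.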
3289 */ 3290 ret = i915_gem_object_flush_active(obj); 3291 3292 args->busy = obj->active; 3293 if (obj->ring) { 3294 args->busy |= intel_ring_flag(obj->ring) << 16; 3295 } 3296 3297 drm_gem_object_unreference(&obj->base); 3298 unlock: 3299 mutex_unlock(&dev->struct_mutex); 3300 return ret; 3301 } 3302 3303 int 3304 i915_gem_throttle_ioctl(struct drm_device *dev, void *data, 3305 struct drm_file *file_priv) 3306 { 3307 return i915_gem_ring_throttle(dev, file_priv); 3308 } 3309 3310 int 3311 i915_gem_madvise_ioctl(struct drm_device *dev, void *data, 3312 struct drm_file *file_priv) 3313 { 3314 struct drm_i915_gem_madvise *args = data; 3315 struct drm_i915_gem_object *obj; 3316 int ret; 3317 3318 switch (args->madv) { 3319 case I915_MADV_DONTNEED: 3320 case I915_MADV_WILLNEED: 3321 break; 3322 default: 3323 return -EINVAL; 3324 } 3325 3326 ret = i915_mutex_lock_interruptible(dev); 3327 if (ret) 3328 return ret; 3329 3330 obj = to_intel_bo(drm_gem_object_lookup(dev, file_priv, args->handle)); 3331 if (&obj->base == NULL) { 3332 ret = -ENOENT; 3333 goto unlock; 3334 } 3335 3336 if (obj->pin_count) { 3337 ret = -EINVAL; 3338 goto out; 3339 } 3340 3341 if (obj->madv != __I915_MADV_PURGED) 3342 obj->madv = args->madv; 3343 3344 /* if the object is no longer attached, discard its backing storage */ 3345 if (i915_gem_object_is_purgeable(obj) && obj->pages == NULL) 3346 i915_gem_object_truncate(obj); 3347 3348 args->retained = obj->madv != __I915_MADV_PURGED; 3349 3350 out: 3351 drm_gem_object_unreference(&obj->base); 3352 unlock: 3353 mutex_unlock(&dev->struct_mutex); 3354 return ret; 3355 } 3356 3357 void i915_gem_object_init(struct drm_i915_gem_object *obj, 3358 const struct drm_i915_gem_object_ops *ops) 3359 { 3360 INIT_LIST_HEAD(&obj->mm_list); 3361 INIT_LIST_HEAD(&obj->gtt_list); 3362 INIT_LIST_HEAD(&obj->ring_list); 3363 INIT_LIST_HEAD(&obj->exec_list); 3364 3365 obj->ops = ops; 3366 3367 obj->fence_reg = I915_FENCE_REG_NONE; 3368 obj->madv = I915_MADV_WILLNEED; 3369 /* Avoid an unnecessary call to unbind on the first bind. */ 3370 obj->map_and_fenceable = true; 3371 3372 i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size); 3373 } 3374 3375 static const struct drm_i915_gem_object_ops i915_gem_object_ops = { 3376 .get_pages = i915_gem_object_get_pages_gtt, 3377 .put_pages = i915_gem_object_put_pages_gtt, 3378 }; 3379 3380 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev, 3381 size_t size) 3382 { 3383 struct drm_i915_gem_object *obj; 3384 #if 0 3385 struct address_space *mapping; 3386 u32 mask; 3387 #endif 3388 3389 obj = kmalloc(sizeof(*obj), M_DRM, M_WAITOK | M_ZERO); 3390 if (obj == NULL) 3391 return NULL; 3392 3393 if (drm_gem_object_init(dev, &obj->base, size) != 0) { 3394 kfree(obj); 3395 return NULL; 3396 } 3397 3398 #if 0 3399 mask = GFP_HIGHUSER | __GFP_RECLAIMABLE; 3400 if (IS_CRESTLINE(dev) || IS_BROADWATER(dev)) { 3401 /* 965gm cannot relocate objects above 4GiB. */ 3402 mask &= ~__GFP_HIGHMEM; 3403 mask |= __GFP_DMA32; 3404 } 3405 3406 mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping; 3407 mapping_set_gfp_mask(mapping, mask); 3408 #endif 3409 3410 i915_gem_object_init(obj, &i915_gem_object_ops); 3411 3412 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 3413 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 3414 3415 if (HAS_LLC(dev)) { 3416 /* On some devices, we can have the GPU use the LLC (the CPU 3417 * cache) for about a 10% performance improvement 3418 * compared to uncached. 
Graphics requests other than 3419 * display scanout are coherent with the CPU in 3420 * accessing this cache. This means in this mode we 3421 * don't need to clflush on the CPU side, and on the 3422 * GPU side we only need to flush internal caches to 3423 * get data visible to the CPU. 3424 * 3425 * However, we maintain the display planes as UC, and so 3426 * need to rebind when first used as such. 3427 */ 3428 obj->cache_level = I915_CACHE_LLC; 3429 } else 3430 obj->cache_level = I915_CACHE_NONE; 3431 3432 return obj; 3433 } 3434 3435 int i915_gem_init_object(struct drm_gem_object *obj) 3436 { 3437 BUG(); 3438 3439 return 0; 3440 } 3441 3442 void i915_gem_free_object(struct drm_gem_object *gem_obj) 3443 { 3444 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); 3445 struct drm_device *dev = obj->base.dev; 3446 drm_i915_private_t *dev_priv = dev->dev_private; 3447 3448 if (obj->phys_obj) 3449 i915_gem_detach_phys_object(dev, obj); 3450 3451 obj->pin_count = 0; 3452 if (WARN_ON(i915_gem_object_unbind(obj) == -ERESTARTSYS)) { 3453 bool was_interruptible; 3454 3455 was_interruptible = dev_priv->mm.interruptible; 3456 dev_priv->mm.interruptible = false; 3457 3458 WARN_ON(i915_gem_object_unbind(obj)); 3459 3460 dev_priv->mm.interruptible = was_interruptible; 3461 } 3462 3463 drm_gem_free_mmap_offset(&obj->base); 3464 3465 drm_gem_object_release(&obj->base); 3466 i915_gem_info_remove_obj(dev_priv, obj->base.size); 3467 3468 drm_free(obj->bit_17, M_DRM); 3469 drm_free(obj, M_DRM); 3470 } 3471 3472 int 3473 i915_gem_idle(struct drm_device *dev) 3474 { 3475 drm_i915_private_t *dev_priv = dev->dev_private; 3476 int ret; 3477 3478 mutex_lock(&dev->struct_mutex); 3479 3480 if (dev_priv->mm.suspended) { 3481 mutex_unlock(&dev->struct_mutex); 3482 return 0; 3483 } 3484 3485 ret = i915_gpu_idle(dev); 3486 if (ret) { 3487 mutex_unlock(&dev->struct_mutex); 3488 return ret; 3489 } 3490 i915_gem_retire_requests(dev); 3491 3492 /* Under UMS, be paranoid and evict. */ 3493 if (!drm_core_check_feature(dev, DRIVER_MODESET)) 3494 i915_gem_evict_everything(dev); 3495 3496 /* Hack! Don't let anybody do execbuf while we don't control the chip. 3497 * We need to replace this with a semaphore, or something. 3498 * And not confound mm.suspended! 3499 */ 3500 dev_priv->mm.suspended = 1; 3501 del_timer_sync(&dev_priv->gpu_error.hangcheck_timer); 3502 3503 i915_kernel_lost_context(dev); 3504 i915_gem_cleanup_ringbuffer(dev); 3505 3506 mutex_unlock(&dev->struct_mutex); 3507 3508 /* Cancel the retire work handler, which should be idle now. 
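	 * mm.suspended is already set, so a handler that slips in after the
	 * mutex was dropped above will not re-arm itself, and the
	 * synchronous cancel reaps anything still queued.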
*/ 3509 cancel_delayed_work_sync(&dev_priv->mm.retire_work); 3510 3511 return 0; 3512 } 3513 3514 void i915_gem_l3_remap(struct drm_device *dev) 3515 { 3516 drm_i915_private_t *dev_priv = dev->dev_private; 3517 u32 misccpctl; 3518 int i; 3519 3520 if (!HAS_L3_GPU_CACHE(dev)) 3521 return; 3522 3523 if (!dev_priv->l3_parity.remap_info) 3524 return; 3525 3526 misccpctl = I915_READ(GEN7_MISCCPCTL); 3527 I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE); 3528 POSTING_READ(GEN7_MISCCPCTL); 3529 3530 for (i = 0; i < GEN7_L3LOG_SIZE; i += 4) { 3531 u32 remap = I915_READ(GEN7_L3LOG_BASE + i); 3532 if (remap && remap != dev_priv->l3_parity.remap_info[i/4]) 3533 DRM_DEBUG("0x%x was already programmed to %x\n", 3534 GEN7_L3LOG_BASE + i, remap); 3535 if (remap && !dev_priv->l3_parity.remap_info[i/4]) 3536 DRM_DEBUG_DRIVER("Clearing remapped register\n"); 3537 I915_WRITE(GEN7_L3LOG_BASE + i, dev_priv->l3_parity.remap_info[i/4]); 3538 } 3539 3540 /* Make sure all the writes land before disabling dop clock gating */ 3541 POSTING_READ(GEN7_L3LOG_BASE); 3542 3543 I915_WRITE(GEN7_MISCCPCTL, misccpctl); 3544 } 3545 3546 void i915_gem_init_swizzling(struct drm_device *dev) 3547 { 3548 drm_i915_private_t *dev_priv = dev->dev_private; 3549 3550 if (INTEL_INFO(dev)->gen < 5 || 3551 dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE) 3552 return; 3553 3554 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) | 3555 DISP_TILE_SURFACE_SWIZZLING); 3556 3557 if (IS_GEN5(dev)) 3558 return; 3559 3560 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL); 3561 if (IS_GEN6(dev)) 3562 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB)); 3563 else if (IS_GEN7(dev)) 3564 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB)); 3565 else 3566 BUG(); 3567 } 3568 3569 static bool 3570 intel_enable_blt(struct drm_device *dev) 3571 { 3572 int revision; 3573 3574 if (!HAS_BLT(dev)) 3575 return false; 3576 3577 /* The blitter was dysfunctional on early prototypes */ 3578 revision = pci_read_config(dev->dev, PCIR_REVID, 1); 3579 if (IS_GEN6(dev) && revision < 8) { 3580 DRM_INFO("BLT not supported on this pre-production hardware;" 3581 " graphics performance will be degraded.\n"); 3582 return false; 3583 } 3584 3585 return true; 3586 } 3587 3588 static int i915_gem_init_rings(struct drm_device *dev) 3589 { 3590 struct drm_i915_private *dev_priv = dev->dev_private; 3591 int ret; 3592 3593 ret = intel_init_render_ring_buffer(dev); 3594 if (ret) 3595 return ret; 3596 3597 if (HAS_BSD(dev)) { 3598 ret = intel_init_bsd_ring_buffer(dev); 3599 if (ret) 3600 goto cleanup_render_ring; 3601 } 3602 3603 if (intel_enable_blt(dev)) { 3604 ret = intel_init_blt_ring_buffer(dev); 3605 if (ret) 3606 goto cleanup_bsd_ring; 3607 } 3608 3609 ret = i915_gem_set_seqno(dev, ((u32)~0 - 0x1000)); 3610 if (ret) 3611 goto cleanup_blt_ring; 3612 3613 return 0; 3614 3615 cleanup_blt_ring: 3616 intel_cleanup_ring_buffer(&dev_priv->ring[BCS]); 3617 cleanup_bsd_ring: 3618 intel_cleanup_ring_buffer(&dev_priv->ring[VCS]); 3619 cleanup_render_ring: 3620 intel_cleanup_ring_buffer(&dev_priv->ring[RCS]); 3621 3622 return ret; 3623 } 3624 3625 int 3626 i915_gem_init_hw(struct drm_device *dev) 3627 { 3628 drm_i915_private_t *dev_priv = dev->dev_private; 3629 int ret; 3630 3631 #if 0 3632 if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt()) 3633 return -EIO; 3634 #endif 3635 3636 if (IS_HASWELL(dev) && (I915_READ(0x120010) == 1)) 3637 I915_WRITE(0x9008, I915_READ(0x9008) | 0xf0000); 3638 3639 if (HAS_PCH_NOP(dev)) { 3640 u32 temp = 
I915_READ(GEN7_MSG_CTL); 3641 temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK); 3642 I915_WRITE(GEN7_MSG_CTL, temp); 3643 } 3644 3645 i915_gem_l3_remap(dev); 3646 3647 i915_gem_init_swizzling(dev); 3648 3649 ret = i915_gem_init_rings(dev); 3650 if (ret) 3651 return ret; 3652 3653 /* 3654 * XXX: There was some w/a described somewhere suggesting loading 3655 * contexts before PPGTT. 3656 */ 3657 i915_gem_context_init(dev); 3658 if (dev_priv->mm.aliasing_ppgtt) { 3659 ret = dev_priv->mm.aliasing_ppgtt->enable(dev); 3660 if (ret) { 3661 i915_gem_cleanup_aliasing_ppgtt(dev); 3662 DRM_INFO("PPGTT enable failed. This is not fatal, but unexpected\n"); 3663 } 3664 } 3665 3666 return 0; 3667 } 3668 3669 int i915_gem_init(struct drm_device *dev) 3670 { 3671 struct drm_i915_private *dev_priv = dev->dev_private; 3672 int ret; 3673 3674 mutex_lock(&dev->struct_mutex); 3675 3676 if (IS_VALLEYVIEW(dev)) { 3677 /* VLVA0 (potential hack), BIOS isn't actually waking us */ 3678 I915_WRITE(VLV_GTLC_WAKE_CTRL, 1); 3679 if (wait_for((I915_READ(VLV_GTLC_PW_STATUS) & 1) == 1, 10)) 3680 DRM_DEBUG_DRIVER("allow wake ack timed out\n"); 3681 } 3682 3683 i915_gem_init_global_gtt(dev); 3684 3685 ret = i915_gem_init_hw(dev); 3686 mutex_unlock(&dev->struct_mutex); 3687 if (ret) { 3688 i915_gem_cleanup_aliasing_ppgtt(dev); 3689 return ret; 3690 } 3691 3692 /* Allow hardware batchbuffers unless told otherwise, but not for KMS. */ 3693 if (!drm_core_check_feature(dev, DRIVER_MODESET)) 3694 dev_priv->dri1.allow_batchbuffer = 1; 3695 return 0; 3696 } 3697 3698 void 3699 i915_gem_cleanup_ringbuffer(struct drm_device *dev) 3700 { 3701 drm_i915_private_t *dev_priv = dev->dev_private; 3702 struct intel_ring_buffer *ring; 3703 int i; 3704 3705 for_each_ring(ring, dev_priv, i) 3706 intel_cleanup_ring_buffer(ring); 3707 } 3708 3709 int 3710 i915_gem_entervt_ioctl(struct drm_device *dev, void *data, 3711 struct drm_file *file_priv) 3712 { 3713 drm_i915_private_t *dev_priv = dev->dev_private; 3714 int ret; 3715 3716 if (drm_core_check_feature(dev, DRIVER_MODESET)) 3717 return 0; 3718 3719 if (i915_reset_in_progress(&dev_priv->gpu_error)) { 3720 DRM_ERROR("Reenabling wedged hardware, good luck\n"); 3721 atomic_set(&dev_priv->gpu_error.reset_counter, 0); 3722 } 3723 3724 mutex_lock(&dev->struct_mutex); 3725 dev_priv->mm.suspended = 0; 3726 3727 ret = i915_gem_init_hw(dev); 3728 if (ret != 0) { 3729 mutex_unlock(&dev->struct_mutex); 3730 return ret; 3731 } 3732 3733 KASSERT(list_empty(&dev_priv->mm.active_list), ("active list")); 3734 mutex_unlock(&dev->struct_mutex); 3735 3736 ret = drm_irq_install(dev); 3737 if (ret) 3738 goto cleanup_ringbuffer; 3739 3740 return 0; 3741 3742 cleanup_ringbuffer: 3743 mutex_lock(&dev->struct_mutex); 3744 i915_gem_cleanup_ringbuffer(dev); 3745 dev_priv->mm.suspended = 1; 3746 mutex_unlock(&dev->struct_mutex); 3747 3748 return ret; 3749 } 3750 3751 int 3752 i915_gem_leavevt_ioctl(struct drm_device *dev, void *data, 3753 struct drm_file *file_priv) 3754 { 3755 if (drm_core_check_feature(dev, DRIVER_MODESET)) 3756 return 0; 3757 3758 drm_irq_uninstall(dev); 3759 return i915_gem_idle(dev); 3760 } 3761 3762 void 3763 i915_gem_lastclose(struct drm_device *dev) 3764 { 3765 int ret; 3766 3767 if (drm_core_check_feature(dev, DRIVER_MODESET)) 3768 return; 3769 3770 ret = i915_gem_idle(dev); 3771 if (ret) 3772 DRM_ERROR("failed to idle hardware: %d\n", ret); 3773 } 3774 3775 static void 3776 init_ring_lists(struct intel_ring_buffer *ring) 3777 { 3778 INIT_LIST_HEAD(&ring->active_list); 3779 
INIT_LIST_HEAD(&ring->request_list); 3780 } 3781 3782 void 3783 i915_gem_load(struct drm_device *dev) 3784 { 3785 int i; 3786 drm_i915_private_t *dev_priv = dev->dev_private; 3787 3788 INIT_LIST_HEAD(&dev_priv->mm.active_list); 3789 INIT_LIST_HEAD(&dev_priv->mm.inactive_list); 3790 INIT_LIST_HEAD(&dev_priv->mm.unbound_list); 3791 INIT_LIST_HEAD(&dev_priv->mm.bound_list); 3792 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 3793 for (i = 0; i < I915_NUM_RINGS; i++) 3794 init_ring_lists(&dev_priv->ring[i]); 3795 for (i = 0; i < I915_MAX_NUM_FENCES; i++) 3796 INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list); 3797 INIT_DELAYED_WORK(&dev_priv->mm.retire_work, 3798 i915_gem_retire_work_handler); 3799 init_waitqueue_head(&dev_priv->gpu_error.reset_queue); 3800 3801 /* On GEN3 we really need to make sure the ARB C3 LP bit is set */ 3802 if (IS_GEN3(dev)) { 3803 I915_WRITE(MI_ARB_STATE, 3804 _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE)); 3805 } 3806 3807 dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL; 3808 3809 /* Old X drivers will take 0-2 for front, back, depth buffers */ 3810 if (!drm_core_check_feature(dev, DRIVER_MODESET)) 3811 dev_priv->fence_reg_start = 3; 3812 3813 if (INTEL_INFO(dev)->gen >= 7 && !IS_VALLEYVIEW(dev)) 3814 dev_priv->num_fence_regs = 32; 3815 else if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev)) 3816 dev_priv->num_fence_regs = 16; 3817 else 3818 dev_priv->num_fence_regs = 8; 3819 3820 /* Initialize fence registers to zero */ 3821 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 3822 i915_gem_restore_fences(dev); 3823 3824 i915_gem_detect_bit_6_swizzle(dev); 3825 init_waitqueue_head(&dev_priv->pending_flip_queue); 3826 3827 dev_priv->mm.interruptible = true; 3828 3829 #if 0 3830 dev_priv->mm.inactive_shrinker.shrink = i915_gem_inactive_shrink; 3831 dev_priv->mm.inactive_shrinker.seeks = DEFAULT_SEEKS; 3832 register_shrinker(&dev_priv->mm.inactive_shrinker); 3833 #else 3834 dev_priv->mm.inactive_shrinker = EVENTHANDLER_REGISTER(vm_lowmem, 3835 i915_gem_lowmem, dev, EVENTHANDLER_PRI_ANY); 3836 #endif 3837 } 3838 3839 /* 3840 * Create a physically contiguous memory object for this object 3841 * e.g. 
for cursor + overlay regs 3842 */ 3843 static int i915_gem_init_phys_object(struct drm_device *dev, 3844 int id, int size, int align) 3845 { 3846 drm_i915_private_t *dev_priv = dev->dev_private; 3847 struct drm_i915_gem_phys_object *phys_obj; 3848 int ret; 3849 3850 if (dev_priv->mm.phys_objs[id - 1] || !size) 3851 return 0; 3852 3853 phys_obj = kmalloc(sizeof(struct drm_i915_gem_phys_object), M_DRM, 3854 M_WAITOK | M_ZERO); 3855 if (!phys_obj) 3856 return -ENOMEM; 3857 3858 phys_obj->id = id; 3859 3860 phys_obj->handle = drm_pci_alloc(dev, size, align); 3861 if (!phys_obj->handle) { 3862 ret = -ENOMEM; 3863 goto kfree_obj; 3864 } 3865 pmap_change_attr((vm_offset_t)phys_obj->handle->vaddr, 3866 size / PAGE_SIZE, PAT_WRITE_COMBINING); 3867 3868 dev_priv->mm.phys_objs[id - 1] = phys_obj; 3869 3870 return 0; 3871 3872 kfree_obj: 3873 drm_free(phys_obj, M_DRM); 3874 return ret; 3875 } 3876 3877 static void i915_gem_free_phys_object(struct drm_device *dev, int id) 3878 { 3879 drm_i915_private_t *dev_priv = dev->dev_private; 3880 struct drm_i915_gem_phys_object *phys_obj; 3881 3882 if (!dev_priv->mm.phys_objs[id - 1]) 3883 return; 3884 3885 phys_obj = dev_priv->mm.phys_objs[id - 1]; 3886 if (phys_obj->cur_obj) { 3887 i915_gem_detach_phys_object(dev, phys_obj->cur_obj); 3888 } 3889 3890 drm_pci_free(dev, phys_obj->handle); 3891 drm_free(phys_obj, M_DRM); 3892 dev_priv->mm.phys_objs[id - 1] = NULL; 3893 } 3894 3895 void i915_gem_free_all_phys_object(struct drm_device *dev) 3896 { 3897 int i; 3898 3899 for (i = I915_GEM_PHYS_CURSOR_0; i <= I915_MAX_PHYS_OBJECT; i++) 3900 i915_gem_free_phys_object(dev, i); 3901 } 3902 3903 void i915_gem_detach_phys_object(struct drm_device *dev, 3904 struct drm_i915_gem_object *obj) 3905 { 3906 struct vm_object *mapping = obj->base.vm_obj; 3907 char *vaddr; 3908 int i; 3909 int page_count; 3910 3911 if (!obj->phys_obj) 3912 return; 3913 vaddr = obj->phys_obj->handle->vaddr; 3914 3915 page_count = obj->base.size / PAGE_SIZE; 3916 VM_OBJECT_LOCK(obj->base.vm_obj); 3917 for (i = 0; i < page_count; i++) { 3918 struct vm_page *page = shmem_read_mapping_page(mapping, i); 3919 if (!IS_ERR(page)) { 3920 VM_OBJECT_UNLOCK(obj->base.vm_obj); 3921 char *dst = kmap_atomic(page); 3922 memcpy(dst, vaddr + i*PAGE_SIZE, PAGE_SIZE); 3923 kunmap_atomic(dst); 3924 3925 drm_clflush_pages(&page, 1); 3926 3927 #if 0 3928 set_page_dirty(page); 3929 mark_page_accessed(page); 3930 page_cache_release(page); 3931 #endif 3932 VM_OBJECT_LOCK(obj->base.vm_obj); 3933 vm_page_reference(page); 3934 vm_page_dirty(page); 3935 vm_page_busy_wait(page, FALSE, "i915gem"); 3936 vm_page_unwire(page, 0); 3937 vm_page_wakeup(page); 3938 } 3939 } 3940 VM_OBJECT_UNLOCK(obj->base.vm_obj); 3941 intel_gtt_chipset_flush(); 3942 3943 obj->phys_obj->cur_obj = NULL; 3944 obj->phys_obj = NULL; 3945 } 3946 3947 int 3948 i915_gem_attach_phys_object(struct drm_device *dev, 3949 struct drm_i915_gem_object *obj, 3950 int id, 3951 int align) 3952 { 3953 struct vm_object *mapping = obj->base.vm_obj; 3954 drm_i915_private_t *dev_priv = dev->dev_private; 3955 int ret = 0; 3956 int page_count; 3957 int i; 3958 3959 if (id > I915_MAX_PHYS_OBJECT) 3960 return -EINVAL; 3961 3962 if (obj->phys_obj) { 3963 if (obj->phys_obj->id == id) 3964 return 0; 3965 i915_gem_detach_phys_object(dev, obj); 3966 } 3967 3968 /* create a new object */ 3969 if (!dev_priv->mm.phys_objs[id - 1]) { 3970 ret = i915_gem_init_phys_object(dev, id, 3971 obj->base.size, align); 3972 if (ret) { 3973 DRM_ERROR("failed to init phys object %d size: %zu\n", 3974 
id, obj->base.size); 3975 return ret; 3976 } 3977 } 3978 3979 /* bind to the object */ 3980 obj->phys_obj = dev_priv->mm.phys_objs[id - 1]; 3981 obj->phys_obj->cur_obj = obj; 3982 3983 page_count = obj->base.size / PAGE_SIZE; 3984 3985 VM_OBJECT_LOCK(obj->base.vm_obj); 3986 for (i = 0; i < page_count; i++) { 3987 struct vm_page *page; 3988 char *dst, *src; 3989 3990 page = shmem_read_mapping_page(mapping, i); 3991 VM_OBJECT_UNLOCK(obj->base.vm_obj); 3992 if (IS_ERR(page)) 3993 return PTR_ERR(page); 3994 3995 src = kmap_atomic(page); 3996 dst = (char*)obj->phys_obj->handle->vaddr + (i * PAGE_SIZE); 3997 memcpy(dst, src, PAGE_SIZE); 3998 kunmap_atomic(src); 3999 4000 #if 0 4001 mark_page_accessed(page); 4002 page_cache_release(page); 4003 #endif 4004 VM_OBJECT_LOCK(obj->base.vm_obj); 4005 vm_page_reference(page); 4006 vm_page_busy_wait(page, FALSE, "i915gem"); 4007 vm_page_unwire(page, 0); 4008 vm_page_wakeup(page); 4009 } 4010 VM_OBJECT_UNLOCK(obj->base.vm_obj); 4011 4012 return 0; 4013 } 4014 4015 static int 4016 i915_gem_phys_pwrite(struct drm_device *dev, 4017 struct drm_i915_gem_object *obj, 4018 struct drm_i915_gem_pwrite *args, 4019 struct drm_file *file_priv) 4020 { 4021 void *vaddr = (char *)obj->phys_obj->handle->vaddr + args->offset; 4022 char __user *user_data = (char __user *) (uintptr_t) args->data_ptr; 4023 4024 if (copyin_nofault(user_data, vaddr, args->size) != 0) { 4025 unsigned long unwritten; 4026 4027 /* The physical object once assigned is fixed for the lifetime 4028 * of the obj, so we can safely drop the lock and continue 4029 * to access vaddr. 4030 */ 4031 mutex_unlock(&dev->struct_mutex); 4032 unwritten = copy_from_user(vaddr, user_data, args->size); 4033 mutex_lock(&dev->struct_mutex); 4034 if (unwritten) 4035 return -EFAULT; 4036 } 4037 4038 i915_gem_chipset_flush(dev); 4039 return 0; 4040 } 4041 4042 void i915_gem_release(struct drm_device *dev, struct drm_file *file) 4043 { 4044 struct drm_i915_file_private *file_priv = file->driver_priv; 4045 4046 /* Clean up our request list when the client is going away, so that 4047 * later retire_requests won't dereference our soon-to-be-gone 4048 * file_priv. 4049 */ 4050 spin_lock(&file_priv->mm.lock); 4051 while (!list_empty(&file_priv->mm.request_list)) { 4052 struct drm_i915_gem_request *request; 4053 4054 request = list_first_entry(&file_priv->mm.request_list, 4055 struct drm_i915_gem_request, 4056 client_list); 4057 list_del(&request->client_list); 4058 request->file_priv = NULL; 4059 } 4060 spin_unlock(&file_priv->mm.lock); 4061 } 4062 4063 static int 4064 i915_gem_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot, 4065 vm_ooffset_t foff, struct ucred *cred, u_short *color) 4066 { 4067 4068 *color = 0; /* XXXKIB */ 4069 return (0); 4070 } 4071 4072 int i915_intr_pf; 4073 4074 static int 4075 i915_gem_pager_fault(vm_object_t vm_obj, vm_ooffset_t offset, int prot, 4076 vm_page_t *mres) 4077 { 4078 struct drm_gem_object *gem_obj; 4079 struct drm_i915_gem_object *obj; 4080 struct drm_device *dev; 4081 drm_i915_private_t *dev_priv; 4082 vm_page_t m, oldm; 4083 int cause, ret; 4084 bool write; 4085 4086 gem_obj = vm_obj->handle; 4087 obj = to_intel_bo(gem_obj); 4088 dev = obj->base.dev; 4089 dev_priv = dev->dev_private; 4090 #if 0 4091 write = (prot & VM_PROT_WRITE) != 0; 4092 #else 4093 write = true; 4094 #endif 4095 vm_object_pip_add(vm_obj, 1); 4096 4097 /* 4098 * Remove the placeholder page inserted by vm_fault() from the 4099 * object before dropping the object lock. 
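	 * (The placeholder is the busy page vm_fault() inserted so that
	 * other threads faulting on the same offset block on it instead of
	 * racing us; we are expected to supply the real page ourselves.)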
static int
i915_gem_pager_fault(vm_object_t vm_obj, vm_ooffset_t offset, int prot,
    vm_page_t *mres)
{
	struct drm_gem_object *gem_obj;
	struct drm_i915_gem_object *obj;
	struct drm_device *dev;
	drm_i915_private_t *dev_priv;
	vm_page_t m, oldm;
	int cause, ret;
	bool write;

	gem_obj = vm_obj->handle;
	obj = to_intel_bo(gem_obj);
	dev = obj->base.dev;
	dev_priv = dev->dev_private;
#if 0
	write = (prot & VM_PROT_WRITE) != 0;
#else
	write = true;
#endif
	vm_object_pip_add(vm_obj, 1);

	/*
	 * Remove the placeholder page inserted by vm_fault() from the
	 * object before dropping the object lock.  If
	 * i915_gem_release_mmap() is active in parallel on this gem
	 * object, then it owns the drm device sx and might find the
	 * placeholder already.  Then, since the page is busy,
	 * i915_gem_release_mmap() sleeps waiting for the busy state
	 * of the page to clear.  We would not be able to acquire the
	 * drm device lock until i915_gem_release_mmap() is able to
	 * make progress.
	 */
	if (*mres != NULL) {
		oldm = *mres;
		vm_page_remove(oldm);
		*mres = NULL;
	} else
		oldm = NULL;
retry:
	VM_OBJECT_UNLOCK(vm_obj);
unlocked_vmobj:
	cause = ret = 0;
	m = NULL;

	if (i915_intr_pf) {
		ret = i915_mutex_lock_interruptible(dev);
		if (ret != 0) {
			cause = 10;
			goto out;
		}
	} else
		mutex_lock(&dev->struct_mutex);

	/*
	 * Since the object lock was dropped, another thread might have
	 * faulted on the same GTT address and instantiated the mapping
	 * for the page.  Recheck.
	 */
	VM_OBJECT_LOCK(vm_obj);
	m = vm_page_lookup(vm_obj, OFF_TO_IDX(offset));
	if (m != NULL) {
		if ((m->flags & PG_BUSY) != 0) {
			mutex_unlock(&dev->struct_mutex);
#if 0 /* XXX */
			vm_page_sleep(m, "915pee");
#endif
			goto retry;
		}
		goto have_page;
	} else
		VM_OBJECT_UNLOCK(vm_obj);

	/* Access to snoopable pages through the GTT is incoherent. */
	if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) {
		ret = -EINVAL;
		goto unlock;
	}

	/* Now bind it into the GTT if needed */
	if (!obj->map_and_fenceable) {
		ret = i915_gem_object_unbind(obj);
		if (ret != 0) {
			cause = 20;
			goto unlock;
		}
	}
	if (!obj->gtt_space) {
		ret = i915_gem_object_bind_to_gtt(obj, 0, true, false);
		if (ret != 0) {
			cause = 30;
			goto unlock;
		}

		ret = i915_gem_object_set_to_gtt_domain(obj, write);
		if (ret != 0) {
			cause = 40;
			goto unlock;
		}
	}

	if (obj->tiling_mode == I915_TILING_NONE)
		ret = i915_gem_object_put_fence(obj);
	else
		ret = i915_gem_object_get_fence(obj);
	if (ret != 0) {
		cause = 50;
		goto unlock;
	}

	if (i915_gem_object_is_inactive(obj))
		list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);

	obj->fault_mappable = true;
	VM_OBJECT_LOCK(vm_obj);
	m = vm_phys_fictitious_to_vm_page(dev->agp->base + obj->gtt_offset +
	    offset);
	if (m == NULL) {
		/* Drop the object lock here; the out: error path
		 * re-takes it unconditionally. */
		VM_OBJECT_UNLOCK(vm_obj);
		cause = 60;
		ret = -EFAULT;
		goto unlock;
	}
	KASSERT((m->flags & PG_FICTITIOUS) != 0,
	    ("not fictitious %p", m));
	KASSERT(m->wire_count == 1, ("wire_count not 1 %p", m));

	if ((m->flags & PG_BUSY) != 0) {
		mutex_unlock(&dev->struct_mutex);
#if 0 /* XXX */
		vm_page_sleep(m, "915pbs");
#endif
		goto retry;
	}
	m->valid = VM_PAGE_BITS_ALL;
	vm_page_insert(m, vm_obj, OFF_TO_IDX(offset));
have_page:
	*mres = m;
	vm_page_busy_try(m, false);

	mutex_unlock(&dev->struct_mutex);
	if (oldm != NULL)
		vm_page_free(oldm);
	vm_object_pip_wakeup(vm_obj);
	return (VM_PAGER_OK);

unlock:
	mutex_unlock(&dev->struct_mutex);
out:
	KASSERT(ret != 0, ("i915_gem_pager_fault: wrong return"));
	if (ret == -EAGAIN || ret == -EIO || ret == -EINTR)
		goto unlocked_vmobj;
	VM_OBJECT_LOCK(vm_obj);
	vm_object_pip_wakeup(vm_obj);
	return (VM_PAGER_ERROR);
}
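/*
 * Pager destructor: runs when the last mapping of the GEM object's
 * device pager goes away.  It releases the fake mmap offset, tears
 * down any GTT mappings, and drops the reference the pager held on
 * the object.
 */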
static void
i915_gem_pager_dtor(void *handle)
{
	struct drm_gem_object *obj;
	struct drm_device *dev;

	obj = handle;
	dev = obj->dev;

	mutex_lock(&dev->struct_mutex);
	drm_gem_free_mmap_offset(obj);
	i915_gem_release_mmap(to_intel_bo(obj));
	drm_gem_object_unreference(obj);
	mutex_unlock(&dev->struct_mutex);
}

struct cdev_pager_ops i915_gem_pager_ops = {
	.cdev_pg_fault	= i915_gem_pager_fault,
	.cdev_pg_ctor	= i915_gem_pager_ctor,
	.cdev_pg_dtor	= i915_gem_pager_dtor
};

#define	GEM_PARANOID_CHECK_GTT 0
#if GEM_PARANOID_CHECK_GTT
static void
i915_gem_assert_pages_not_mapped(struct drm_device *dev, vm_page_t *ma,
    int page_count)
{
	struct drm_i915_private *dev_priv;
	vm_paddr_t pa;
	unsigned long start, end;
	u_int i;
	int j;

	dev_priv = dev->dev_private;
	start = OFF_TO_IDX(dev_priv->mm.gtt_start);
	end = OFF_TO_IDX(dev_priv->mm.gtt_end);
	for (i = start; i < end; i++) {
		pa = intel_gtt_read_pte_paddr(i);
		for (j = 0; j < page_count; j++) {
			if (pa == VM_PAGE_TO_PHYS(ma[j])) {
				/* ma[j], not ma[i]: i indexes GTT PTEs
				 * and may exceed the page array. */
				panic("Page %p in GTT pte index %d pte %x",
				    ma[j], i, intel_gtt_read_pte(i));
			}
		}
	}
}
#endif

static int
i915_gpu_is_active(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;

	return !list_empty(&dev_priv->mm.active_list);
}

static void
i915_gem_lowmem(void *arg)
{
	struct drm_device *dev;
	struct drm_i915_private *dev_priv;
	struct drm_i915_gem_object *obj, *next;
	int cnt, cnt_fail, cnt_total;

	dev = arg;
	dev_priv = dev->dev_private;

	if (lockmgr(&dev->struct_mutex, LK_EXCLUSIVE|LK_NOWAIT))
		return;

rescan:
	/* first scan for clean buffers */
	i915_gem_retire_requests(dev);

	cnt_total = cnt_fail = cnt = 0;

	/* Unbind purgeable buffers; everything that stays bound,
	 * whether non-purgeable or a failed unbind, counts toward
	 * the total we still have to deal with. */
	list_for_each_entry_safe(obj, next, &dev_priv->mm.inactive_list,
	    mm_list) {
		if (i915_gem_object_is_purgeable(obj)) {
			if (i915_gem_object_unbind(obj) != 0)
				cnt_total++;
		} else
			cnt_total++;
	}

	/* second pass, evict/count anything still on the inactive list */
	list_for_each_entry_safe(obj, next, &dev_priv->mm.inactive_list,
	    mm_list) {
		if (i915_gem_object_unbind(obj) == 0)
			cnt++;
		else
			cnt_fail++;
	}

	if (cnt_fail > cnt_total / 100 && i915_gpu_is_active(dev)) {
		/*
		 * We are desperate for pages, so as a last resort, wait
		 * for the GPU to finish and discard whatever we can.
		 * This dramatically reduces the number of OOM-killer
		 * events while running the GPU aggressively.
		 */
		if (i915_gpu_idle(dev) == 0)
			goto rescan;
	}
	mutex_unlock(&dev->struct_mutex);
}
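/*
 * Illustrative sketch, not code from this file: the driver's load path
 * is expected to hook i915_gem_lowmem() into the VM pressure event so
 * it runs when the system is short on pages.  EVENTHANDLER_REGISTER()
 * and the vm_lowmem event are standard kernel facilities; the tag
 * variable below is an assumption for the example.
 */
#if 0
#include <sys/eventhandler.h>

static eventhandler_tag i915_lowmem_tag;

static void
example_register_lowmem(struct drm_device *dev)
{
	/* i915_gem_lowmem() trylocks struct_mutex, so it is safe to
	 * invoke from the pageout path. */
	i915_lowmem_tag = EVENTHANDLER_REGISTER(vm_lowmem,
	    i915_gem_lowmem, dev, EVENTHANDLER_PRI_ANY);
}

static void
example_unregister_lowmem(void)
{
	EVENTHANDLER_DEREGISTER(vm_lowmem, i915_lowmem_tag);
}
#endif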