1 /* 2 * Copyright © 2008 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 * Authors: 24 * Eric Anholt <eric@anholt.net> 25 * 26 * Copyright (c) 2011 The FreeBSD Foundation 27 * All rights reserved. 28 * 29 * This software was developed by Konstantin Belousov under sponsorship from 30 * the FreeBSD Foundation. 31 * 32 * Redistribution and use in source and binary forms, with or without 33 * modification, are permitted provided that the following conditions 34 * are met: 35 * 1. Redistributions of source code must retain the above copyright 36 * notice, this list of conditions and the following disclaimer. 37 * 2. Redistributions in binary form must reproduce the above copyright 38 * notice, this list of conditions and the following disclaimer in the 39 * documentation and/or other materials provided with the distribution. 40 * 41 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 51 * SUCH DAMAGE. 52 * 53 */ 54 55 #include <sys/resourcevar.h> 56 #include <sys/sfbuf.h> 57 58 #include <drm/drmP.h> 59 #include <drm/i915_drm.h> 60 #include "i915_drv.h" 61 #include "intel_drv.h" 62 #include <linux/shmem_fs.h> 63 #include <linux/completion.h> 64 #include <linux/highmem.h> 65 #include <linux/jiffies.h> 66 #include <linux/time.h> 67 68 static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj); 69 static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj); 70 static __must_check int i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj, 71 unsigned alignment, 72 bool map_and_fenceable, 73 bool nonblocking); 74 static int i915_gem_phys_pwrite(struct drm_device *dev, 75 struct drm_i915_gem_object *obj, 76 struct drm_i915_gem_pwrite *args, 77 struct drm_file *file); 78 79 static void i915_gem_write_fence(struct drm_device *dev, int reg, 80 struct drm_i915_gem_object *obj); 81 static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj, 82 struct drm_i915_fence_reg *fence, 83 bool enable); 84 85 static long i915_gem_purge(struct drm_i915_private *dev_priv, long target); 86 87 static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj); 88 static void i915_gem_object_truncate(struct drm_i915_gem_object *obj); 89 90 static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj) 91 { 92 if (obj->tiling_mode) 93 i915_gem_release_mmap(obj); 94 95 /* As we do not have an associated fence register, we will force 96 * a tiling change if we ever need to acquire one. 97 */ 98 obj->fence_dirty = false; 99 obj->fence_reg = I915_FENCE_REG_NONE; 100 } 101 102 static bool i915_gem_object_is_inactive(struct drm_i915_gem_object *obj); 103 static void i915_gem_lowmem(void *arg); 104 105 /* some bookkeeping */ 106 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv, 107 size_t size) 108 { 109 dev_priv->mm.object_count++; 110 dev_priv->mm.object_memory += size; 111 } 112 113 static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv, 114 size_t size) 115 { 116 dev_priv->mm.object_count--; 117 dev_priv->mm.object_memory -= size; 118 } 119 120 static int 121 i915_gem_wait_for_error(struct i915_gpu_error *error) 122 { 123 int ret; 124 125 #define EXIT_COND (!i915_reset_in_progress(error) || \ 126 i915_terminally_wedged(error)) 127 if (EXIT_COND) 128 return 0; 129 130 /* 131 * Only wait 10 seconds for the gpu reset to complete to avoid hanging 132 * userspace. If it takes that long something really bad is going on and 133 * we should simply try to bail out and fail as gracefully as possible. 134 */ 135 ret = wait_event_interruptible_timeout(error->reset_queue, 136 EXIT_COND, 137 10*HZ); 138 if (ret == 0) { 139 DRM_ERROR("Timed out waiting for the gpu reset to complete\n"); 140 return -EIO; 141 } else if (ret < 0) { 142 return ret; 143 } 144 #undef EXIT_COND 145 146 return 0; 147 } 148 149 int i915_mutex_lock_interruptible(struct drm_device *dev) 150 { 151 struct drm_i915_private *dev_priv = dev->dev_private; 152 int ret; 153 154 ret = i915_gem_wait_for_error(&dev_priv->gpu_error); 155 if (ret) 156 return ret; 157 158 ret = lockmgr(&dev->struct_mutex, LK_EXCLUSIVE|LK_SLEEPFAIL); 159 if (ret) 160 return -EINTR; 161 162 WARN_ON(i915_verify_lists(dev)); 163 return 0; 164 } 165 166 static inline bool 167 i915_gem_object_is_inactive(struct drm_i915_gem_object *obj) 168 { 169 return !obj->active; 170 } 171 172 int 173 i915_gem_init_ioctl(struct drm_device *dev, void *data, 174 struct drm_file *file) 175 { 176 struct drm_i915_gem_init *args = data; 177 178 if (drm_core_check_feature(dev, DRIVER_MODESET)) 179 return -ENODEV; 180 181 if (args->gtt_start >= args->gtt_end || 182 (args->gtt_end | args->gtt_start) & (PAGE_SIZE - 1)) 183 return -EINVAL; 184 185 /* GEM with user mode setting was never supported on ilk and later. */ 186 if (INTEL_INFO(dev)->gen >= 5) 187 return -ENODEV; 188 189 mutex_lock(&dev->struct_mutex); 190 i915_gem_setup_global_gtt(dev, args->gtt_start, args->gtt_end, 191 args->gtt_end); 192 mutex_unlock(&dev->struct_mutex); 193 194 return 0; 195 } 196 197 int 198 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, 199 struct drm_file *file) 200 { 201 struct drm_i915_private *dev_priv = dev->dev_private; 202 struct drm_i915_gem_get_aperture *args = data; 203 struct drm_i915_gem_object *obj; 204 size_t pinned; 205 206 pinned = 0; 207 mutex_lock(&dev->struct_mutex); 208 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) 209 if (obj->pin_count) 210 pinned += obj->gtt_space->size; 211 mutex_unlock(&dev->struct_mutex); 212 213 args->aper_size = dev_priv->gtt.total; 214 args->aper_available_size = args->aper_size - pinned; 215 216 return 0; 217 } 218 219 void i915_gem_object_free(struct drm_i915_gem_object *obj) 220 { 221 kfree(obj); 222 } 223 224 static int 225 i915_gem_create(struct drm_file *file, 226 struct drm_device *dev, 227 uint64_t size, 228 uint32_t *handle_p) 229 { 230 struct drm_i915_gem_object *obj; 231 int ret; 232 u32 handle; 233 234 size = roundup(size, PAGE_SIZE); 235 if (size == 0) 236 return -EINVAL; 237 238 /* Allocate the new object */ 239 obj = i915_gem_alloc_object(dev, size); 240 if (obj == NULL) 241 return -ENOMEM; 242 243 handle = 0; 244 ret = drm_gem_handle_create(file, &obj->base, &handle); 245 if (ret) { 246 drm_gem_object_release(&obj->base); 247 i915_gem_info_remove_obj(dev->dev_private, obj->base.size); 248 drm_free(obj, M_DRM); 249 return (-ret); 250 } 251 252 /* drop reference from allocate - handle holds it now */ 253 drm_gem_object_unreference(&obj->base); 254 *handle_p = handle; 255 return 0; 256 } 257 258 int 259 i915_gem_dumb_create(struct drm_file *file, 260 struct drm_device *dev, 261 struct drm_mode_create_dumb *args) 262 { 263 264 /* have to work out size/pitch and return them */ 265 args->pitch = roundup2(args->width * ((args->bpp + 7) / 8), 64); 266 args->size = args->pitch * args->height; 267 return i915_gem_create(file, dev, 268 args->size, &args->handle); 269 } 270 271 int i915_gem_dumb_destroy(struct drm_file *file, 272 struct drm_device *dev, 273 uint32_t handle) 274 { 275 276 return drm_gem_handle_delete(file, handle); 277 } 278 279 /** 280 * Creates a new mm object and returns a handle to it. 281 */ 282 int 283 i915_gem_create_ioctl(struct drm_device *dev, void *data, 284 struct drm_file *file) 285 { 286 struct drm_i915_gem_create *args = data; 287 288 return i915_gem_create(file, dev, 289 args->size, &args->handle); 290 } 291 292 static inline void vm_page_reference(vm_page_t m) 293 { 294 vm_page_flag_set(m, PG_REFERENCED); 295 } 296 297 static int 298 i915_gem_shmem_pread(struct drm_device *dev, 299 struct drm_i915_gem_object *obj, 300 struct drm_i915_gem_pread *args, 301 struct drm_file *file) 302 { 303 vm_object_t vm_obj; 304 vm_page_t m; 305 struct sf_buf *sf; 306 vm_offset_t mkva; 307 vm_pindex_t obj_pi; 308 int cnt, do_bit17_swizzling, length, obj_po, ret, swizzled_po; 309 310 do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); 311 312 obj->dirty = 1; 313 vm_obj = obj->base.vm_obj; 314 ret = 0; 315 316 VM_OBJECT_LOCK(vm_obj); 317 vm_object_pip_add(vm_obj, 1); 318 while (args->size > 0) { 319 obj_pi = OFF_TO_IDX(args->offset); 320 obj_po = args->offset & PAGE_MASK; 321 322 m = shmem_read_mapping_page(vm_obj, obj_pi); 323 VM_OBJECT_UNLOCK(vm_obj); 324 325 sf = sf_buf_alloc(m); 326 mkva = sf_buf_kva(sf); 327 length = min(args->size, PAGE_SIZE - obj_po); 328 while (length > 0) { 329 if (do_bit17_swizzling && 330 (VM_PAGE_TO_PHYS(m) & (1 << 17)) != 0) { 331 cnt = roundup2(obj_po + 1, 64); 332 cnt = min(cnt - obj_po, length); 333 swizzled_po = obj_po ^ 64; 334 } else { 335 cnt = length; 336 swizzled_po = obj_po; 337 } 338 ret = -copyout_nofault( 339 (char *)mkva + swizzled_po, 340 (void *)(uintptr_t)args->data_ptr, cnt); 341 if (ret != 0) 342 break; 343 args->data_ptr += cnt; 344 args->size -= cnt; 345 length -= cnt; 346 args->offset += cnt; 347 obj_po += cnt; 348 } 349 sf_buf_free(sf); 350 VM_OBJECT_LOCK(vm_obj); 351 vm_page_reference(m); 352 vm_page_busy_wait(m, FALSE, "i915gem"); 353 vm_page_unwire(m, 1); 354 vm_page_wakeup(m); 355 356 if (ret != 0) 357 break; 358 } 359 vm_object_pip_wakeup(vm_obj); 360 VM_OBJECT_UNLOCK(vm_obj); 361 362 return (ret); 363 } 364 365 /** 366 * Reads data from the object referenced by handle. 367 * 368 * On error, the contents of *data are undefined. 369 */ 370 int 371 i915_gem_pread_ioctl(struct drm_device *dev, void *data, 372 struct drm_file *file) 373 { 374 struct drm_i915_gem_pread *args = data; 375 struct drm_i915_gem_object *obj; 376 int ret = 0; 377 378 if (args->size == 0) 379 return 0; 380 381 ret = i915_mutex_lock_interruptible(dev); 382 if (ret) 383 return ret; 384 385 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 386 if (&obj->base == NULL) { 387 ret = -ENOENT; 388 goto unlock; 389 } 390 391 /* Bounds check source. */ 392 if (args->offset > obj->base.size || 393 args->size > obj->base.size - args->offset) { 394 ret = -EINVAL; 395 goto out; 396 } 397 398 ret = i915_gem_shmem_pread(dev, obj, args, file); 399 out: 400 drm_gem_object_unreference(&obj->base); 401 unlock: 402 mutex_unlock(&dev->struct_mutex); 403 return ret; 404 } 405 406 #if 0 407 /* This is the fast write path which cannot handle 408 * page faults in the source data 409 */ 410 411 static inline int 412 fast_user_write(struct io_mapping *mapping, 413 loff_t page_base, int page_offset, 414 char __user *user_data, 415 int length) 416 { 417 void __iomem *vaddr_atomic; 418 void *vaddr; 419 unsigned long unwritten; 420 421 vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base); 422 /* We can use the cpu mem copy function because this is X86. */ 423 vaddr = (void __force*)vaddr_atomic + page_offset; 424 unwritten = __copy_from_user_inatomic_nocache(vaddr, 425 user_data, length); 426 io_mapping_unmap_atomic(vaddr_atomic); 427 return unwritten; 428 } 429 430 /** 431 * This is the fast pwrite path, where we copy the data directly from the 432 * user into the GTT, uncached. 433 */ 434 static int 435 i915_gem_gtt_pwrite_fast(struct drm_device *dev, 436 struct drm_i915_gem_object *obj, 437 struct drm_i915_gem_pwrite *args, 438 struct drm_file *file) 439 { 440 drm_i915_private_t *dev_priv = dev->dev_private; 441 ssize_t remain; 442 loff_t offset, page_base; 443 char __user *user_data; 444 int page_offset, page_length, ret; 445 446 ret = i915_gem_object_pin(obj, 0, true, true); 447 if (ret) 448 goto out; 449 450 ret = i915_gem_object_set_to_gtt_domain(obj, true); 451 if (ret) 452 goto out_unpin; 453 454 ret = i915_gem_object_put_fence(obj); 455 if (ret) 456 goto out_unpin; 457 458 user_data = to_user_ptr(args->data_ptr); 459 remain = args->size; 460 461 offset = obj->gtt_offset + args->offset; 462 463 while (remain > 0) { 464 /* Operation in this page 465 * 466 * page_base = page offset within aperture 467 * page_offset = offset within page 468 * page_length = bytes to copy for this page 469 */ 470 page_base = offset & PAGE_MASK; 471 page_offset = offset_in_page(offset); 472 page_length = remain; 473 if ((page_offset + remain) > PAGE_SIZE) 474 page_length = PAGE_SIZE - page_offset; 475 476 /* If we get a fault while copying data, then (presumably) our 477 * source page isn't available. Return the error and we'll 478 * retry in the slow path. 479 */ 480 if (fast_user_write(dev_priv->gtt.mappable, page_base, 481 page_offset, user_data, page_length)) { 482 ret = -EFAULT; 483 goto out_unpin; 484 } 485 486 remain -= page_length; 487 user_data += page_length; 488 offset += page_length; 489 } 490 491 out_unpin: 492 i915_gem_object_unpin(obj); 493 out: 494 return ret; 495 } 496 #endif 497 498 static int 499 i915_gem_gtt_write(struct drm_device *dev, struct drm_i915_gem_object *obj, 500 uint64_t data_ptr, uint64_t size, uint64_t offset, struct drm_file *file) 501 { 502 vm_offset_t mkva; 503 int ret; 504 505 /* 506 * Pass the unaligned physical address and size to pmap_mapdev_attr() 507 * so it can properly calculate whether an extra page needs to be 508 * mapped or not to cover the requested range. The function will 509 * add the page offset into the returned mkva for us. 510 */ 511 mkva = (vm_offset_t)pmap_mapdev_attr(dev->agp->base + obj->gtt_offset + 512 offset, size, PAT_WRITE_COMBINING); 513 ret = -copyin_nofault((void *)(uintptr_t)data_ptr, (char *)mkva, size); 514 pmap_unmapdev(mkva, size); 515 return ret; 516 } 517 518 static int 519 i915_gem_shmem_pwrite(struct drm_device *dev, 520 struct drm_i915_gem_object *obj, 521 struct drm_i915_gem_pwrite *args, 522 struct drm_file *file) 523 { 524 vm_object_t vm_obj; 525 vm_page_t m; 526 struct sf_buf *sf; 527 vm_offset_t mkva; 528 vm_pindex_t obj_pi; 529 int cnt, do_bit17_swizzling, length, obj_po, ret, swizzled_po; 530 531 do_bit17_swizzling = 0; 532 533 obj->dirty = 1; 534 vm_obj = obj->base.vm_obj; 535 ret = 0; 536 537 VM_OBJECT_LOCK(vm_obj); 538 vm_object_pip_add(vm_obj, 1); 539 while (args->size > 0) { 540 obj_pi = OFF_TO_IDX(args->offset); 541 obj_po = args->offset & PAGE_MASK; 542 543 m = shmem_read_mapping_page(vm_obj, obj_pi); 544 VM_OBJECT_UNLOCK(vm_obj); 545 546 sf = sf_buf_alloc(m); 547 mkva = sf_buf_kva(sf); 548 length = min(args->size, PAGE_SIZE - obj_po); 549 while (length > 0) { 550 if (do_bit17_swizzling && 551 (VM_PAGE_TO_PHYS(m) & (1 << 17)) != 0) { 552 cnt = roundup2(obj_po + 1, 64); 553 cnt = min(cnt - obj_po, length); 554 swizzled_po = obj_po ^ 64; 555 } else { 556 cnt = length; 557 swizzled_po = obj_po; 558 } 559 ret = -copyin_nofault( 560 (void *)(uintptr_t)args->data_ptr, 561 (char *)mkva + swizzled_po, cnt); 562 if (ret != 0) 563 break; 564 args->data_ptr += cnt; 565 args->size -= cnt; 566 length -= cnt; 567 args->offset += cnt; 568 obj_po += cnt; 569 } 570 sf_buf_free(sf); 571 VM_OBJECT_LOCK(vm_obj); 572 vm_page_dirty(m); 573 vm_page_reference(m); 574 vm_page_busy_wait(m, FALSE, "i915gem"); 575 vm_page_unwire(m, 1); 576 vm_page_wakeup(m); 577 578 if (ret != 0) 579 break; 580 } 581 vm_object_pip_wakeup(vm_obj); 582 VM_OBJECT_UNLOCK(vm_obj); 583 584 return (ret); 585 } 586 587 /** 588 * Writes data to the object referenced by handle. 589 * 590 * On error, the contents of the buffer that were to be modified are undefined. 591 */ 592 int 593 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, 594 struct drm_file *file) 595 { 596 struct drm_i915_gem_pwrite *args = data; 597 struct drm_i915_gem_object *obj; 598 vm_page_t *ma; 599 vm_offset_t start, end; 600 int npages, ret; 601 602 if (args->size == 0) 603 return 0; 604 605 start = trunc_page(args->data_ptr); 606 end = round_page(args->data_ptr + args->size); 607 npages = howmany(end - start, PAGE_SIZE); 608 ma = kmalloc(npages * sizeof(vm_page_t), M_DRM, M_WAITOK | 609 M_ZERO); 610 npages = vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map, 611 (vm_offset_t)args->data_ptr, args->size, 612 VM_PROT_READ, ma, npages); 613 if (npages == -1) { 614 ret = -EFAULT; 615 goto free_ma; 616 } 617 618 ret = i915_mutex_lock_interruptible(dev); 619 if (ret != 0) 620 goto unlocked; 621 622 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 623 if (&obj->base == NULL) { 624 ret = -ENOENT; 625 goto unlock; 626 } 627 628 /* Bounds check destination. */ 629 if (args->offset > obj->base.size || 630 args->size > obj->base.size - args->offset) { 631 ret = -EINVAL; 632 goto out; 633 } 634 635 if (obj->phys_obj) { 636 ret = i915_gem_phys_pwrite(dev, obj, args, file); 637 } else if (obj->gtt_space && 638 obj->base.write_domain != I915_GEM_DOMAIN_CPU) { 639 ret = i915_gem_object_pin(obj, 0, true, false); 640 if (ret != 0) 641 goto out; 642 ret = i915_gem_object_set_to_gtt_domain(obj, true); 643 if (ret != 0) 644 goto out_unpin; 645 ret = i915_gem_object_put_fence(obj); 646 if (ret != 0) 647 goto out_unpin; 648 ret = i915_gem_gtt_write(dev, obj, args->data_ptr, args->size, 649 args->offset, file); 650 out_unpin: 651 i915_gem_object_unpin(obj); 652 } else { 653 ret = i915_gem_object_set_to_cpu_domain(obj, true); 654 if (ret != 0) 655 goto out; 656 ret = i915_gem_shmem_pwrite(dev, obj, args, file); 657 } 658 out: 659 drm_gem_object_unreference(&obj->base); 660 unlock: 661 mutex_unlock(&dev->struct_mutex); 662 unlocked: 663 vm_page_unhold_pages(ma, npages); 664 free_ma: 665 drm_free(ma, M_DRM); 666 return ret; 667 } 668 669 int 670 i915_gem_check_wedge(struct i915_gpu_error *error, 671 bool interruptible) 672 { 673 if (i915_reset_in_progress(error)) { 674 /* Non-interruptible callers can't handle -EAGAIN, hence return 675 * -EIO unconditionally for these. */ 676 if (!interruptible) 677 return -EIO; 678 679 /* Recovery complete, but the reset failed ... */ 680 if (i915_terminally_wedged(error)) 681 return -EIO; 682 683 return -EAGAIN; 684 } 685 686 return 0; 687 } 688 689 /* 690 * Compare seqno against outstanding lazy request. Emit a request if they are 691 * equal. 692 */ 693 static int 694 i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno) 695 { 696 int ret; 697 698 DRM_LOCK_ASSERT(ring->dev); 699 700 ret = 0; 701 if (seqno == ring->outstanding_lazy_request) 702 ret = i915_add_request(ring, NULL); 703 704 return ret; 705 } 706 707 /** 708 * __wait_seqno - wait until execution of seqno has finished 709 * @ring: the ring expected to report seqno 710 * @seqno: duh! 711 * @reset_counter: reset sequence associated with the given seqno 712 * @interruptible: do an interruptible wait (normally yes) 713 * @timeout: in - how long to wait (NULL forever); out - how much time remaining 714 * 715 * Note: It is of utmost importance that the passed in seqno and reset_counter 716 * values have been read by the caller in an smp safe manner. Where read-side 717 * locks are involved, it is sufficient to read the reset_counter before 718 * unlocking the lock that protects the seqno. For lockless tricks, the 719 * reset_counter _must_ be read before, and an appropriate smp_rmb must be 720 * inserted. 721 * 722 * Returns 0 if the seqno was found within the alloted time. Else returns the 723 * errno with remaining time filled in timeout argument. 724 */ 725 static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno, 726 unsigned reset_counter, 727 bool interruptible, struct timespec *timeout) 728 { 729 drm_i915_private_t *dev_priv = ring->dev->dev_private; 730 struct timespec before, now, wait_time={1,0}; 731 unsigned long timeout_jiffies; 732 long end; 733 bool wait_forever = true; 734 int ret; 735 736 if (i915_seqno_passed(ring->get_seqno(ring, true), seqno)) 737 return 0; 738 739 if (timeout != NULL) { 740 wait_time = *timeout; 741 wait_forever = false; 742 } 743 744 timeout_jiffies = timespec_to_jiffies_timeout(&wait_time); 745 746 if (WARN_ON(!ring->irq_get(ring))) 747 return -ENODEV; 748 749 /* Record current time in case interrupted by signal, or wedged * */ 750 getrawmonotonic(&before); 751 752 #define EXIT_COND \ 753 (i915_seqno_passed(ring->get_seqno(ring, false), seqno) || \ 754 i915_reset_in_progress(&dev_priv->gpu_error) || \ 755 reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter)) 756 do { 757 if (interruptible) 758 end = wait_event_interruptible_timeout(ring->irq_queue, 759 EXIT_COND, 760 timeout_jiffies); 761 else 762 end = wait_event_timeout(ring->irq_queue, EXIT_COND, 763 timeout_jiffies); 764 765 /* We need to check whether any gpu reset happened in between 766 * the caller grabbing the seqno and now ... */ 767 if (reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter)) 768 end = -EAGAIN; 769 770 /* ... but upgrade the -EGAIN to an -EIO if the gpu is truely 771 * gone. */ 772 ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible); 773 if (ret) 774 end = ret; 775 } while (end == 0 && wait_forever); 776 777 getrawmonotonic(&now); 778 779 ring->irq_put(ring); 780 #undef EXIT_COND 781 782 if (timeout) { 783 struct timespec sleep_time = timespec_sub(now, before); 784 *timeout = timespec_sub(*timeout, sleep_time); 785 if (!timespec_valid(timeout)) /* i.e. negative time remains */ 786 set_normalized_timespec(timeout, 0, 0); 787 } 788 789 switch (end) { 790 case -EIO: 791 case -EAGAIN: /* Wedged */ 792 case -ERESTARTSYS: /* Signal */ 793 return (int)end; 794 case 0: /* Timeout */ 795 return -ETIMEDOUT; /* -ETIME on Linux */ 796 default: /* Completed */ 797 WARN_ON(end < 0); /* We're not aware of other errors */ 798 return 0; 799 } 800 } 801 802 /** 803 * Waits for a sequence number to be signaled, and cleans up the 804 * request and object lists appropriately for that event. 805 */ 806 int 807 i915_wait_seqno(struct intel_ring_buffer *ring, uint32_t seqno) 808 { 809 struct drm_device *dev = ring->dev; 810 struct drm_i915_private *dev_priv = dev->dev_private; 811 bool interruptible = dev_priv->mm.interruptible; 812 int ret; 813 814 DRM_LOCK_ASSERT(dev); 815 BUG_ON(seqno == 0); 816 817 ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible); 818 if (ret) 819 return ret; 820 821 ret = i915_gem_check_olr(ring, seqno); 822 if (ret) 823 return ret; 824 825 return __wait_seqno(ring, seqno, 826 atomic_read(&dev_priv->gpu_error.reset_counter), 827 interruptible, NULL); 828 } 829 830 static int 831 i915_gem_object_wait_rendering__tail(struct drm_i915_gem_object *obj, 832 struct intel_ring_buffer *ring) 833 { 834 i915_gem_retire_requests_ring(ring); 835 836 /* Manually manage the write flush as we may have not yet 837 * retired the buffer. 838 * 839 * Note that the last_write_seqno is always the earlier of 840 * the two (read/write) seqno, so if we haved successfully waited, 841 * we know we have passed the last write. 842 */ 843 obj->last_write_seqno = 0; 844 obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS; 845 846 return 0; 847 } 848 849 /** 850 * Ensures that all rendering to the object has completed and the object is 851 * safe to unbind from the GTT or access from the CPU. 852 */ 853 static __must_check int 854 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, 855 bool readonly) 856 { 857 struct intel_ring_buffer *ring = obj->ring; 858 u32 seqno; 859 int ret; 860 861 seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno; 862 if (seqno == 0) 863 return 0; 864 865 ret = i915_wait_seqno(ring, seqno); 866 if (ret) 867 return ret; 868 869 return i915_gem_object_wait_rendering__tail(obj, ring); 870 } 871 872 /* A nonblocking variant of the above wait. This is a highly dangerous routine 873 * as the object state may change during this call. 874 */ 875 static __must_check int 876 i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj, 877 bool readonly) 878 { 879 struct drm_device *dev = obj->base.dev; 880 struct drm_i915_private *dev_priv = dev->dev_private; 881 struct intel_ring_buffer *ring = obj->ring; 882 unsigned reset_counter; 883 u32 seqno; 884 int ret; 885 886 DRM_LOCK_ASSERT(dev); 887 BUG_ON(!dev_priv->mm.interruptible); 888 889 seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno; 890 if (seqno == 0) 891 return 0; 892 893 ret = i915_gem_check_wedge(&dev_priv->gpu_error, true); 894 if (ret) 895 return ret; 896 897 ret = i915_gem_check_olr(ring, seqno); 898 if (ret) 899 return ret; 900 901 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter); 902 mutex_unlock(&dev->struct_mutex); 903 ret = __wait_seqno(ring, seqno, reset_counter, true, NULL); 904 mutex_lock(&dev->struct_mutex); 905 if (ret) 906 return ret; 907 908 return i915_gem_object_wait_rendering__tail(obj, ring); 909 } 910 911 /** 912 * Called when user space prepares to use an object with the CPU, either 913 * through the mmap ioctl's mapping or a GTT mapping. 914 */ 915 int 916 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, 917 struct drm_file *file) 918 { 919 struct drm_i915_gem_set_domain *args = data; 920 struct drm_i915_gem_object *obj; 921 uint32_t read_domains = args->read_domains; 922 uint32_t write_domain = args->write_domain; 923 int ret; 924 925 /* Only handle setting domains to types used by the CPU. */ 926 if (write_domain & I915_GEM_GPU_DOMAINS) 927 return -EINVAL; 928 929 if (read_domains & I915_GEM_GPU_DOMAINS) 930 return -EINVAL; 931 932 /* Having something in the write domain implies it's in the read 933 * domain, and only that read domain. Enforce that in the request. 934 */ 935 if (write_domain != 0 && read_domains != write_domain) 936 return -EINVAL; 937 938 ret = i915_mutex_lock_interruptible(dev); 939 if (ret) 940 return ret; 941 942 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 943 if (&obj->base == NULL) { 944 ret = -ENOENT; 945 goto unlock; 946 } 947 948 /* Try to flush the object off the GPU without holding the lock. 949 * We will repeat the flush holding the lock in the normal manner 950 * to catch cases where we are gazumped. 951 */ 952 ret = i915_gem_object_wait_rendering__nonblocking(obj, !write_domain); 953 if (ret) 954 goto unref; 955 956 if (read_domains & I915_GEM_DOMAIN_GTT) { 957 ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0); 958 959 /* Silently promote "you're not bound, there was nothing to do" 960 * to success, since the client was just asking us to 961 * make sure everything was done. 962 */ 963 if (ret == -EINVAL) 964 ret = 0; 965 } else { 966 ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0); 967 } 968 969 unref: 970 drm_gem_object_unreference(&obj->base); 971 unlock: 972 mutex_unlock(&dev->struct_mutex); 973 return ret; 974 } 975 976 /** 977 * Called when user space has done writes to this buffer 978 */ 979 int 980 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, 981 struct drm_file *file) 982 { 983 struct drm_i915_gem_sw_finish *args = data; 984 struct drm_i915_gem_object *obj; 985 int ret = 0; 986 987 ret = i915_mutex_lock_interruptible(dev); 988 if (ret) 989 return ret; 990 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 991 if (&obj->base == NULL) { 992 ret = -ENOENT; 993 goto unlock; 994 } 995 996 /* Pinned buffers may be scanout, so flush the cache */ 997 if (obj->pin_count) 998 i915_gem_object_flush_cpu_write_domain(obj); 999 1000 drm_gem_object_unreference(&obj->base); 1001 unlock: 1002 mutex_unlock(&dev->struct_mutex); 1003 return ret; 1004 } 1005 1006 /** 1007 * Maps the contents of an object, returning the address it is mapped 1008 * into. 1009 * 1010 * While the mapping holds a reference on the contents of the object, it doesn't 1011 * imply a ref on the object itself. 1012 */ 1013 int 1014 i915_gem_mmap_ioctl(struct drm_device *dev, void *data, 1015 struct drm_file *file) 1016 { 1017 struct drm_i915_gem_mmap *args = data; 1018 struct drm_gem_object *obj; 1019 struct proc *p = curproc; 1020 vm_map_t map = &p->p_vmspace->vm_map; 1021 vm_offset_t addr; 1022 vm_size_t size; 1023 int error = 0, rv; 1024 1025 obj = drm_gem_object_lookup(dev, file, args->handle); 1026 if (obj == NULL) 1027 return -ENOENT; 1028 1029 if (args->size == 0) 1030 goto out; 1031 1032 size = round_page(args->size); 1033 if (map->size + size > p->p_rlimit[RLIMIT_VMEM].rlim_cur) { 1034 error = ENOMEM; 1035 goto out; 1036 } 1037 1038 addr = 0; 1039 vm_object_hold(obj->vm_obj); 1040 vm_object_reference_locked(obj->vm_obj); 1041 vm_object_drop(obj->vm_obj); 1042 rv = vm_map_find(map, obj->vm_obj, NULL, 1043 args->offset, &addr, args->size, 1044 PAGE_SIZE, /* align */ 1045 TRUE, /* fitit */ 1046 VM_MAPTYPE_NORMAL, /* maptype */ 1047 VM_PROT_READ | VM_PROT_WRITE, /* prot */ 1048 VM_PROT_READ | VM_PROT_WRITE, /* max */ 1049 MAP_SHARED /* cow */); 1050 if (rv != KERN_SUCCESS) { 1051 vm_object_deallocate(obj->vm_obj); 1052 error = -vm_mmap_to_errno(rv); 1053 } else { 1054 args->addr_ptr = (uint64_t)addr; 1055 } 1056 out: 1057 drm_gem_object_unreference(obj); 1058 return (error); 1059 } 1060 1061 /** 1062 * i915_gem_fault - fault a page into the GTT 1063 * vma: VMA in question 1064 * vmf: fault info 1065 * 1066 * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped 1067 * from userspace. The fault handler takes care of binding the object to 1068 * the GTT (if needed), allocating and programming a fence register (again, 1069 * only if needed based on whether the old reg is still valid or the object 1070 * is tiled) and inserting a new PTE into the faulting process. 1071 * 1072 * Note that the faulting process may involve evicting existing objects 1073 * from the GTT and/or fence registers to make room. So performance may 1074 * suffer if the GTT working set is large or there are few fence registers 1075 * left. 1076 */ 1077 #if 0 1078 int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) 1079 { 1080 struct drm_i915_gem_object *obj = to_intel_bo(vma->vm_private_data); 1081 struct drm_device *dev = obj->base.dev; 1082 drm_i915_private_t *dev_priv = dev->dev_private; 1083 pgoff_t page_offset; 1084 unsigned long pfn; 1085 int ret = 0; 1086 bool write = !!(vmf->flags & FAULT_FLAG_WRITE); 1087 1088 /* We don't use vmf->pgoff since that has the fake offset */ 1089 page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >> 1090 PAGE_SHIFT; 1091 1092 ret = i915_mutex_lock_interruptible(dev); 1093 if (ret) 1094 goto out; 1095 1096 trace_i915_gem_object_fault(obj, page_offset, true, write); 1097 1098 /* Access to snoopable pages through the GTT is incoherent. */ 1099 if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) { 1100 ret = -EINVAL; 1101 goto unlock; 1102 } 1103 1104 /* Now bind it into the GTT if needed */ 1105 ret = i915_gem_object_pin(obj, 0, true, false); 1106 if (ret) 1107 goto unlock; 1108 1109 ret = i915_gem_object_set_to_gtt_domain(obj, write); 1110 if (ret) 1111 goto unpin; 1112 1113 ret = i915_gem_object_get_fence(obj); 1114 if (ret) 1115 goto unpin; 1116 1117 obj->fault_mappable = true; 1118 1119 pfn = ((dev_priv->gtt.mappable_base + obj->gtt_offset) >> PAGE_SHIFT) + 1120 page_offset; 1121 1122 /* Finally, remap it using the new GTT offset */ 1123 ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn); 1124 unpin: 1125 i915_gem_object_unpin(obj); 1126 unlock: 1127 mutex_unlock(&dev->struct_mutex); 1128 out: 1129 switch (ret) { 1130 case -EIO: 1131 /* If this -EIO is due to a gpu hang, give the reset code a 1132 * chance to clean up the mess. Otherwise return the proper 1133 * SIGBUS. */ 1134 if (i915_terminally_wedged(&dev_priv->gpu_error)) 1135 return VM_FAULT_SIGBUS; 1136 case -EAGAIN: 1137 /* Give the error handler a chance to run and move the 1138 * objects off the GPU active list. Next time we service the 1139 * fault, we should be able to transition the page into the 1140 * GTT without touching the GPU (and so avoid further 1141 * EIO/EGAIN). If the GPU is wedged, then there is no issue 1142 * with coherency, just lost writes. 1143 */ 1144 set_need_resched(); 1145 case 0: 1146 case -ERESTARTSYS: 1147 case -EINTR: 1148 case -EBUSY: 1149 /* 1150 * EBUSY is ok: this just means that another thread 1151 * already did the job. 1152 */ 1153 return VM_FAULT_NOPAGE; 1154 case -ENOMEM: 1155 return VM_FAULT_OOM; 1156 case -ENOSPC: 1157 return VM_FAULT_SIGBUS; 1158 default: 1159 WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret); 1160 return VM_FAULT_SIGBUS; 1161 } 1162 } 1163 #endif 1164 1165 /** 1166 * i915_gem_release_mmap - remove physical page mappings 1167 * @obj: obj in question 1168 * 1169 * Preserve the reservation of the mmapping with the DRM core code, but 1170 * relinquish ownership of the pages back to the system. 1171 * 1172 * It is vital that we remove the page mapping if we have mapped a tiled 1173 * object through the GTT and then lose the fence register due to 1174 * resource pressure. Similarly if the object has been moved out of the 1175 * aperture, than pages mapped into userspace must be revoked. Removing the 1176 * mapping will then trigger a page fault on the next user access, allowing 1177 * fixup by i915_gem_fault(). 1178 */ 1179 void 1180 i915_gem_release_mmap(struct drm_i915_gem_object *obj) 1181 { 1182 vm_object_t devobj; 1183 vm_page_t m; 1184 int i, page_count; 1185 1186 if (!obj->fault_mappable) 1187 return; 1188 1189 devobj = cdev_pager_lookup(obj); 1190 if (devobj != NULL) { 1191 page_count = OFF_TO_IDX(obj->base.size); 1192 1193 VM_OBJECT_LOCK(devobj); 1194 for (i = 0; i < page_count; i++) { 1195 m = vm_page_lookup_busy_wait(devobj, i, TRUE, "915unm"); 1196 if (m == NULL) 1197 continue; 1198 cdev_pager_free_page(devobj, m); 1199 } 1200 VM_OBJECT_UNLOCK(devobj); 1201 vm_object_deallocate(devobj); 1202 } 1203 1204 obj->fault_mappable = false; 1205 } 1206 1207 uint32_t 1208 i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode) 1209 { 1210 uint32_t gtt_size; 1211 1212 if (INTEL_INFO(dev)->gen >= 4 || 1213 tiling_mode == I915_TILING_NONE) 1214 return size; 1215 1216 /* Previous chips need a power-of-two fence region when tiling */ 1217 if (INTEL_INFO(dev)->gen == 3) 1218 gtt_size = 1024*1024; 1219 else 1220 gtt_size = 512*1024; 1221 1222 while (gtt_size < size) 1223 gtt_size <<= 1; 1224 1225 return gtt_size; 1226 } 1227 1228 /** 1229 * i915_gem_get_gtt_alignment - return required GTT alignment for an object 1230 * @obj: object to check 1231 * 1232 * Return the required GTT alignment for an object, taking into account 1233 * potential fence register mapping. 1234 */ 1235 uint32_t 1236 i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size, 1237 int tiling_mode, bool fenced) 1238 { 1239 1240 /* 1241 * Minimum alignment is 4k (GTT page size), but might be greater 1242 * if a fence register is needed for the object. 1243 */ 1244 if (INTEL_INFO(dev)->gen >= 4 || (!fenced && IS_G33(dev)) || 1245 tiling_mode == I915_TILING_NONE) 1246 return 4096; 1247 1248 /* 1249 * Previous chips need to be aligned to the size of the smallest 1250 * fence register that can contain the object. 1251 */ 1252 return i915_gem_get_gtt_size(dev, size, tiling_mode); 1253 } 1254 1255 int 1256 i915_gem_mmap_gtt(struct drm_file *file, 1257 struct drm_device *dev, 1258 uint32_t handle, 1259 uint64_t *offset) 1260 { 1261 struct drm_i915_private *dev_priv = dev->dev_private; 1262 struct drm_i915_gem_object *obj; 1263 int ret; 1264 1265 ret = i915_mutex_lock_interruptible(dev); 1266 if (ret) 1267 return ret; 1268 1269 obj = to_intel_bo(drm_gem_object_lookup(dev, file, handle)); 1270 if (&obj->base == NULL) { 1271 ret = -ENOENT; 1272 goto unlock; 1273 } 1274 1275 if (obj->base.size > dev_priv->gtt.mappable_end) { 1276 ret = -E2BIG; 1277 goto out; 1278 } 1279 1280 if (obj->madv != I915_MADV_WILLNEED) { 1281 DRM_ERROR("Attempting to mmap a purgeable buffer\n"); 1282 ret = -EINVAL; 1283 goto out; 1284 } 1285 1286 ret = drm_gem_create_mmap_offset(&obj->base); 1287 if (ret) 1288 goto out; 1289 1290 *offset = DRM_GEM_MAPPING_OFF(obj->base.map_list.key) | 1291 DRM_GEM_MAPPING_KEY; 1292 out: 1293 drm_gem_object_unreference(&obj->base); 1294 unlock: 1295 mutex_unlock(&dev->struct_mutex); 1296 return ret; 1297 } 1298 1299 /** 1300 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing 1301 * @dev: DRM device 1302 * @data: GTT mapping ioctl data 1303 * @file: GEM object info 1304 * 1305 * Simply returns the fake offset to userspace so it can mmap it. 1306 * The mmap call will end up in drm_gem_mmap(), which will set things 1307 * up so we can get faults in the handler above. 1308 * 1309 * The fault handler will take care of binding the object into the GTT 1310 * (since it may have been evicted to make room for something), allocating 1311 * a fence register, and mapping the appropriate aperture address into 1312 * userspace. 1313 */ 1314 int 1315 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, 1316 struct drm_file *file) 1317 { 1318 struct drm_i915_gem_mmap_gtt *args = data; 1319 1320 return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset); 1321 } 1322 1323 /* Immediately discard the backing storage */ 1324 static void 1325 i915_gem_object_truncate(struct drm_i915_gem_object *obj) 1326 { 1327 vm_object_t vm_obj; 1328 1329 vm_obj = obj->base.vm_obj; 1330 VM_OBJECT_LOCK(vm_obj); 1331 vm_object_page_remove(vm_obj, 0, 0, false); 1332 VM_OBJECT_UNLOCK(vm_obj); 1333 obj->madv = __I915_MADV_PURGED; 1334 } 1335 1336 static inline int 1337 i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj) 1338 { 1339 return obj->madv == I915_MADV_DONTNEED; 1340 } 1341 1342 static void 1343 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj) 1344 { 1345 vm_page_t m; 1346 int page_count, i; 1347 1348 BUG_ON(obj->madv == __I915_MADV_PURGED); 1349 1350 if (obj->tiling_mode != I915_TILING_NONE) 1351 i915_gem_object_save_bit_17_swizzle(obj); 1352 if (obj->madv == I915_MADV_DONTNEED) 1353 obj->dirty = 0; 1354 page_count = obj->base.size / PAGE_SIZE; 1355 VM_OBJECT_LOCK(obj->base.vm_obj); 1356 #if GEM_PARANOID_CHECK_GTT 1357 i915_gem_assert_pages_not_mapped(obj->base.dev, obj->pages, page_count); 1358 #endif 1359 for (i = 0; i < page_count; i++) { 1360 m = obj->pages[i]; 1361 if (obj->dirty) 1362 vm_page_dirty(m); 1363 if (obj->madv == I915_MADV_WILLNEED) 1364 vm_page_reference(m); 1365 vm_page_busy_wait(obj->pages[i], FALSE, "i915gem"); 1366 vm_page_unwire(obj->pages[i], 1); 1367 vm_page_wakeup(obj->pages[i]); 1368 } 1369 VM_OBJECT_UNLOCK(obj->base.vm_obj); 1370 obj->dirty = 0; 1371 drm_free(obj->pages, M_DRM); 1372 obj->pages = NULL; 1373 } 1374 1375 int 1376 i915_gem_object_put_pages(struct drm_i915_gem_object *obj) 1377 { 1378 const struct drm_i915_gem_object_ops *ops = obj->ops; 1379 1380 if (obj->pages == NULL) 1381 return 0; 1382 1383 BUG_ON(obj->gtt_space); 1384 1385 if (obj->pages_pin_count) 1386 return -EBUSY; 1387 1388 /* ->put_pages might need to allocate memory for the bit17 swizzle 1389 * array, hence protect them from being reaped by removing them from gtt 1390 * lists early. */ 1391 list_del(&obj->global_list); 1392 1393 ops->put_pages(obj); 1394 obj->pages = NULL; 1395 1396 if (i915_gem_object_is_purgeable(obj)) 1397 i915_gem_object_truncate(obj); 1398 1399 return 0; 1400 } 1401 1402 static int 1403 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) 1404 { 1405 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 1406 struct drm_device *dev; 1407 vm_object_t vm_obj; 1408 int page_count, i, j; 1409 struct vm_page *page; 1410 1411 dev = obj->base.dev; 1412 KASSERT(obj->pages == NULL, ("Obj already has pages")); 1413 page_count = obj->base.size / PAGE_SIZE; 1414 obj->pages = kmalloc(page_count * sizeof(vm_page_t), M_DRM, 1415 M_WAITOK); 1416 1417 vm_obj = obj->base.vm_obj; 1418 VM_OBJECT_LOCK(vm_obj); 1419 1420 for (i = 0; i < page_count; i++) { 1421 page = shmem_read_mapping_page(vm_obj, i); 1422 if (IS_ERR(page)) { 1423 i915_gem_purge(dev_priv, page_count); 1424 goto err_pages; 1425 } 1426 1427 obj->pages[i] = page; 1428 } 1429 1430 VM_OBJECT_UNLOCK(vm_obj); 1431 if (i915_gem_object_needs_bit17_swizzle(obj)) 1432 i915_gem_object_do_bit_17_swizzle(obj); 1433 1434 return 0; 1435 1436 err_pages: 1437 for (j = 0; j < i; j++) { 1438 page = obj->pages[j]; 1439 vm_page_busy_wait(page, FALSE, "i915gem"); 1440 vm_page_unwire(page, 0); 1441 vm_page_wakeup(page); 1442 } 1443 VM_OBJECT_UNLOCK(vm_obj); 1444 drm_free(obj->pages, M_DRM); 1445 obj->pages = NULL; 1446 return (-EIO); 1447 } 1448 1449 /* Ensure that the associated pages are gathered from the backing storage 1450 * and pinned into our object. i915_gem_object_get_pages() may be called 1451 * multiple times before they are released by a single call to 1452 * i915_gem_object_put_pages() - once the pages are no longer referenced 1453 * either as a result of memory pressure (reaping pages under the shrinker) 1454 * or as the object is itself released. 1455 */ 1456 int 1457 i915_gem_object_get_pages(struct drm_i915_gem_object *obj) 1458 { 1459 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 1460 const struct drm_i915_gem_object_ops *ops = obj->ops; 1461 int ret; 1462 1463 if (obj->pages) 1464 return 0; 1465 1466 if (obj->madv != I915_MADV_WILLNEED) { 1467 DRM_ERROR("Attempting to obtain a purgeable object\n"); 1468 return -EINVAL; 1469 } 1470 1471 BUG_ON(obj->pages_pin_count); 1472 1473 ret = ops->get_pages(obj); 1474 if (ret) 1475 return ret; 1476 1477 list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list); 1478 return 0; 1479 } 1480 1481 void 1482 i915_gem_object_move_to_active(struct drm_i915_gem_object *obj, 1483 struct intel_ring_buffer *ring) 1484 { 1485 struct drm_device *dev = obj->base.dev; 1486 struct drm_i915_private *dev_priv = dev->dev_private; 1487 u32 seqno = intel_ring_get_seqno(ring); 1488 1489 BUG_ON(ring == NULL); 1490 if (obj->ring != ring && obj->last_write_seqno) { 1491 /* Keep the seqno relative to the current ring */ 1492 obj->last_write_seqno = seqno; 1493 } 1494 obj->ring = ring; 1495 1496 /* Add a reference if we're newly entering the active list. */ 1497 if (!obj->active) { 1498 drm_gem_object_reference(&obj->base); 1499 obj->active = 1; 1500 } 1501 1502 /* Move from whatever list we were on to the tail of execution. */ 1503 list_move_tail(&obj->mm_list, &dev_priv->mm.active_list); 1504 list_move_tail(&obj->ring_list, &ring->active_list); 1505 1506 obj->last_read_seqno = seqno; 1507 1508 if (obj->fenced_gpu_access) { 1509 obj->last_fenced_seqno = seqno; 1510 1511 /* Bump MRU to take account of the delayed flush */ 1512 if (obj->fence_reg != I915_FENCE_REG_NONE) { 1513 struct drm_i915_fence_reg *reg; 1514 1515 reg = &dev_priv->fence_regs[obj->fence_reg]; 1516 list_move_tail(®->lru_list, 1517 &dev_priv->mm.fence_list); 1518 } 1519 } 1520 } 1521 1522 static void 1523 i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj) 1524 { 1525 struct drm_device *dev = obj->base.dev; 1526 struct drm_i915_private *dev_priv = dev->dev_private; 1527 1528 BUG_ON(obj->base.write_domain & ~I915_GEM_GPU_DOMAINS); 1529 BUG_ON(!obj->active); 1530 1531 list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list); 1532 1533 list_del_init(&obj->ring_list); 1534 obj->ring = NULL; 1535 1536 obj->last_read_seqno = 0; 1537 obj->last_write_seqno = 0; 1538 obj->base.write_domain = 0; 1539 1540 obj->last_fenced_seqno = 0; 1541 obj->fenced_gpu_access = false; 1542 1543 obj->active = 0; 1544 drm_gem_object_unreference(&obj->base); 1545 1546 WARN_ON(i915_verify_lists(dev)); 1547 } 1548 1549 static int 1550 i915_gem_init_seqno(struct drm_device *dev, u32 seqno) 1551 { 1552 struct drm_i915_private *dev_priv = dev->dev_private; 1553 struct intel_ring_buffer *ring; 1554 int ret, i, j; 1555 1556 /* Carefully retire all requests without writing to the rings */ 1557 for_each_ring(ring, dev_priv, i) { 1558 ret = intel_ring_idle(ring); 1559 if (ret) 1560 return ret; 1561 } 1562 i915_gem_retire_requests(dev); 1563 1564 /* Finally reset hw state */ 1565 for_each_ring(ring, dev_priv, i) { 1566 intel_ring_init_seqno(ring, seqno); 1567 1568 for (j = 0; j < ARRAY_SIZE(ring->sync_seqno); j++) 1569 ring->sync_seqno[j] = 0; 1570 } 1571 1572 return 0; 1573 } 1574 1575 int i915_gem_set_seqno(struct drm_device *dev, u32 seqno) 1576 { 1577 struct drm_i915_private *dev_priv = dev->dev_private; 1578 int ret; 1579 1580 if (seqno == 0) 1581 return -EINVAL; 1582 1583 /* HWS page needs to be set less than what we 1584 * will inject to ring 1585 */ 1586 ret = i915_gem_init_seqno(dev, seqno - 1); 1587 if (ret) 1588 return ret; 1589 1590 /* Carefully set the last_seqno value so that wrap 1591 * detection still works 1592 */ 1593 dev_priv->next_seqno = seqno; 1594 dev_priv->last_seqno = seqno - 1; 1595 if (dev_priv->last_seqno == 0) 1596 dev_priv->last_seqno--; 1597 1598 return 0; 1599 } 1600 1601 int 1602 i915_gem_get_seqno(struct drm_device *dev, u32 *seqno) 1603 { 1604 struct drm_i915_private *dev_priv = dev->dev_private; 1605 1606 /* reserve 0 for non-seqno */ 1607 if (dev_priv->next_seqno == 0) { 1608 int ret = i915_gem_init_seqno(dev, 0); 1609 if (ret) 1610 return ret; 1611 1612 dev_priv->next_seqno = 1; 1613 } 1614 1615 *seqno = dev_priv->last_seqno = dev_priv->next_seqno++; 1616 return 0; 1617 } 1618 1619 int __i915_add_request(struct intel_ring_buffer *ring, 1620 struct drm_file *file, 1621 struct drm_i915_gem_object *obj, 1622 u32 *out_seqno) 1623 { 1624 drm_i915_private_t *dev_priv = ring->dev->dev_private; 1625 struct drm_i915_gem_request *request; 1626 u32 request_ring_position, request_start; 1627 int was_empty; 1628 int ret; 1629 1630 request_start = intel_ring_get_tail(ring); 1631 /* 1632 * Emit any outstanding flushes - execbuf can fail to emit the flush 1633 * after having emitted the batchbuffer command. Hence we need to fix 1634 * things up similar to emitting the lazy request. The difference here 1635 * is that the flush _must_ happen before the next request, no matter 1636 * what. 1637 */ 1638 ret = intel_ring_flush_all_caches(ring); 1639 if (ret) 1640 return ret; 1641 1642 request = kmalloc(sizeof(*request), M_DRM, M_WAITOK); 1643 if (request == NULL) 1644 return -ENOMEM; 1645 1646 1647 /* Record the position of the start of the request so that 1648 * should we detect the updated seqno part-way through the 1649 * GPU processing the request, we never over-estimate the 1650 * position of the head. 1651 */ 1652 request_ring_position = intel_ring_get_tail(ring); 1653 1654 ret = ring->add_request(ring); 1655 if (ret) { 1656 kfree(request); 1657 return ret; 1658 } 1659 1660 request->seqno = intel_ring_get_seqno(ring); 1661 request->ring = ring; 1662 request->head = request_start; 1663 request->tail = request_ring_position; 1664 request->ctx = ring->last_context; 1665 request->batch_obj = obj; 1666 1667 /* Whilst this request exists, batch_obj will be on the 1668 * active_list, and so will hold the active reference. Only when this 1669 * request is retired will the the batch_obj be moved onto the 1670 * inactive_list and lose its active reference. Hence we do not need 1671 * to explicitly hold another reference here. 1672 */ 1673 1674 if (request->ctx) 1675 i915_gem_context_reference(request->ctx); 1676 1677 request->emitted_jiffies = jiffies; 1678 was_empty = list_empty(&ring->request_list); 1679 list_add_tail(&request->list, &ring->request_list); 1680 request->file_priv = NULL; 1681 1682 if (file) { 1683 struct drm_i915_file_private *file_priv = file->driver_priv; 1684 1685 spin_lock(&file_priv->mm.lock); 1686 request->file_priv = file_priv; 1687 list_add_tail(&request->client_list, 1688 &file_priv->mm.request_list); 1689 spin_unlock(&file_priv->mm.lock); 1690 } 1691 1692 ring->outstanding_lazy_request = 0; 1693 1694 if (!dev_priv->mm.suspended) { 1695 if (i915_enable_hangcheck) { 1696 mod_timer(&dev_priv->gpu_error.hangcheck_timer, 1697 round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES)); 1698 } 1699 if (was_empty) { 1700 queue_delayed_work(dev_priv->wq, 1701 &dev_priv->mm.retire_work, 1702 round_jiffies_up_relative(hz)); 1703 intel_mark_busy(dev_priv->dev); 1704 } 1705 } 1706 1707 if (out_seqno) 1708 *out_seqno = request->seqno; 1709 return 0; 1710 } 1711 1712 static inline void 1713 i915_gem_request_remove_from_client(struct drm_i915_gem_request *request) 1714 { 1715 struct drm_i915_file_private *file_priv = request->file_priv; 1716 1717 if (!file_priv) 1718 return; 1719 1720 spin_lock(&file_priv->mm.lock); 1721 if (request->file_priv) { 1722 list_del(&request->client_list); 1723 request->file_priv = NULL; 1724 } 1725 spin_unlock(&file_priv->mm.lock); 1726 } 1727 1728 static bool i915_head_inside_object(u32 acthd, struct drm_i915_gem_object *obj) 1729 { 1730 if (acthd >= obj->gtt_offset && 1731 acthd < obj->gtt_offset + obj->base.size) 1732 return true; 1733 1734 return false; 1735 } 1736 1737 static bool i915_head_inside_request(const u32 acthd_unmasked, 1738 const u32 request_start, 1739 const u32 request_end) 1740 { 1741 const u32 acthd = acthd_unmasked & HEAD_ADDR; 1742 1743 if (request_start < request_end) { 1744 if (acthd >= request_start && acthd < request_end) 1745 return true; 1746 } else if (request_start > request_end) { 1747 if (acthd >= request_start || acthd < request_end) 1748 return true; 1749 } 1750 1751 return false; 1752 } 1753 1754 static bool i915_request_guilty(struct drm_i915_gem_request *request, 1755 const u32 acthd, bool *inside) 1756 { 1757 /* There is a possibility that unmasked head address 1758 * pointing inside the ring, matches the batch_obj address range. 1759 * However this is extremely unlikely. 1760 */ 1761 1762 if (request->batch_obj) { 1763 if (i915_head_inside_object(acthd, request->batch_obj)) { 1764 *inside = true; 1765 return true; 1766 } 1767 } 1768 1769 if (i915_head_inside_request(acthd, request->head, request->tail)) { 1770 *inside = false; 1771 return true; 1772 } 1773 1774 return false; 1775 } 1776 1777 static void i915_set_reset_status(struct intel_ring_buffer *ring, 1778 struct drm_i915_gem_request *request, 1779 u32 acthd) 1780 { 1781 struct i915_ctx_hang_stats *hs = NULL; 1782 bool inside, guilty; 1783 1784 /* Innocent until proven guilty */ 1785 guilty = false; 1786 1787 if (ring->hangcheck.action != wait && 1788 i915_request_guilty(request, acthd, &inside)) { 1789 DRM_ERROR("%s hung %s bo (0x%x ctx %d) at 0x%x\n", 1790 ring->name, 1791 inside ? "inside" : "flushing", 1792 request->batch_obj ? 1793 request->batch_obj->gtt_offset : 0, 1794 request->ctx ? request->ctx->id : 0, 1795 acthd); 1796 1797 guilty = true; 1798 } 1799 1800 /* If contexts are disabled or this is the default context, use 1801 * file_priv->reset_state 1802 */ 1803 if (request->ctx && request->ctx->id != DEFAULT_CONTEXT_ID) 1804 hs = &request->ctx->hang_stats; 1805 else if (request->file_priv) 1806 hs = &request->file_priv->hang_stats; 1807 1808 if (hs) { 1809 if (guilty) 1810 hs->batch_active++; 1811 else 1812 hs->batch_pending++; 1813 } 1814 } 1815 1816 static void i915_gem_free_request(struct drm_i915_gem_request *request) 1817 { 1818 list_del(&request->list); 1819 i915_gem_request_remove_from_client(request); 1820 1821 if (request->ctx) 1822 i915_gem_context_unreference(request->ctx); 1823 1824 kfree(request); 1825 } 1826 1827 static void i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv, 1828 struct intel_ring_buffer *ring) 1829 { 1830 u32 completed_seqno; 1831 u32 acthd; 1832 1833 acthd = intel_ring_get_active_head(ring); 1834 completed_seqno = ring->get_seqno(ring, false); 1835 1836 while (!list_empty(&ring->request_list)) { 1837 struct drm_i915_gem_request *request; 1838 1839 request = list_first_entry(&ring->request_list, 1840 struct drm_i915_gem_request, 1841 list); 1842 1843 if (request->seqno > completed_seqno) 1844 i915_set_reset_status(ring, request, acthd); 1845 1846 i915_gem_free_request(request); 1847 } 1848 1849 while (!list_empty(&ring->active_list)) { 1850 struct drm_i915_gem_object *obj; 1851 1852 obj = list_first_entry(&ring->active_list, 1853 struct drm_i915_gem_object, 1854 ring_list); 1855 1856 i915_gem_object_move_to_inactive(obj); 1857 } 1858 } 1859 1860 void i915_gem_restore_fences(struct drm_device *dev) 1861 { 1862 struct drm_i915_private *dev_priv = dev->dev_private; 1863 int i; 1864 1865 for (i = 0; i < dev_priv->num_fence_regs; i++) { 1866 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i]; 1867 1868 /* 1869 * Commit delayed tiling changes if we have an object still 1870 * attached to the fence, otherwise just clear the fence. 1871 */ 1872 if (reg->obj) { 1873 i915_gem_object_update_fence(reg->obj, reg, 1874 reg->obj->tiling_mode); 1875 } else { 1876 i915_gem_write_fence(dev, i, NULL); 1877 } 1878 } 1879 } 1880 1881 void i915_gem_reset(struct drm_device *dev) 1882 { 1883 struct drm_i915_private *dev_priv = dev->dev_private; 1884 struct drm_i915_gem_object *obj; 1885 struct intel_ring_buffer *ring; 1886 int i; 1887 1888 for_each_ring(ring, dev_priv, i) 1889 i915_gem_reset_ring_lists(dev_priv, ring); 1890 1891 /* Move everything out of the GPU domains to ensure we do any 1892 * necessary invalidation upon reuse. 1893 */ 1894 list_for_each_entry(obj, 1895 &dev_priv->mm.inactive_list, 1896 mm_list) 1897 { 1898 obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS; 1899 } 1900 1901 i915_gem_restore_fences(dev); 1902 } 1903 1904 /** 1905 * This function clears the request list as sequence numbers are passed. 1906 */ 1907 void 1908 i915_gem_retire_requests_ring(struct intel_ring_buffer *ring) 1909 { 1910 uint32_t seqno; 1911 1912 if (list_empty(&ring->request_list)) 1913 return; 1914 1915 WARN_ON(i915_verify_lists(ring->dev)); 1916 1917 seqno = ring->get_seqno(ring, true); 1918 1919 while (!list_empty(&ring->request_list)) { 1920 struct drm_i915_gem_request *request; 1921 1922 request = list_first_entry(&ring->request_list, 1923 struct drm_i915_gem_request, 1924 list); 1925 1926 if (!i915_seqno_passed(seqno, request->seqno)) 1927 break; 1928 1929 /* We know the GPU must have read the request to have 1930 * sent us the seqno + interrupt, so use the position 1931 * of tail of the request to update the last known position 1932 * of the GPU head. 1933 */ 1934 ring->last_retired_head = request->tail; 1935 1936 i915_gem_free_request(request); 1937 } 1938 1939 /* Move any buffers on the active list that are no longer referenced 1940 * by the ringbuffer to the flushing/inactive lists as appropriate. 1941 */ 1942 while (!list_empty(&ring->active_list)) { 1943 struct drm_i915_gem_object *obj; 1944 1945 obj = list_first_entry(&ring->active_list, 1946 struct drm_i915_gem_object, 1947 ring_list); 1948 1949 if (!i915_seqno_passed(seqno, obj->last_read_seqno)) 1950 break; 1951 1952 i915_gem_object_move_to_inactive(obj); 1953 } 1954 1955 if (unlikely(ring->trace_irq_seqno && 1956 i915_seqno_passed(seqno, ring->trace_irq_seqno))) { 1957 ring->irq_put(ring); 1958 ring->trace_irq_seqno = 0; 1959 } 1960 1961 } 1962 1963 void 1964 i915_gem_retire_requests(struct drm_device *dev) 1965 { 1966 drm_i915_private_t *dev_priv = dev->dev_private; 1967 struct intel_ring_buffer *ring; 1968 int i; 1969 1970 for_each_ring(ring, dev_priv, i) 1971 i915_gem_retire_requests_ring(ring); 1972 } 1973 1974 static long 1975 __i915_gem_shrink(struct drm_i915_private *dev_priv, long target, 1976 bool purgeable_only) 1977 { 1978 struct drm_i915_gem_object *obj, *next; 1979 long count = 0; 1980 1981 list_for_each_entry_safe(obj, next, 1982 &dev_priv->mm.unbound_list, 1983 global_list) { 1984 #if 0 1985 if ((i915_gem_object_is_purgeable(obj) || !purgeable_only) && 1986 i915_gem_object_put_pages(obj) == 0) { 1987 count += obj->base.size >> PAGE_SHIFT; 1988 if (count >= target) 1989 return count; 1990 } 1991 #endif 1992 } 1993 1994 list_for_each_entry_safe(obj, next, 1995 &dev_priv->mm.inactive_list, 1996 mm_list) { 1997 #if 0 1998 if ((i915_gem_object_is_purgeable(obj) || !purgeable_only) && 1999 i915_gem_object_unbind(obj) == 0 && 2000 i915_gem_object_put_pages(obj) == 0) { 2001 count += obj->base.size >> PAGE_SHIFT; 2002 if (count >= target) 2003 return count; 2004 } 2005 #endif 2006 } 2007 2008 return count; 2009 } 2010 2011 static long 2012 i915_gem_purge(struct drm_i915_private *dev_priv, long target) 2013 { 2014 return __i915_gem_shrink(dev_priv, target, true); 2015 } 2016 2017 static void 2018 i915_gem_retire_work_handler(struct work_struct *work) 2019 { 2020 drm_i915_private_t *dev_priv; 2021 struct drm_device *dev; 2022 struct intel_ring_buffer *ring; 2023 bool idle; 2024 int i; 2025 2026 dev_priv = container_of(work, drm_i915_private_t, 2027 mm.retire_work.work); 2028 dev = dev_priv->dev; 2029 2030 /* Come back later if the device is busy... */ 2031 if (lockmgr(&dev->struct_mutex, LK_EXCLUSIVE|LK_NOWAIT)) { 2032 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 2033 round_jiffies_up_relative(hz)); 2034 return; 2035 } 2036 2037 i915_gem_retire_requests(dev); 2038 2039 /* Send a periodic flush down the ring so we don't hold onto GEM 2040 * objects indefinitely. 2041 */ 2042 idle = true; 2043 for_each_ring(ring, dev_priv, i) { 2044 if (ring->gpu_caches_dirty) 2045 i915_add_request(ring, NULL); 2046 2047 idle &= list_empty(&ring->request_list); 2048 } 2049 2050 if (!dev_priv->mm.suspended && !idle) 2051 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 2052 round_jiffies_up_relative(hz)); 2053 if (idle) 2054 intel_mark_idle(dev); 2055 2056 mutex_unlock(&dev->struct_mutex); 2057 } 2058 /** 2059 * Ensures that an object will eventually get non-busy by flushing any required 2060 * write domains, emitting any outstanding lazy request and retiring and 2061 * completed requests. 2062 */ 2063 static int 2064 i915_gem_object_flush_active(struct drm_i915_gem_object *obj) 2065 { 2066 int ret; 2067 2068 if (obj->active) { 2069 ret = i915_gem_check_olr(obj->ring, obj->last_read_seqno); 2070 if (ret) 2071 return ret; 2072 2073 i915_gem_retire_requests_ring(obj->ring); 2074 } 2075 2076 return 0; 2077 } 2078 2079 /** 2080 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT 2081 * @DRM_IOCTL_ARGS: standard ioctl arguments 2082 * 2083 * Returns 0 if successful, else an error is returned with the remaining time in 2084 * the timeout parameter. 2085 * -ETIME: object is still busy after timeout 2086 * -ERESTARTSYS: signal interrupted the wait 2087 * -ENONENT: object doesn't exist 2088 * Also possible, but rare: 2089 * -EAGAIN: GPU wedged 2090 * -ENOMEM: damn 2091 * -ENODEV: Internal IRQ fail 2092 * -E?: The add request failed 2093 * 2094 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any 2095 * non-zero timeout parameter the wait ioctl will wait for the given number of 2096 * nanoseconds on an object becoming unbusy. Since the wait itself does so 2097 * without holding struct_mutex the object may become re-busied before this 2098 * function completes. A similar but shorter * race condition exists in the busy 2099 * ioctl 2100 */ 2101 int 2102 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) 2103 { 2104 drm_i915_private_t *dev_priv = dev->dev_private; 2105 struct drm_i915_gem_wait *args = data; 2106 struct drm_i915_gem_object *obj; 2107 struct intel_ring_buffer *ring = NULL; 2108 struct timespec timeout_stack, *timeout = NULL; 2109 unsigned reset_counter; 2110 u32 seqno = 0; 2111 int ret = 0; 2112 2113 if (args->timeout_ns >= 0) { 2114 timeout_stack = ns_to_timespec(args->timeout_ns); 2115 timeout = &timeout_stack; 2116 } 2117 2118 ret = i915_mutex_lock_interruptible(dev); 2119 if (ret) 2120 return ret; 2121 2122 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->bo_handle)); 2123 if (&obj->base == NULL) { 2124 mutex_unlock(&dev->struct_mutex); 2125 return -ENOENT; 2126 } 2127 2128 /* Need to make sure the object gets inactive eventually. */ 2129 ret = i915_gem_object_flush_active(obj); 2130 if (ret) 2131 goto out; 2132 2133 if (obj->active) { 2134 seqno = obj->last_read_seqno; 2135 ring = obj->ring; 2136 } 2137 2138 if (seqno == 0) 2139 goto out; 2140 2141 /* Do this after OLR check to make sure we make forward progress polling 2142 * on this IOCTL with a 0 timeout (like busy ioctl) 2143 */ 2144 if (!args->timeout_ns) { 2145 ret = -ETIMEDOUT; 2146 goto out; 2147 } 2148 2149 drm_gem_object_unreference(&obj->base); 2150 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter); 2151 mutex_unlock(&dev->struct_mutex); 2152 2153 ret = __wait_seqno(ring, seqno, reset_counter, true, timeout); 2154 if (timeout) 2155 args->timeout_ns = timespec_to_ns(timeout); 2156 return ret; 2157 2158 out: 2159 drm_gem_object_unreference(&obj->base); 2160 mutex_unlock(&dev->struct_mutex); 2161 return ret; 2162 } 2163 2164 /** 2165 * i915_gem_object_sync - sync an object to a ring. 2166 * 2167 * @obj: object which may be in use on another ring. 2168 * @to: ring we wish to use the object on. May be NULL. 2169 * 2170 * This code is meant to abstract object synchronization with the GPU. 2171 * Calling with NULL implies synchronizing the object with the CPU 2172 * rather than a particular GPU ring. 2173 * 2174 * Returns 0 if successful, else propagates up the lower layer error. 2175 */ 2176 int 2177 i915_gem_object_sync(struct drm_i915_gem_object *obj, 2178 struct intel_ring_buffer *to) 2179 { 2180 struct intel_ring_buffer *from = obj->ring; 2181 u32 seqno; 2182 int ret, idx; 2183 2184 if (from == NULL || to == from) 2185 return 0; 2186 2187 if (to == NULL || !i915_semaphore_is_enabled(obj->base.dev)) 2188 return i915_gem_object_wait_rendering(obj, false); 2189 2190 idx = intel_ring_sync_index(from, to); 2191 2192 seqno = obj->last_read_seqno; 2193 if (seqno <= from->sync_seqno[idx]) 2194 return 0; 2195 2196 ret = i915_gem_check_olr(obj->ring, seqno); 2197 if (ret) 2198 return ret; 2199 2200 ret = to->sync_to(to, from, seqno); 2201 if (!ret) 2202 /* We use last_read_seqno because sync_to() 2203 * might have just caused seqno wrap under 2204 * the radar. 2205 */ 2206 from->sync_seqno[idx] = obj->last_read_seqno; 2207 2208 return ret; 2209 } 2210 2211 static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj) 2212 { 2213 u32 old_write_domain, old_read_domains; 2214 2215 /* Force a pagefault for domain tracking on next user access */ 2216 i915_gem_release_mmap(obj); 2217 2218 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) 2219 return; 2220 2221 /* Wait for any direct GTT access to complete */ 2222 cpu_mfence(); 2223 2224 old_read_domains = obj->base.read_domains; 2225 old_write_domain = obj->base.write_domain; 2226 2227 obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT; 2228 obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT; 2229 2230 } 2231 2232 /** 2233 * Unbinds an object from the GTT aperture. 2234 */ 2235 int 2236 i915_gem_object_unbind(struct drm_i915_gem_object *obj) 2237 { 2238 drm_i915_private_t *dev_priv = obj->base.dev->dev_private; 2239 int ret; 2240 2241 if (obj->gtt_space == NULL) 2242 return 0; 2243 2244 if (obj->pin_count) 2245 return -EBUSY; 2246 2247 BUG_ON(obj->pages == NULL); 2248 2249 ret = i915_gem_object_finish_gpu(obj); 2250 if (ret) 2251 return ret; 2252 /* Continue on if we fail due to EIO, the GPU is hung so we 2253 * should be safe and we need to cleanup or else we might 2254 * cause memory corruption through use-after-free. 2255 */ 2256 2257 i915_gem_object_finish_gtt(obj); 2258 2259 /* Move the object to the CPU domain to ensure that 2260 * any possible CPU writes while it's not in the GTT 2261 * are flushed when we go to remap it. 2262 */ 2263 if (ret == 0) 2264 ret = i915_gem_object_set_to_cpu_domain(obj, 1); 2265 if (ret == -ERESTARTSYS) 2266 return ret; 2267 if (ret) { 2268 /* In the event of a disaster, abandon all caches and 2269 * hope for the best. 2270 */ 2271 i915_gem_clflush_object(obj); 2272 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU; 2273 } 2274 2275 /* release the fence reg _after_ flushing */ 2276 ret = i915_gem_object_put_fence(obj); 2277 if (ret) 2278 return ret; 2279 2280 if (obj->has_global_gtt_mapping) 2281 i915_gem_gtt_unbind_object(obj); 2282 if (obj->has_aliasing_ppgtt_mapping) { 2283 i915_ppgtt_unbind_object(dev_priv->mm.aliasing_ppgtt, obj); 2284 obj->has_aliasing_ppgtt_mapping = 0; 2285 } 2286 i915_gem_gtt_finish_object(obj); 2287 2288 i915_gem_object_put_pages_gtt(obj); 2289 2290 list_del_init(&obj->global_list); 2291 list_del_init(&obj->mm_list); 2292 /* Avoid an unnecessary call to unbind on rebind. */ 2293 obj->map_and_fenceable = true; 2294 2295 drm_mm_put_block(obj->gtt_space); 2296 obj->gtt_space = NULL; 2297 obj->gtt_offset = 0; 2298 2299 if (i915_gem_object_is_purgeable(obj)) 2300 i915_gem_object_truncate(obj); 2301 2302 return ret; 2303 } 2304 2305 int i915_gpu_idle(struct drm_device *dev) 2306 { 2307 drm_i915_private_t *dev_priv = dev->dev_private; 2308 struct intel_ring_buffer *ring; 2309 int ret, i; 2310 2311 /* Flush everything onto the inactive list. */ 2312 for_each_ring(ring, dev_priv, i) { 2313 ret = i915_switch_context(ring, NULL, DEFAULT_CONTEXT_ID); 2314 if (ret) 2315 return ret; 2316 2317 ret = intel_ring_idle(ring); 2318 if (ret) 2319 return ret; 2320 } 2321 2322 return 0; 2323 } 2324 2325 static void i965_write_fence_reg(struct drm_device *dev, int reg, 2326 struct drm_i915_gem_object *obj) 2327 { 2328 drm_i915_private_t *dev_priv = dev->dev_private; 2329 int fence_reg; 2330 int fence_pitch_shift; 2331 2332 if (INTEL_INFO(dev)->gen >= 6) { 2333 fence_reg = FENCE_REG_SANDYBRIDGE_0; 2334 fence_pitch_shift = SANDYBRIDGE_FENCE_PITCH_SHIFT; 2335 } else { 2336 fence_reg = FENCE_REG_965_0; 2337 fence_pitch_shift = I965_FENCE_PITCH_SHIFT; 2338 } 2339 2340 fence_reg += reg * 8; 2341 2342 /* To w/a incoherency with non-atomic 64-bit register updates, 2343 * we split the 64-bit update into two 32-bit writes. In order 2344 * for a partial fence not to be evaluated between writes, we 2345 * precede the update with write to turn off the fence register, 2346 * and only enable the fence as the last step. 2347 * 2348 * For extra levels of paranoia, we make sure each step lands 2349 * before applying the next step. 2350 */ 2351 I915_WRITE(fence_reg, 0); 2352 POSTING_READ(fence_reg); 2353 2354 if (obj) { 2355 u32 size = obj->gtt_space->size; 2356 uint64_t val; 2357 2358 val = (uint64_t)((obj->gtt_offset + size - 4096) & 2359 0xfffff000) << 32; 2360 val |= obj->gtt_offset & 0xfffff000; 2361 val |= (uint64_t)((obj->stride / 128) - 1) << fence_pitch_shift; 2362 if (obj->tiling_mode == I915_TILING_Y) 2363 val |= 1 << I965_FENCE_TILING_Y_SHIFT; 2364 val |= I965_FENCE_REG_VALID; 2365 2366 I915_WRITE(fence_reg + 4, val >> 32); 2367 POSTING_READ(fence_reg + 4); 2368 2369 I915_WRITE(fence_reg + 0, val); 2370 POSTING_READ(fence_reg); 2371 } else { 2372 I915_WRITE(fence_reg + 4, 0); 2373 POSTING_READ(fence_reg + 4); 2374 } 2375 } 2376 2377 static void i915_write_fence_reg(struct drm_device *dev, int reg, 2378 struct drm_i915_gem_object *obj) 2379 { 2380 drm_i915_private_t *dev_priv = dev->dev_private; 2381 u32 val; 2382 2383 if (obj) { 2384 u32 size = obj->gtt_space->size; 2385 int pitch_val; 2386 int tile_width; 2387 2388 WARN((obj->gtt_offset & ~I915_FENCE_START_MASK) || 2389 (size & -size) != size || 2390 (obj->gtt_offset & (size - 1)), 2391 "object 0x%08x [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n", 2392 obj->gtt_offset, obj->map_and_fenceable, size); 2393 2394 if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev)) 2395 tile_width = 128; 2396 else 2397 tile_width = 512; 2398 2399 /* Note: pitch better be a power of two tile widths */ 2400 pitch_val = obj->stride / tile_width; 2401 pitch_val = ffs(pitch_val) - 1; 2402 2403 val = obj->gtt_offset; 2404 if (obj->tiling_mode == I915_TILING_Y) 2405 val |= 1 << I830_FENCE_TILING_Y_SHIFT; 2406 val |= I915_FENCE_SIZE_BITS(size); 2407 val |= pitch_val << I830_FENCE_PITCH_SHIFT; 2408 val |= I830_FENCE_REG_VALID; 2409 } else 2410 val = 0; 2411 2412 if (reg < 8) 2413 reg = FENCE_REG_830_0 + reg * 4; 2414 else 2415 reg = FENCE_REG_945_8 + (reg - 8) * 4; 2416 2417 I915_WRITE(reg, val); 2418 POSTING_READ(reg); 2419 } 2420 2421 static void i830_write_fence_reg(struct drm_device *dev, int reg, 2422 struct drm_i915_gem_object *obj) 2423 { 2424 drm_i915_private_t *dev_priv = dev->dev_private; 2425 uint32_t val; 2426 2427 if (obj) { 2428 u32 size = obj->gtt_space->size; 2429 uint32_t pitch_val; 2430 2431 WARN((obj->gtt_offset & ~I830_FENCE_START_MASK) || 2432 (size & -size) != size || 2433 (obj->gtt_offset & (size - 1)), 2434 "object 0x%08x not 512K or pot-size 0x%08x aligned\n", 2435 obj->gtt_offset, size); 2436 2437 pitch_val = obj->stride / 128; 2438 pitch_val = ffs(pitch_val) - 1; 2439 2440 val = obj->gtt_offset; 2441 if (obj->tiling_mode == I915_TILING_Y) 2442 val |= 1 << I830_FENCE_TILING_Y_SHIFT; 2443 val |= I830_FENCE_SIZE_BITS(size); 2444 val |= pitch_val << I830_FENCE_PITCH_SHIFT; 2445 val |= I830_FENCE_REG_VALID; 2446 } else 2447 val = 0; 2448 2449 I915_WRITE(FENCE_REG_830_0 + reg * 4, val); 2450 POSTING_READ(FENCE_REG_830_0 + reg * 4); 2451 } 2452 2453 inline static bool i915_gem_object_needs_mb(struct drm_i915_gem_object *obj) 2454 { 2455 return obj && obj->base.read_domains & I915_GEM_DOMAIN_GTT; 2456 } 2457 2458 static void i915_gem_write_fence(struct drm_device *dev, int reg, 2459 struct drm_i915_gem_object *obj) 2460 { 2461 struct drm_i915_private *dev_priv = dev->dev_private; 2462 2463 /* Ensure that all CPU reads are completed before installing a fence 2464 * and all writes before removing the fence. 2465 */ 2466 if (i915_gem_object_needs_mb(dev_priv->fence_regs[reg].obj)) 2467 cpu_mfence(); 2468 2469 WARN(obj && (!obj->stride || !obj->tiling_mode), 2470 "bogus fence setup with stride: 0x%x, tiling mode: %i\n", 2471 obj->stride, obj->tiling_mode); 2472 2473 switch (INTEL_INFO(dev)->gen) { 2474 case 7: 2475 case 6: 2476 case 5: 2477 case 4: i965_write_fence_reg(dev, reg, obj); break; 2478 case 3: i915_write_fence_reg(dev, reg, obj); break; 2479 case 2: i830_write_fence_reg(dev, reg, obj); break; 2480 default: BUG(); 2481 } 2482 2483 /* And similarly be paranoid that no direct access to this region 2484 * is reordered to before the fence is installed. 2485 */ 2486 if (i915_gem_object_needs_mb(obj)) 2487 cpu_mfence(); 2488 } 2489 2490 static inline int fence_number(struct drm_i915_private *dev_priv, 2491 struct drm_i915_fence_reg *fence) 2492 { 2493 return fence - dev_priv->fence_regs; 2494 } 2495 2496 static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj, 2497 struct drm_i915_fence_reg *fence, 2498 bool enable) 2499 { 2500 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2501 int reg = fence_number(dev_priv, fence); 2502 2503 i915_gem_write_fence(obj->base.dev, reg, enable ? obj : NULL); 2504 2505 if (enable) { 2506 obj->fence_reg = reg; 2507 fence->obj = obj; 2508 list_move_tail(&fence->lru_list, &dev_priv->mm.fence_list); 2509 } else { 2510 obj->fence_reg = I915_FENCE_REG_NONE; 2511 fence->obj = NULL; 2512 list_del_init(&fence->lru_list); 2513 } 2514 obj->fence_dirty = false; 2515 } 2516 2517 static int 2518 i915_gem_object_wait_fence(struct drm_i915_gem_object *obj) 2519 { 2520 if (obj->last_fenced_seqno) { 2521 int ret = i915_wait_seqno(obj->ring, obj->last_fenced_seqno); 2522 if (ret) 2523 return ret; 2524 2525 obj->last_fenced_seqno = 0; 2526 } 2527 2528 obj->fenced_gpu_access = false; 2529 return 0; 2530 } 2531 2532 int 2533 i915_gem_object_put_fence(struct drm_i915_gem_object *obj) 2534 { 2535 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2536 struct drm_i915_fence_reg *fence; 2537 int ret; 2538 2539 ret = i915_gem_object_wait_fence(obj); 2540 if (ret) 2541 return ret; 2542 2543 if (obj->fence_reg == I915_FENCE_REG_NONE) 2544 return 0; 2545 2546 fence = &dev_priv->fence_regs[obj->fence_reg]; 2547 2548 i915_gem_object_fence_lost(obj); 2549 i915_gem_object_update_fence(obj, fence, false); 2550 2551 return 0; 2552 } 2553 2554 static struct drm_i915_fence_reg * 2555 i915_find_fence_reg(struct drm_device *dev) 2556 { 2557 struct drm_i915_private *dev_priv = dev->dev_private; 2558 struct drm_i915_fence_reg *reg, *avail; 2559 int i; 2560 2561 /* First try to find a free reg */ 2562 avail = NULL; 2563 for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) { 2564 reg = &dev_priv->fence_regs[i]; 2565 if (!reg->obj) 2566 return reg; 2567 2568 if (!reg->pin_count) 2569 avail = reg; 2570 } 2571 2572 if (avail == NULL) 2573 return NULL; 2574 2575 /* None available, try to steal one or wait for a user to finish */ 2576 list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) { 2577 if (reg->pin_count) 2578 continue; 2579 2580 return reg; 2581 } 2582 2583 return NULL; 2584 } 2585 2586 /** 2587 * i915_gem_object_get_fence - set up fencing for an object 2588 * @obj: object to map through a fence reg 2589 * 2590 * When mapping objects through the GTT, userspace wants to be able to write 2591 * to them without having to worry about swizzling if the object is tiled. 2592 * This function walks the fence regs looking for a free one for @obj, 2593 * stealing one if it can't find any. 2594 * 2595 * It then sets up the reg based on the object's properties: address, pitch 2596 * and tiling format. 2597 * 2598 * For an untiled surface, this removes any existing fence. 2599 */ 2600 int 2601 i915_gem_object_get_fence(struct drm_i915_gem_object *obj) 2602 { 2603 struct drm_device *dev = obj->base.dev; 2604 struct drm_i915_private *dev_priv = dev->dev_private; 2605 bool enable = obj->tiling_mode != I915_TILING_NONE; 2606 struct drm_i915_fence_reg *reg; 2607 int ret; 2608 2609 /* Have we updated the tiling parameters upon the object and so 2610 * will need to serialise the write to the associated fence register? 2611 */ 2612 if (obj->fence_dirty) { 2613 ret = i915_gem_object_wait_fence(obj); 2614 if (ret) 2615 return ret; 2616 } 2617 2618 /* Just update our place in the LRU if our fence is getting reused. */ 2619 if (obj->fence_reg != I915_FENCE_REG_NONE) { 2620 reg = &dev_priv->fence_regs[obj->fence_reg]; 2621 if (!obj->fence_dirty) { 2622 list_move_tail(®->lru_list, 2623 &dev_priv->mm.fence_list); 2624 return 0; 2625 } 2626 } else if (enable) { 2627 reg = i915_find_fence_reg(dev); 2628 if (reg == NULL) 2629 return -EDEADLK; 2630 2631 if (reg->obj) { 2632 struct drm_i915_gem_object *old = reg->obj; 2633 2634 ret = i915_gem_object_wait_fence(old); 2635 if (ret) 2636 return ret; 2637 2638 i915_gem_object_fence_lost(old); 2639 } 2640 } else 2641 return 0; 2642 2643 i915_gem_object_update_fence(obj, reg, enable); 2644 2645 return 0; 2646 } 2647 2648 static bool i915_gem_valid_gtt_space(struct drm_device *dev, 2649 struct drm_mm_node *gtt_space, 2650 unsigned long cache_level) 2651 { 2652 struct drm_mm_node *other; 2653 2654 /* On non-LLC machines we have to be careful when putting differing 2655 * types of snoopable memory together to avoid the prefetcher 2656 * crossing memory domains and dying. 2657 */ 2658 if (HAS_LLC(dev)) 2659 return true; 2660 2661 if (gtt_space == NULL) 2662 return true; 2663 2664 if (list_empty(>t_space->node_list)) 2665 return true; 2666 2667 other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list); 2668 if (other->allocated && !other->hole_follows && other->color != cache_level) 2669 return false; 2670 2671 other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list); 2672 if (other->allocated && !gtt_space->hole_follows && other->color != cache_level) 2673 return false; 2674 2675 return true; 2676 } 2677 2678 static void i915_gem_verify_gtt(struct drm_device *dev) 2679 { 2680 #if WATCH_GTT 2681 struct drm_i915_private *dev_priv = dev->dev_private; 2682 struct drm_i915_gem_object *obj; 2683 int err = 0; 2684 2685 list_for_each_entry(obj, &dev_priv->mm.global_list, global_list) { 2686 if (obj->gtt_space == NULL) { 2687 printk(KERN_ERR "object found on GTT list with no space reserved\n"); 2688 err++; 2689 continue; 2690 } 2691 2692 if (obj->cache_level != obj->gtt_space->color) { 2693 printk(KERN_ERR "object reserved space [%08lx, %08lx] with wrong color, cache_level=%x, color=%lx\n", 2694 obj->gtt_space->start, 2695 obj->gtt_space->start + obj->gtt_space->size, 2696 obj->cache_level, 2697 obj->gtt_space->color); 2698 err++; 2699 continue; 2700 } 2701 2702 if (!i915_gem_valid_gtt_space(dev, 2703 obj->gtt_space, 2704 obj->cache_level)) { 2705 printk(KERN_ERR "invalid GTT space found at [%08lx, %08lx] - color=%x\n", 2706 obj->gtt_space->start, 2707 obj->gtt_space->start + obj->gtt_space->size, 2708 obj->cache_level); 2709 err++; 2710 continue; 2711 } 2712 } 2713 2714 WARN_ON(err); 2715 #endif 2716 } 2717 2718 /** 2719 * Finds free space in the GTT aperture and binds the object there. 2720 */ 2721 static int 2722 i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj, 2723 unsigned alignment, 2724 bool map_and_fenceable, 2725 bool nonblocking) 2726 { 2727 struct drm_device *dev = obj->base.dev; 2728 drm_i915_private_t *dev_priv = dev->dev_private; 2729 struct drm_mm_node *node; 2730 u32 size, fence_size, fence_alignment, unfenced_alignment; 2731 bool mappable, fenceable; 2732 size_t gtt_max = map_and_fenceable ? 2733 dev_priv->gtt.mappable_end : dev_priv->gtt.total; 2734 int ret; 2735 2736 fence_size = i915_gem_get_gtt_size(dev, 2737 obj->base.size, 2738 obj->tiling_mode); 2739 fence_alignment = i915_gem_get_gtt_alignment(dev, 2740 obj->base.size, 2741 obj->tiling_mode, true); 2742 unfenced_alignment = 2743 i915_gem_get_gtt_alignment(dev, 2744 obj->base.size, 2745 obj->tiling_mode, false); 2746 2747 if (alignment == 0) 2748 alignment = map_and_fenceable ? fence_alignment : 2749 unfenced_alignment; 2750 if (map_and_fenceable && alignment & (fence_alignment - 1)) { 2751 DRM_ERROR("Invalid object alignment requested %u\n", alignment); 2752 return -EINVAL; 2753 } 2754 2755 size = map_and_fenceable ? fence_size : obj->base.size; 2756 2757 /* If the object is bigger than the entire aperture, reject it early 2758 * before evicting everything in a vain attempt to find space. 2759 */ 2760 if (obj->base.size > gtt_max) { 2761 DRM_ERROR("Attempting to bind an object larger than the aperture: object=%zd > %s aperture=%zu\n", 2762 obj->base.size, 2763 map_and_fenceable ? "mappable" : "total", 2764 gtt_max); 2765 return -E2BIG; 2766 } 2767 2768 search_free: 2769 if (map_and_fenceable) 2770 node = drm_mm_search_free_in_range_color(&dev_priv->mm.gtt_space, 2771 size, alignment, obj->cache_level, 2772 0, dev_priv->gtt.mappable_end, 2773 false); 2774 else 2775 node = drm_mm_search_free_color(&dev_priv->mm.gtt_space, 2776 size, alignment, obj->cache_level, 2777 false); 2778 if (node != NULL) { 2779 if (map_and_fenceable) 2780 obj->gtt_space = 2781 drm_mm_get_block_range_generic(node, 2782 size, alignment, obj->cache_level, 2783 0, dev_priv->gtt.mappable_end, 2784 false); 2785 else 2786 obj->gtt_space = 2787 drm_mm_get_block_generic(node, 2788 size, alignment, obj->cache_level, 2789 false); 2790 } 2791 if (obj->gtt_space == NULL) { 2792 ret = i915_gem_evict_something(dev, size, alignment, 2793 obj->cache_level, 2794 map_and_fenceable, 2795 nonblocking); 2796 if (ret) 2797 return ret; 2798 2799 goto search_free; 2800 } 2801 2802 /* 2803 * NOTE: i915_gem_object_get_pages_gtt() cannot 2804 * return ENOMEM, since we used VM_ALLOC_RETRY. 2805 */ 2806 ret = i915_gem_object_get_pages_gtt(obj); 2807 if (ret != 0) { 2808 drm_mm_put_block(obj->gtt_space); 2809 obj->gtt_space = NULL; 2810 return ret; 2811 } 2812 2813 i915_gem_gtt_bind_object(obj, obj->cache_level); 2814 if (ret != 0) { 2815 i915_gem_object_put_pages_gtt(obj); 2816 drm_mm_put_block(obj->gtt_space); 2817 obj->gtt_space = NULL; 2818 if (i915_gem_evict_everything(dev)) 2819 return (ret); 2820 goto search_free; 2821 } 2822 2823 list_add_tail(&obj->global_list, &dev_priv->mm.bound_list); 2824 list_add_tail(&obj->mm_list, &dev_priv->mm.inactive_list); 2825 2826 obj->gtt_offset = obj->gtt_space->start; 2827 2828 fenceable = 2829 obj->gtt_space->size == fence_size && 2830 (obj->gtt_space->start & (fence_alignment - 1)) == 0; 2831 2832 mappable = 2833 obj->gtt_offset + obj->base.size <= dev_priv->gtt.mappable_end; 2834 2835 obj->map_and_fenceable = mappable && fenceable; 2836 2837 trace_i915_gem_object_bind(obj, map_and_fenceable); 2838 i915_gem_verify_gtt(dev); 2839 return 0; 2840 } 2841 2842 void 2843 i915_gem_clflush_object(struct drm_i915_gem_object *obj) 2844 { 2845 2846 /* If we don't have a page list set up, then we're not pinned 2847 * to GPU, and we can ignore the cache flush because it'll happen 2848 * again at bind time. 2849 */ 2850 if (obj->pages == NULL) 2851 return; 2852 2853 /* 2854 * Stolen memory is always coherent with the GPU as it is explicitly 2855 * marked as wc by the system, or the system is cache-coherent. 2856 */ 2857 if (obj->stolen) 2858 return; 2859 2860 /* If the GPU is snooping the contents of the CPU cache, 2861 * we do not need to manually clear the CPU cache lines. However, 2862 * the caches are only snooped when the render cache is 2863 * flushed/invalidated. As we always have to emit invalidations 2864 * and flushes when moving into and out of the RENDER domain, correct 2865 * snooping behaviour occurs naturally as the result of our domain 2866 * tracking. 2867 */ 2868 if (obj->cache_level != I915_CACHE_NONE) 2869 return; 2870 2871 drm_clflush_pages(obj->pages, obj->base.size / PAGE_SIZE); 2872 } 2873 2874 /** Flushes the GTT write domain for the object if it's dirty. */ 2875 static void 2876 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) 2877 { 2878 uint32_t old_write_domain; 2879 2880 if (obj->base.write_domain != I915_GEM_DOMAIN_GTT) 2881 return; 2882 2883 /* No actual flushing is required for the GTT write domain. Writes 2884 * to it immediately go to main memory as far as we know, so there's 2885 * no chipset flush. It also doesn't land in render cache. 2886 * 2887 * However, we do have to enforce the order so that all writes through 2888 * the GTT land before any writes to the device, such as updates to 2889 * the GATT itself. 2890 */ 2891 cpu_sfence(); 2892 2893 old_write_domain = obj->base.write_domain; 2894 obj->base.write_domain = 0; 2895 } 2896 2897 /** Flushes the CPU write domain for the object if it's dirty. */ 2898 static void 2899 i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) 2900 { 2901 uint32_t old_write_domain; 2902 2903 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) 2904 return; 2905 2906 i915_gem_clflush_object(obj); 2907 i915_gem_chipset_flush(obj->base.dev); 2908 old_write_domain = obj->base.write_domain; 2909 obj->base.write_domain = 0; 2910 } 2911 2912 /** 2913 * Moves a single object to the GTT read, and possibly write domain. 2914 * 2915 * This function returns when the move is complete, including waiting on 2916 * flushes to occur. 2917 */ 2918 int 2919 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) 2920 { 2921 drm_i915_private_t *dev_priv = obj->base.dev->dev_private; 2922 uint32_t old_write_domain, old_read_domains; 2923 int ret; 2924 2925 /* Not valid to be called on unbound objects. */ 2926 if (obj->gtt_space == NULL) 2927 return -EINVAL; 2928 2929 if (obj->base.write_domain == I915_GEM_DOMAIN_GTT) 2930 return 0; 2931 2932 ret = i915_gem_object_wait_rendering(obj, !write); 2933 if (ret) 2934 return ret; 2935 2936 i915_gem_object_flush_cpu_write_domain(obj); 2937 2938 /* Serialise direct access to this object with the barriers for 2939 * coherent writes from the GPU, by effectively invalidating the 2940 * GTT domain upon first access. 2941 */ 2942 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) 2943 cpu_mfence(); 2944 2945 old_write_domain = obj->base.write_domain; 2946 old_read_domains = obj->base.read_domains; 2947 2948 /* It should now be out of any other write domains, and we can update 2949 * the domain values for our changes. 2950 */ 2951 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0); 2952 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 2953 if (write) { 2954 obj->base.read_domains = I915_GEM_DOMAIN_GTT; 2955 obj->base.write_domain = I915_GEM_DOMAIN_GTT; 2956 obj->dirty = 1; 2957 } 2958 2959 /* And bump the LRU for this access */ 2960 if (i915_gem_object_is_inactive(obj)) 2961 list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list); 2962 2963 return 0; 2964 } 2965 2966 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, 2967 enum i915_cache_level cache_level) 2968 { 2969 struct drm_device *dev = obj->base.dev; 2970 drm_i915_private_t *dev_priv = dev->dev_private; 2971 int ret; 2972 2973 if (obj->cache_level == cache_level) 2974 return 0; 2975 2976 if (obj->pin_count) { 2977 DRM_DEBUG("can not change the cache level of pinned objects\n"); 2978 return -EBUSY; 2979 } 2980 2981 if (!i915_gem_valid_gtt_space(dev, obj->gtt_space, cache_level)) { 2982 ret = i915_gem_object_unbind(obj); 2983 if (ret) 2984 return ret; 2985 } 2986 2987 if (obj->gtt_space) { 2988 ret = i915_gem_object_finish_gpu(obj); 2989 if (ret) 2990 return ret; 2991 2992 i915_gem_object_finish_gtt(obj); 2993 2994 /* Before SandyBridge, you could not use tiling or fence 2995 * registers with snooped memory, so relinquish any fences 2996 * currently pointing to our region in the aperture. 2997 */ 2998 if (INTEL_INFO(dev)->gen < 6) { 2999 ret = i915_gem_object_put_fence(obj); 3000 if (ret) 3001 return ret; 3002 } 3003 3004 if (obj->has_global_gtt_mapping) 3005 i915_gem_gtt_bind_object(obj, cache_level); 3006 if (obj->has_aliasing_ppgtt_mapping) 3007 i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt, 3008 obj, cache_level); 3009 3010 obj->gtt_space->color = cache_level; 3011 } 3012 3013 if (cache_level == I915_CACHE_NONE) { 3014 u32 old_read_domains, old_write_domain; 3015 3016 /* If we're coming from LLC cached, then we haven't 3017 * actually been tracking whether the data is in the 3018 * CPU cache or not, since we only allow one bit set 3019 * in obj->write_domain and have been skipping the clflushes. 3020 * Just set it to the CPU cache for now. 3021 */ 3022 WARN_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU); 3023 WARN_ON(obj->base.read_domains & ~I915_GEM_DOMAIN_CPU); 3024 3025 old_read_domains = obj->base.read_domains; 3026 old_write_domain = obj->base.write_domain; 3027 3028 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 3029 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 3030 3031 } 3032 3033 obj->cache_level = cache_level; 3034 i915_gem_verify_gtt(dev); 3035 return 0; 3036 } 3037 3038 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, 3039 struct drm_file *file) 3040 { 3041 struct drm_i915_gem_caching *args = data; 3042 struct drm_i915_gem_object *obj; 3043 int ret; 3044 3045 ret = i915_mutex_lock_interruptible(dev); 3046 if (ret) 3047 return ret; 3048 3049 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 3050 if (&obj->base == NULL) { 3051 ret = -ENOENT; 3052 goto unlock; 3053 } 3054 3055 args->caching = obj->cache_level != I915_CACHE_NONE; 3056 3057 drm_gem_object_unreference(&obj->base); 3058 unlock: 3059 mutex_unlock(&dev->struct_mutex); 3060 return ret; 3061 } 3062 3063 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, 3064 struct drm_file *file) 3065 { 3066 struct drm_i915_gem_caching *args = data; 3067 struct drm_i915_gem_object *obj; 3068 enum i915_cache_level level; 3069 int ret; 3070 3071 switch (args->caching) { 3072 case I915_CACHING_NONE: 3073 level = I915_CACHE_NONE; 3074 break; 3075 case I915_CACHING_CACHED: 3076 level = I915_CACHE_LLC; 3077 break; 3078 default: 3079 return -EINVAL; 3080 } 3081 3082 ret = i915_mutex_lock_interruptible(dev); 3083 if (ret) 3084 return ret; 3085 3086 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 3087 if (&obj->base == NULL) { 3088 ret = -ENOENT; 3089 goto unlock; 3090 } 3091 3092 ret = i915_gem_object_set_cache_level(obj, level); 3093 3094 drm_gem_object_unreference(&obj->base); 3095 unlock: 3096 mutex_unlock(&dev->struct_mutex); 3097 return ret; 3098 } 3099 3100 /* 3101 * Prepare buffer for display plane (scanout, cursors, etc). 3102 * Can be called from an uninterruptible phase (modesetting) and allows 3103 * any flushes to be pipelined (for pageflips). 3104 */ 3105 int 3106 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, 3107 u32 alignment, 3108 struct intel_ring_buffer *pipelined) 3109 { 3110 u32 old_read_domains, old_write_domain; 3111 int ret; 3112 3113 if (pipelined != obj->ring) { 3114 ret = i915_gem_object_sync(obj, pipelined); 3115 if (ret) 3116 return ret; 3117 } 3118 3119 /* The display engine is not coherent with the LLC cache on gen6. As 3120 * a result, we make sure that the pinning that is about to occur is 3121 * done with uncached PTEs. This is lowest common denominator for all 3122 * chipsets. 3123 * 3124 * However for gen6+, we could do better by using the GFDT bit instead 3125 * of uncaching, which would allow us to flush all the LLC-cached data 3126 * with that bit in the PTE to main memory with just one PIPE_CONTROL. 3127 */ 3128 ret = i915_gem_object_set_cache_level(obj, I915_CACHE_NONE); 3129 if (ret) 3130 return ret; 3131 3132 /* As the user may map the buffer once pinned in the display plane 3133 * (e.g. libkms for the bootup splash), we have to ensure that we 3134 * always use map_and_fenceable for all scanout buffers. 3135 */ 3136 ret = i915_gem_object_pin(obj, alignment, true, false); 3137 if (ret) 3138 return ret; 3139 3140 i915_gem_object_flush_cpu_write_domain(obj); 3141 3142 old_write_domain = obj->base.write_domain; 3143 old_read_domains = obj->base.read_domains; 3144 3145 /* It should now be out of any other write domains, and we can update 3146 * the domain values for our changes. 3147 */ 3148 obj->base.write_domain = 0; 3149 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 3150 3151 return 0; 3152 } 3153 3154 int 3155 i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj) 3156 { 3157 int ret; 3158 3159 if ((obj->base.read_domains & I915_GEM_GPU_DOMAINS) == 0) 3160 return 0; 3161 3162 ret = i915_gem_object_wait_rendering(obj, false); 3163 if (ret) 3164 return ret; 3165 3166 /* Ensure that we invalidate the GPU's caches and TLBs. */ 3167 obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS; 3168 return 0; 3169 } 3170 3171 /** 3172 * Moves a single object to the CPU read, and possibly write domain. 3173 * 3174 * This function returns when the move is complete, including waiting on 3175 * flushes to occur. 3176 */ 3177 int 3178 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) 3179 { 3180 uint32_t old_write_domain, old_read_domains; 3181 int ret; 3182 3183 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) 3184 return 0; 3185 3186 ret = i915_gem_object_wait_rendering(obj, !write); 3187 if (ret) 3188 return ret; 3189 3190 i915_gem_object_flush_gtt_write_domain(obj); 3191 3192 old_write_domain = obj->base.write_domain; 3193 old_read_domains = obj->base.read_domains; 3194 3195 /* Flush the CPU cache if it's still invalid. */ 3196 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) { 3197 i915_gem_clflush_object(obj); 3198 3199 obj->base.read_domains |= I915_GEM_DOMAIN_CPU; 3200 } 3201 3202 /* It should now be out of any other write domains, and we can update 3203 * the domain values for our changes. 3204 */ 3205 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0); 3206 3207 /* If we're writing through the CPU, then the GPU read domains will 3208 * need to be invalidated at next use. 3209 */ 3210 if (write) { 3211 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 3212 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 3213 } 3214 3215 return 0; 3216 } 3217 3218 /* Throttle our rendering by waiting until the ring has completed our requests 3219 * emitted over 20 msec ago. 3220 * 3221 * Note that if we were to use the current jiffies each time around the loop, 3222 * we wouldn't escape the function with any frames outstanding if the time to 3223 * render a frame was over 20ms. 3224 * 3225 * This should get us reasonable parallelism between CPU and GPU but also 3226 * relatively low latency when blocking on a particular request to finish. 3227 */ 3228 static int 3229 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) 3230 { 3231 struct drm_i915_private *dev_priv = dev->dev_private; 3232 struct drm_i915_file_private *file_priv = file->driver_priv; 3233 unsigned long recent_enough = jiffies - msecs_to_jiffies(20); 3234 struct drm_i915_gem_request *request; 3235 struct intel_ring_buffer *ring = NULL; 3236 unsigned reset_counter; 3237 u32 seqno = 0; 3238 int ret; 3239 3240 ret = i915_gem_wait_for_error(&dev_priv->gpu_error); 3241 if (ret) 3242 return ret; 3243 3244 ret = i915_gem_check_wedge(&dev_priv->gpu_error, false); 3245 if (ret) 3246 return ret; 3247 3248 spin_lock(&file_priv->mm.lock); 3249 list_for_each_entry(request, &file_priv->mm.request_list, client_list) { 3250 if (time_after_eq(request->emitted_jiffies, recent_enough)) 3251 break; 3252 3253 ring = request->ring; 3254 seqno = request->seqno; 3255 } 3256 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter); 3257 spin_unlock(&file_priv->mm.lock); 3258 3259 if (seqno == 0) 3260 return 0; 3261 3262 ret = __wait_seqno(ring, seqno, reset_counter, true, NULL); 3263 if (ret == 0) 3264 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0); 3265 3266 return ret; 3267 } 3268 3269 int 3270 i915_gem_object_pin(struct drm_i915_gem_object *obj, 3271 uint32_t alignment, 3272 bool map_and_fenceable, 3273 bool nonblocking) 3274 { 3275 int ret; 3276 3277 if (WARN_ON(obj->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT)) 3278 return -EBUSY; 3279 3280 if (obj->gtt_space != NULL) { 3281 if ((alignment && obj->gtt_offset & (alignment - 1)) || 3282 (map_and_fenceable && !obj->map_and_fenceable)) { 3283 WARN(obj->pin_count, 3284 "bo is already pinned with incorrect alignment:" 3285 " offset=%x, req.alignment=%x, req.map_and_fenceable=%d," 3286 " obj->map_and_fenceable=%d\n", 3287 obj->gtt_offset, alignment, 3288 map_and_fenceable, 3289 obj->map_and_fenceable); 3290 ret = i915_gem_object_unbind(obj); 3291 if (ret) 3292 return ret; 3293 } 3294 } 3295 3296 if (obj->gtt_space == NULL) { 3297 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 3298 3299 ret = i915_gem_object_bind_to_gtt(obj, alignment, 3300 map_and_fenceable, 3301 nonblocking); 3302 if (ret) 3303 return ret; 3304 3305 if (!dev_priv->mm.aliasing_ppgtt) 3306 i915_gem_gtt_bind_object(obj, obj->cache_level); 3307 } 3308 3309 if (!obj->has_global_gtt_mapping && map_and_fenceable) 3310 i915_gem_gtt_bind_object(obj, obj->cache_level); 3311 3312 obj->pin_count++; 3313 obj->pin_mappable |= map_and_fenceable; 3314 3315 return 0; 3316 } 3317 3318 void 3319 i915_gem_object_unpin(struct drm_i915_gem_object *obj) 3320 { 3321 BUG_ON(obj->pin_count == 0); 3322 BUG_ON(obj->gtt_space == NULL); 3323 3324 if (--obj->pin_count == 0) 3325 obj->pin_mappable = false; 3326 } 3327 3328 int 3329 i915_gem_pin_ioctl(struct drm_device *dev, void *data, 3330 struct drm_file *file) 3331 { 3332 struct drm_i915_gem_pin *args = data; 3333 struct drm_i915_gem_object *obj; 3334 int ret; 3335 3336 ret = i915_mutex_lock_interruptible(dev); 3337 if (ret) 3338 return ret; 3339 3340 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 3341 if (&obj->base == NULL) { 3342 ret = -ENOENT; 3343 goto unlock; 3344 } 3345 3346 if (obj->madv != I915_MADV_WILLNEED) { 3347 DRM_ERROR("Attempting to pin a purgeable buffer\n"); 3348 ret = -EINVAL; 3349 goto out; 3350 } 3351 3352 if (obj->pin_filp != NULL && obj->pin_filp != file) { 3353 DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n", 3354 args->handle); 3355 ret = -EINVAL; 3356 goto out; 3357 } 3358 3359 if (obj->user_pin_count == 0) { 3360 ret = i915_gem_object_pin(obj, args->alignment, true, false); 3361 if (ret) 3362 goto out; 3363 } 3364 3365 obj->user_pin_count++; 3366 obj->pin_filp = file; 3367 3368 /* XXX - flush the CPU caches for pinned objects 3369 * as the X server doesn't manage domains yet 3370 */ 3371 i915_gem_object_flush_cpu_write_domain(obj); 3372 args->offset = obj->gtt_offset; 3373 out: 3374 drm_gem_object_unreference(&obj->base); 3375 unlock: 3376 mutex_unlock(&dev->struct_mutex); 3377 return ret; 3378 } 3379 3380 int 3381 i915_gem_unpin_ioctl(struct drm_device *dev, void *data, 3382 struct drm_file *file) 3383 { 3384 struct drm_i915_gem_pin *args = data; 3385 struct drm_i915_gem_object *obj; 3386 int ret; 3387 3388 ret = i915_mutex_lock_interruptible(dev); 3389 if (ret) 3390 return ret; 3391 3392 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 3393 if (&obj->base == NULL) { 3394 ret = -ENOENT; 3395 goto unlock; 3396 } 3397 3398 if (obj->pin_filp != file) { 3399 DRM_ERROR("Not pinned by caller in i915_gem_pin_ioctl(): %d\n", 3400 args->handle); 3401 ret = -EINVAL; 3402 goto out; 3403 } 3404 obj->user_pin_count--; 3405 if (obj->user_pin_count == 0) { 3406 obj->pin_filp = NULL; 3407 i915_gem_object_unpin(obj); 3408 } 3409 3410 out: 3411 drm_gem_object_unreference(&obj->base); 3412 unlock: 3413 mutex_unlock(&dev->struct_mutex); 3414 return ret; 3415 } 3416 3417 int 3418 i915_gem_busy_ioctl(struct drm_device *dev, void *data, 3419 struct drm_file *file) 3420 { 3421 struct drm_i915_gem_busy *args = data; 3422 struct drm_i915_gem_object *obj; 3423 int ret; 3424 3425 ret = i915_mutex_lock_interruptible(dev); 3426 if (ret) 3427 return ret; 3428 3429 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 3430 if (&obj->base == NULL) { 3431 ret = -ENOENT; 3432 goto unlock; 3433 } 3434 3435 /* Count all active objects as busy, even if they are currently not used 3436 * by the gpu. Users of this interface expect objects to eventually 3437 * become non-busy without any further actions, therefore emit any 3438 * necessary flushes here. 3439 */ 3440 ret = i915_gem_object_flush_active(obj); 3441 3442 args->busy = obj->active; 3443 if (obj->ring) { 3444 args->busy |= intel_ring_flag(obj->ring) << 16; 3445 } 3446 3447 drm_gem_object_unreference(&obj->base); 3448 unlock: 3449 mutex_unlock(&dev->struct_mutex); 3450 return ret; 3451 } 3452 3453 int 3454 i915_gem_throttle_ioctl(struct drm_device *dev, void *data, 3455 struct drm_file *file_priv) 3456 { 3457 return i915_gem_ring_throttle(dev, file_priv); 3458 } 3459 3460 int 3461 i915_gem_madvise_ioctl(struct drm_device *dev, void *data, 3462 struct drm_file *file_priv) 3463 { 3464 struct drm_i915_gem_madvise *args = data; 3465 struct drm_i915_gem_object *obj; 3466 int ret; 3467 3468 switch (args->madv) { 3469 case I915_MADV_DONTNEED: 3470 case I915_MADV_WILLNEED: 3471 break; 3472 default: 3473 return -EINVAL; 3474 } 3475 3476 ret = i915_mutex_lock_interruptible(dev); 3477 if (ret) 3478 return ret; 3479 3480 obj = to_intel_bo(drm_gem_object_lookup(dev, file_priv, args->handle)); 3481 if (&obj->base == NULL) { 3482 ret = -ENOENT; 3483 goto unlock; 3484 } 3485 3486 if (obj->pin_count) { 3487 ret = -EINVAL; 3488 goto out; 3489 } 3490 3491 if (obj->madv != __I915_MADV_PURGED) 3492 obj->madv = args->madv; 3493 3494 /* if the object is no longer attached, discard its backing storage */ 3495 if (i915_gem_object_is_purgeable(obj) && obj->pages == NULL) 3496 i915_gem_object_truncate(obj); 3497 3498 args->retained = obj->madv != __I915_MADV_PURGED; 3499 3500 out: 3501 drm_gem_object_unreference(&obj->base); 3502 unlock: 3503 mutex_unlock(&dev->struct_mutex); 3504 return ret; 3505 } 3506 3507 void i915_gem_object_init(struct drm_i915_gem_object *obj, 3508 const struct drm_i915_gem_object_ops *ops) 3509 { 3510 INIT_LIST_HEAD(&obj->mm_list); 3511 INIT_LIST_HEAD(&obj->global_list); 3512 INIT_LIST_HEAD(&obj->ring_list); 3513 INIT_LIST_HEAD(&obj->exec_list); 3514 3515 obj->ops = ops; 3516 3517 obj->fence_reg = I915_FENCE_REG_NONE; 3518 obj->madv = I915_MADV_WILLNEED; 3519 /* Avoid an unnecessary call to unbind on the first bind. */ 3520 obj->map_and_fenceable = true; 3521 3522 i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size); 3523 } 3524 3525 static const struct drm_i915_gem_object_ops i915_gem_object_ops = { 3526 .get_pages = i915_gem_object_get_pages_gtt, 3527 .put_pages = i915_gem_object_put_pages_gtt, 3528 }; 3529 3530 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev, 3531 size_t size) 3532 { 3533 struct drm_i915_gem_object *obj; 3534 #if 0 3535 struct address_space *mapping; 3536 u32 mask; 3537 #endif 3538 3539 obj = kmalloc(sizeof(*obj), M_DRM, M_WAITOK | M_ZERO); 3540 if (obj == NULL) 3541 return NULL; 3542 3543 if (drm_gem_object_init(dev, &obj->base, size) != 0) { 3544 kfree(obj); 3545 return NULL; 3546 } 3547 3548 #if 0 3549 mask = GFP_HIGHUSER | __GFP_RECLAIMABLE; 3550 if (IS_CRESTLINE(dev) || IS_BROADWATER(dev)) { 3551 /* 965gm cannot relocate objects above 4GiB. */ 3552 mask &= ~__GFP_HIGHMEM; 3553 mask |= __GFP_DMA32; 3554 } 3555 3556 mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping; 3557 mapping_set_gfp_mask(mapping, mask); 3558 #endif 3559 3560 i915_gem_object_init(obj, &i915_gem_object_ops); 3561 3562 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 3563 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 3564 3565 if (HAS_LLC(dev)) { 3566 /* On some devices, we can have the GPU use the LLC (the CPU 3567 * cache) for about a 10% performance improvement 3568 * compared to uncached. Graphics requests other than 3569 * display scanout are coherent with the CPU in 3570 * accessing this cache. This means in this mode we 3571 * don't need to clflush on the CPU side, and on the 3572 * GPU side we only need to flush internal caches to 3573 * get data visible to the CPU. 3574 * 3575 * However, we maintain the display planes as UC, and so 3576 * need to rebind when first used as such. 3577 */ 3578 obj->cache_level = I915_CACHE_LLC; 3579 } else 3580 obj->cache_level = I915_CACHE_NONE; 3581 3582 return obj; 3583 } 3584 3585 int i915_gem_init_object(struct drm_gem_object *obj) 3586 { 3587 BUG(); 3588 3589 return 0; 3590 } 3591 3592 void i915_gem_free_object(struct drm_gem_object *gem_obj) 3593 { 3594 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); 3595 struct drm_device *dev = obj->base.dev; 3596 drm_i915_private_t *dev_priv = dev->dev_private; 3597 3598 if (obj->phys_obj) 3599 i915_gem_detach_phys_object(dev, obj); 3600 3601 obj->pin_count = 0; 3602 if (WARN_ON(i915_gem_object_unbind(obj) == -ERESTARTSYS)) { 3603 bool was_interruptible; 3604 3605 was_interruptible = dev_priv->mm.interruptible; 3606 dev_priv->mm.interruptible = false; 3607 3608 WARN_ON(i915_gem_object_unbind(obj)); 3609 3610 dev_priv->mm.interruptible = was_interruptible; 3611 } 3612 3613 /* Stolen objects don't hold a ref, but do hold pin count. Fix that up 3614 * before progressing. */ 3615 if (obj->stolen) 3616 i915_gem_object_unpin_pages(obj); 3617 3618 if (WARN_ON(obj->pages_pin_count)) 3619 obj->pages_pin_count = 0; 3620 i915_gem_object_put_pages(obj); 3621 drm_gem_free_mmap_offset(&obj->base); 3622 3623 BUG_ON(obj->pages); 3624 3625 drm_gem_object_release(&obj->base); 3626 i915_gem_info_remove_obj(dev_priv, obj->base.size); 3627 3628 kfree(obj->bit_17); 3629 i915_gem_object_free(obj); 3630 } 3631 3632 int 3633 i915_gem_idle(struct drm_device *dev) 3634 { 3635 drm_i915_private_t *dev_priv = dev->dev_private; 3636 int ret; 3637 3638 mutex_lock(&dev->struct_mutex); 3639 3640 if (dev_priv->mm.suspended) { 3641 mutex_unlock(&dev->struct_mutex); 3642 return 0; 3643 } 3644 3645 ret = i915_gpu_idle(dev); 3646 if (ret) { 3647 mutex_unlock(&dev->struct_mutex); 3648 return ret; 3649 } 3650 i915_gem_retire_requests(dev); 3651 3652 /* Under UMS, be paranoid and evict. */ 3653 if (!drm_core_check_feature(dev, DRIVER_MODESET)) 3654 i915_gem_evict_everything(dev); 3655 3656 /* Hack! Don't let anybody do execbuf while we don't control the chip. 3657 * We need to replace this with a semaphore, or something. 3658 * And not confound mm.suspended! 3659 */ 3660 dev_priv->mm.suspended = 1; 3661 del_timer_sync(&dev_priv->gpu_error.hangcheck_timer); 3662 3663 i915_kernel_lost_context(dev); 3664 i915_gem_cleanup_ringbuffer(dev); 3665 3666 mutex_unlock(&dev->struct_mutex); 3667 3668 /* Cancel the retire work handler, which should be idle now. */ 3669 cancel_delayed_work_sync(&dev_priv->mm.retire_work); 3670 3671 return 0; 3672 } 3673 3674 void i915_gem_l3_remap(struct drm_device *dev) 3675 { 3676 drm_i915_private_t *dev_priv = dev->dev_private; 3677 u32 misccpctl; 3678 int i; 3679 3680 if (!HAS_L3_GPU_CACHE(dev)) 3681 return; 3682 3683 if (!dev_priv->l3_parity.remap_info) 3684 return; 3685 3686 misccpctl = I915_READ(GEN7_MISCCPCTL); 3687 I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE); 3688 POSTING_READ(GEN7_MISCCPCTL); 3689 3690 for (i = 0; i < GEN7_L3LOG_SIZE; i += 4) { 3691 u32 remap = I915_READ(GEN7_L3LOG_BASE + i); 3692 if (remap && remap != dev_priv->l3_parity.remap_info[i/4]) 3693 DRM_DEBUG("0x%x was already programmed to %x\n", 3694 GEN7_L3LOG_BASE + i, remap); 3695 if (remap && !dev_priv->l3_parity.remap_info[i/4]) 3696 DRM_DEBUG_DRIVER("Clearing remapped register\n"); 3697 I915_WRITE(GEN7_L3LOG_BASE + i, dev_priv->l3_parity.remap_info[i/4]); 3698 } 3699 3700 /* Make sure all the writes land before disabling dop clock gating */ 3701 POSTING_READ(GEN7_L3LOG_BASE); 3702 3703 I915_WRITE(GEN7_MISCCPCTL, misccpctl); 3704 } 3705 3706 void i915_gem_init_swizzling(struct drm_device *dev) 3707 { 3708 drm_i915_private_t *dev_priv = dev->dev_private; 3709 3710 if (INTEL_INFO(dev)->gen < 5 || 3711 dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE) 3712 return; 3713 3714 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) | 3715 DISP_TILE_SURFACE_SWIZZLING); 3716 3717 if (IS_GEN5(dev)) 3718 return; 3719 3720 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL); 3721 if (IS_GEN6(dev)) 3722 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB)); 3723 else if (IS_GEN7(dev)) 3724 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB)); 3725 else 3726 BUG(); 3727 } 3728 3729 static bool 3730 intel_enable_blt(struct drm_device *dev) 3731 { 3732 int revision; 3733 3734 if (!HAS_BLT(dev)) 3735 return false; 3736 3737 /* The blitter was dysfunctional on early prototypes */ 3738 revision = pci_read_config(dev->dev, PCIR_REVID, 1); 3739 if (IS_GEN6(dev) && revision < 8) { 3740 DRM_INFO("BLT not supported on this pre-production hardware;" 3741 " graphics performance will be degraded.\n"); 3742 return false; 3743 } 3744 3745 return true; 3746 } 3747 3748 static int i915_gem_init_rings(struct drm_device *dev) 3749 { 3750 struct drm_i915_private *dev_priv = dev->dev_private; 3751 int ret; 3752 3753 ret = intel_init_render_ring_buffer(dev); 3754 if (ret) 3755 return ret; 3756 3757 if (HAS_BSD(dev)) { 3758 ret = intel_init_bsd_ring_buffer(dev); 3759 if (ret) 3760 goto cleanup_render_ring; 3761 } 3762 3763 if (intel_enable_blt(dev)) { 3764 ret = intel_init_blt_ring_buffer(dev); 3765 if (ret) 3766 goto cleanup_bsd_ring; 3767 } 3768 3769 if (HAS_VEBOX(dev)) { 3770 ret = intel_init_vebox_ring_buffer(dev); 3771 if (ret) 3772 goto cleanup_blt_ring; 3773 } 3774 3775 3776 ret = i915_gem_set_seqno(dev, ((u32)~0 - 0x1000)); 3777 if (ret) 3778 goto cleanup_vebox_ring; 3779 3780 return 0; 3781 3782 cleanup_vebox_ring: 3783 intel_cleanup_ring_buffer(&dev_priv->ring[VECS]); 3784 cleanup_blt_ring: 3785 intel_cleanup_ring_buffer(&dev_priv->ring[BCS]); 3786 cleanup_bsd_ring: 3787 intel_cleanup_ring_buffer(&dev_priv->ring[VCS]); 3788 cleanup_render_ring: 3789 intel_cleanup_ring_buffer(&dev_priv->ring[RCS]); 3790 3791 return ret; 3792 } 3793 3794 int 3795 i915_gem_init_hw(struct drm_device *dev) 3796 { 3797 drm_i915_private_t *dev_priv = dev->dev_private; 3798 int ret; 3799 3800 #if 0 3801 if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt()) 3802 return -EIO; 3803 #endif 3804 3805 if (IS_HASWELL(dev) && (I915_READ(0x120010) == 1)) 3806 I915_WRITE(0x9008, I915_READ(0x9008) | 0xf0000); 3807 3808 if (HAS_PCH_NOP(dev)) { 3809 u32 temp = I915_READ(GEN7_MSG_CTL); 3810 temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK); 3811 I915_WRITE(GEN7_MSG_CTL, temp); 3812 } 3813 3814 i915_gem_l3_remap(dev); 3815 3816 i915_gem_init_swizzling(dev); 3817 3818 ret = i915_gem_init_rings(dev); 3819 if (ret) 3820 return ret; 3821 3822 /* 3823 * XXX: There was some w/a described somewhere suggesting loading 3824 * contexts before PPGTT. 3825 */ 3826 i915_gem_context_init(dev); 3827 if (dev_priv->mm.aliasing_ppgtt) { 3828 ret = dev_priv->mm.aliasing_ppgtt->enable(dev); 3829 if (ret) { 3830 i915_gem_cleanup_aliasing_ppgtt(dev); 3831 DRM_INFO("PPGTT enable failed. This is not fatal, but unexpected\n"); 3832 } 3833 } 3834 3835 return 0; 3836 } 3837 3838 int i915_gem_init(struct drm_device *dev) 3839 { 3840 struct drm_i915_private *dev_priv = dev->dev_private; 3841 int ret; 3842 3843 mutex_lock(&dev->struct_mutex); 3844 3845 if (IS_VALLEYVIEW(dev)) { 3846 /* VLVA0 (potential hack), BIOS isn't actually waking us */ 3847 I915_WRITE(VLV_GTLC_WAKE_CTRL, 1); 3848 if (wait_for((I915_READ(VLV_GTLC_PW_STATUS) & 1) == 1, 10)) 3849 DRM_DEBUG_DRIVER("allow wake ack timed out\n"); 3850 } 3851 3852 i915_gem_init_global_gtt(dev); 3853 3854 ret = i915_gem_init_hw(dev); 3855 mutex_unlock(&dev->struct_mutex); 3856 if (ret) { 3857 i915_gem_cleanup_aliasing_ppgtt(dev); 3858 return ret; 3859 } 3860 3861 /* Allow hardware batchbuffers unless told otherwise, but not for KMS. */ 3862 if (!drm_core_check_feature(dev, DRIVER_MODESET)) 3863 dev_priv->dri1.allow_batchbuffer = 1; 3864 return 0; 3865 } 3866 3867 void 3868 i915_gem_cleanup_ringbuffer(struct drm_device *dev) 3869 { 3870 drm_i915_private_t *dev_priv = dev->dev_private; 3871 struct intel_ring_buffer *ring; 3872 int i; 3873 3874 for_each_ring(ring, dev_priv, i) 3875 intel_cleanup_ring_buffer(ring); 3876 } 3877 3878 int 3879 i915_gem_entervt_ioctl(struct drm_device *dev, void *data, 3880 struct drm_file *file_priv) 3881 { 3882 drm_i915_private_t *dev_priv = dev->dev_private; 3883 int ret; 3884 3885 if (drm_core_check_feature(dev, DRIVER_MODESET)) 3886 return 0; 3887 3888 if (i915_reset_in_progress(&dev_priv->gpu_error)) { 3889 DRM_ERROR("Reenabling wedged hardware, good luck\n"); 3890 atomic_set(&dev_priv->gpu_error.reset_counter, 0); 3891 } 3892 3893 mutex_lock(&dev->struct_mutex); 3894 dev_priv->mm.suspended = 0; 3895 3896 ret = i915_gem_init_hw(dev); 3897 if (ret != 0) { 3898 mutex_unlock(&dev->struct_mutex); 3899 return ret; 3900 } 3901 3902 KASSERT(list_empty(&dev_priv->mm.active_list), ("active list")); 3903 mutex_unlock(&dev->struct_mutex); 3904 3905 ret = drm_irq_install(dev); 3906 if (ret) 3907 goto cleanup_ringbuffer; 3908 3909 return 0; 3910 3911 cleanup_ringbuffer: 3912 mutex_lock(&dev->struct_mutex); 3913 i915_gem_cleanup_ringbuffer(dev); 3914 dev_priv->mm.suspended = 1; 3915 mutex_unlock(&dev->struct_mutex); 3916 3917 return ret; 3918 } 3919 3920 int 3921 i915_gem_leavevt_ioctl(struct drm_device *dev, void *data, 3922 struct drm_file *file_priv) 3923 { 3924 if (drm_core_check_feature(dev, DRIVER_MODESET)) 3925 return 0; 3926 3927 drm_irq_uninstall(dev); 3928 return i915_gem_idle(dev); 3929 } 3930 3931 void 3932 i915_gem_lastclose(struct drm_device *dev) 3933 { 3934 int ret; 3935 3936 if (drm_core_check_feature(dev, DRIVER_MODESET)) 3937 return; 3938 3939 ret = i915_gem_idle(dev); 3940 if (ret) 3941 DRM_ERROR("failed to idle hardware: %d\n", ret); 3942 } 3943 3944 static void 3945 init_ring_lists(struct intel_ring_buffer *ring) 3946 { 3947 INIT_LIST_HEAD(&ring->active_list); 3948 INIT_LIST_HEAD(&ring->request_list); 3949 } 3950 3951 void 3952 i915_gem_load(struct drm_device *dev) 3953 { 3954 int i; 3955 drm_i915_private_t *dev_priv = dev->dev_private; 3956 3957 INIT_LIST_HEAD(&dev_priv->mm.active_list); 3958 INIT_LIST_HEAD(&dev_priv->mm.inactive_list); 3959 INIT_LIST_HEAD(&dev_priv->mm.unbound_list); 3960 INIT_LIST_HEAD(&dev_priv->mm.bound_list); 3961 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 3962 for (i = 0; i < I915_NUM_RINGS; i++) 3963 init_ring_lists(&dev_priv->ring[i]); 3964 for (i = 0; i < I915_MAX_NUM_FENCES; i++) 3965 INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list); 3966 INIT_DELAYED_WORK(&dev_priv->mm.retire_work, 3967 i915_gem_retire_work_handler); 3968 init_waitqueue_head(&dev_priv->gpu_error.reset_queue); 3969 3970 /* On GEN3 we really need to make sure the ARB C3 LP bit is set */ 3971 if (IS_GEN3(dev)) { 3972 I915_WRITE(MI_ARB_STATE, 3973 _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE)); 3974 } 3975 3976 dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL; 3977 3978 /* Old X drivers will take 0-2 for front, back, depth buffers */ 3979 if (!drm_core_check_feature(dev, DRIVER_MODESET)) 3980 dev_priv->fence_reg_start = 3; 3981 3982 if (INTEL_INFO(dev)->gen >= 7 && !IS_VALLEYVIEW(dev)) 3983 dev_priv->num_fence_regs = 32; 3984 else if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev)) 3985 dev_priv->num_fence_regs = 16; 3986 else 3987 dev_priv->num_fence_regs = 8; 3988 3989 /* Initialize fence registers to zero */ 3990 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 3991 i915_gem_restore_fences(dev); 3992 3993 i915_gem_detect_bit_6_swizzle(dev); 3994 init_waitqueue_head(&dev_priv->pending_flip_queue); 3995 3996 dev_priv->mm.interruptible = true; 3997 3998 #if 0 3999 dev_priv->mm.inactive_shrinker.shrink = i915_gem_inactive_shrink; 4000 dev_priv->mm.inactive_shrinker.seeks = DEFAULT_SEEKS; 4001 register_shrinker(&dev_priv->mm.inactive_shrinker); 4002 #else 4003 dev_priv->mm.inactive_shrinker = EVENTHANDLER_REGISTER(vm_lowmem, 4004 i915_gem_lowmem, dev, EVENTHANDLER_PRI_ANY); 4005 #endif 4006 } 4007 4008 /* 4009 * Create a physically contiguous memory object for this object 4010 * e.g. for cursor + overlay regs 4011 */ 4012 static int i915_gem_init_phys_object(struct drm_device *dev, 4013 int id, int size, int align) 4014 { 4015 drm_i915_private_t *dev_priv = dev->dev_private; 4016 struct drm_i915_gem_phys_object *phys_obj; 4017 int ret; 4018 4019 if (dev_priv->mm.phys_objs[id - 1] || !size) 4020 return 0; 4021 4022 phys_obj = kzalloc(sizeof(struct drm_i915_gem_phys_object), GFP_KERNEL); 4023 if (!phys_obj) 4024 return -ENOMEM; 4025 4026 phys_obj->id = id; 4027 4028 phys_obj->handle = drm_pci_alloc(dev, size, align); 4029 if (!phys_obj->handle) { 4030 ret = -ENOMEM; 4031 goto kfree_obj; 4032 } 4033 pmap_change_attr((vm_offset_t)phys_obj->handle->vaddr, 4034 size / PAGE_SIZE, PAT_WRITE_COMBINING); 4035 4036 dev_priv->mm.phys_objs[id - 1] = phys_obj; 4037 4038 return 0; 4039 4040 kfree_obj: 4041 drm_free(phys_obj, M_DRM); 4042 return ret; 4043 } 4044 4045 static void i915_gem_free_phys_object(struct drm_device *dev, int id) 4046 { 4047 drm_i915_private_t *dev_priv = dev->dev_private; 4048 struct drm_i915_gem_phys_object *phys_obj; 4049 4050 if (!dev_priv->mm.phys_objs[id - 1]) 4051 return; 4052 4053 phys_obj = dev_priv->mm.phys_objs[id - 1]; 4054 if (phys_obj->cur_obj) { 4055 i915_gem_detach_phys_object(dev, phys_obj->cur_obj); 4056 } 4057 4058 drm_pci_free(dev, phys_obj->handle); 4059 drm_free(phys_obj, M_DRM); 4060 dev_priv->mm.phys_objs[id - 1] = NULL; 4061 } 4062 4063 void i915_gem_free_all_phys_object(struct drm_device *dev) 4064 { 4065 int i; 4066 4067 for (i = I915_GEM_PHYS_CURSOR_0; i <= I915_MAX_PHYS_OBJECT; i++) 4068 i915_gem_free_phys_object(dev, i); 4069 } 4070 4071 void i915_gem_detach_phys_object(struct drm_device *dev, 4072 struct drm_i915_gem_object *obj) 4073 { 4074 struct vm_object *mapping = obj->base.vm_obj; 4075 char *vaddr; 4076 int i; 4077 int page_count; 4078 4079 if (!obj->phys_obj) 4080 return; 4081 vaddr = obj->phys_obj->handle->vaddr; 4082 4083 page_count = obj->base.size / PAGE_SIZE; 4084 VM_OBJECT_LOCK(obj->base.vm_obj); 4085 for (i = 0; i < page_count; i++) { 4086 struct vm_page *page = shmem_read_mapping_page(mapping, i); 4087 if (!IS_ERR(page)) { 4088 VM_OBJECT_UNLOCK(obj->base.vm_obj); 4089 char *dst = kmap_atomic(page); 4090 memcpy(dst, vaddr + i*PAGE_SIZE, PAGE_SIZE); 4091 kunmap_atomic(dst); 4092 4093 drm_clflush_pages(&page, 1); 4094 4095 #if 0 4096 set_page_dirty(page); 4097 mark_page_accessed(page); 4098 page_cache_release(page); 4099 #endif 4100 VM_OBJECT_LOCK(obj->base.vm_obj); 4101 vm_page_reference(page); 4102 vm_page_dirty(page); 4103 vm_page_busy_wait(page, FALSE, "i915gem"); 4104 vm_page_unwire(page, 0); 4105 vm_page_wakeup(page); 4106 } 4107 } 4108 VM_OBJECT_UNLOCK(obj->base.vm_obj); 4109 intel_gtt_chipset_flush(); 4110 4111 obj->phys_obj->cur_obj = NULL; 4112 obj->phys_obj = NULL; 4113 } 4114 4115 int 4116 i915_gem_attach_phys_object(struct drm_device *dev, 4117 struct drm_i915_gem_object *obj, 4118 int id, 4119 int align) 4120 { 4121 struct vm_object *mapping = obj->base.vm_obj; 4122 drm_i915_private_t *dev_priv = dev->dev_private; 4123 int ret = 0; 4124 int page_count; 4125 int i; 4126 4127 if (id > I915_MAX_PHYS_OBJECT) 4128 return -EINVAL; 4129 4130 if (obj->phys_obj) { 4131 if (obj->phys_obj->id == id) 4132 return 0; 4133 i915_gem_detach_phys_object(dev, obj); 4134 } 4135 4136 /* create a new object */ 4137 if (!dev_priv->mm.phys_objs[id - 1]) { 4138 ret = i915_gem_init_phys_object(dev, id, 4139 obj->base.size, align); 4140 if (ret) { 4141 DRM_ERROR("failed to init phys object %d size: %zu\n", 4142 id, obj->base.size); 4143 return ret; 4144 } 4145 } 4146 4147 /* bind to the object */ 4148 obj->phys_obj = dev_priv->mm.phys_objs[id - 1]; 4149 obj->phys_obj->cur_obj = obj; 4150 4151 page_count = obj->base.size / PAGE_SIZE; 4152 4153 VM_OBJECT_LOCK(obj->base.vm_obj); 4154 for (i = 0; i < page_count; i++) { 4155 struct vm_page *page; 4156 char *dst, *src; 4157 4158 page = shmem_read_mapping_page(mapping, i); 4159 VM_OBJECT_UNLOCK(obj->base.vm_obj); 4160 if (IS_ERR(page)) 4161 return PTR_ERR(page); 4162 4163 src = kmap_atomic(page); 4164 dst = (char*)obj->phys_obj->handle->vaddr + (i * PAGE_SIZE); 4165 memcpy(dst, src, PAGE_SIZE); 4166 kunmap_atomic(src); 4167 4168 #if 0 4169 mark_page_accessed(page); 4170 page_cache_release(page); 4171 #endif 4172 VM_OBJECT_LOCK(obj->base.vm_obj); 4173 vm_page_reference(page); 4174 vm_page_busy_wait(page, FALSE, "i915gem"); 4175 vm_page_unwire(page, 0); 4176 vm_page_wakeup(page); 4177 } 4178 VM_OBJECT_UNLOCK(obj->base.vm_obj); 4179 4180 return 0; 4181 } 4182 4183 static int 4184 i915_gem_phys_pwrite(struct drm_device *dev, 4185 struct drm_i915_gem_object *obj, 4186 struct drm_i915_gem_pwrite *args, 4187 struct drm_file *file_priv) 4188 { 4189 void *vaddr = (char *)obj->phys_obj->handle->vaddr + args->offset; 4190 char __user *user_data = (char __user *) (uintptr_t) args->data_ptr; 4191 4192 if (copyin_nofault(user_data, vaddr, args->size) != 0) { 4193 unsigned long unwritten; 4194 4195 /* The physical object once assigned is fixed for the lifetime 4196 * of the obj, so we can safely drop the lock and continue 4197 * to access vaddr. 4198 */ 4199 mutex_unlock(&dev->struct_mutex); 4200 unwritten = copy_from_user(vaddr, user_data, args->size); 4201 mutex_lock(&dev->struct_mutex); 4202 if (unwritten) 4203 return -EFAULT; 4204 } 4205 4206 i915_gem_chipset_flush(dev); 4207 return 0; 4208 } 4209 4210 void i915_gem_release(struct drm_device *dev, struct drm_file *file) 4211 { 4212 struct drm_i915_file_private *file_priv = file->driver_priv; 4213 4214 /* Clean up our request list when the client is going away, so that 4215 * later retire_requests won't dereference our soon-to-be-gone 4216 * file_priv. 4217 */ 4218 spin_lock(&file_priv->mm.lock); 4219 while (!list_empty(&file_priv->mm.request_list)) { 4220 struct drm_i915_gem_request *request; 4221 4222 request = list_first_entry(&file_priv->mm.request_list, 4223 struct drm_i915_gem_request, 4224 client_list); 4225 list_del(&request->client_list); 4226 request->file_priv = NULL; 4227 } 4228 spin_unlock(&file_priv->mm.lock); 4229 } 4230 4231 static int 4232 i915_gem_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot, 4233 vm_ooffset_t foff, struct ucred *cred, u_short *color) 4234 { 4235 4236 *color = 0; /* XXXKIB */ 4237 return (0); 4238 } 4239 4240 int i915_intr_pf; 4241 4242 static int 4243 i915_gem_pager_fault(vm_object_t vm_obj, vm_ooffset_t offset, int prot, 4244 vm_page_t *mres) 4245 { 4246 struct drm_gem_object *gem_obj; 4247 struct drm_i915_gem_object *obj; 4248 struct drm_device *dev; 4249 drm_i915_private_t *dev_priv; 4250 vm_page_t m, oldm; 4251 int cause, ret; 4252 bool write; 4253 4254 gem_obj = vm_obj->handle; 4255 obj = to_intel_bo(gem_obj); 4256 dev = obj->base.dev; 4257 dev_priv = dev->dev_private; 4258 #if 0 4259 write = (prot & VM_PROT_WRITE) != 0; 4260 #else 4261 write = true; 4262 #endif 4263 vm_object_pip_add(vm_obj, 1); 4264 4265 /* 4266 * Remove the placeholder page inserted by vm_fault() from the 4267 * object before dropping the object lock. If 4268 * i915_gem_release_mmap() is active in parallel on this gem 4269 * object, then it owns the drm device sx and might find the 4270 * placeholder already. Then, since the page is busy, 4271 * i915_gem_release_mmap() sleeps waiting for the busy state 4272 * of the page cleared. We will be not able to acquire drm 4273 * device lock until i915_gem_release_mmap() is able to make a 4274 * progress. 4275 */ 4276 if (*mres != NULL) { 4277 oldm = *mres; 4278 vm_page_remove(oldm); 4279 *mres = NULL; 4280 } else 4281 oldm = NULL; 4282 retry: 4283 VM_OBJECT_UNLOCK(vm_obj); 4284 unlocked_vmobj: 4285 cause = ret = 0; 4286 m = NULL; 4287 4288 if (i915_intr_pf) { 4289 ret = i915_mutex_lock_interruptible(dev); 4290 if (ret != 0) { 4291 cause = 10; 4292 goto out; 4293 } 4294 } else 4295 mutex_lock(&dev->struct_mutex); 4296 4297 /* 4298 * Since the object lock was dropped, other thread might have 4299 * faulted on the same GTT address and instantiated the 4300 * mapping for the page. Recheck. 4301 */ 4302 VM_OBJECT_LOCK(vm_obj); 4303 m = vm_page_lookup(vm_obj, OFF_TO_IDX(offset)); 4304 if (m != NULL) { 4305 if ((m->flags & PG_BUSY) != 0) { 4306 mutex_unlock(&dev->struct_mutex); 4307 #if 0 /* XXX */ 4308 vm_page_sleep(m, "915pee"); 4309 #endif 4310 goto retry; 4311 } 4312 goto have_page; 4313 } else 4314 VM_OBJECT_UNLOCK(vm_obj); 4315 4316 /* Access to snoopable pages through the GTT is incoherent. */ 4317 if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) { 4318 ret = -EINVAL; 4319 goto unlock; 4320 } 4321 4322 /* Now bind it into the GTT if needed */ 4323 if (!obj->map_and_fenceable) { 4324 ret = i915_gem_object_unbind(obj); 4325 if (ret != 0) { 4326 cause = 20; 4327 goto unlock; 4328 } 4329 } 4330 if (!obj->gtt_space) { 4331 ret = i915_gem_object_bind_to_gtt(obj, 0, true, false); 4332 if (ret != 0) { 4333 cause = 30; 4334 goto unlock; 4335 } 4336 4337 ret = i915_gem_object_set_to_gtt_domain(obj, write); 4338 if (ret != 0) { 4339 cause = 40; 4340 goto unlock; 4341 } 4342 } 4343 4344 if (obj->tiling_mode == I915_TILING_NONE) 4345 ret = i915_gem_object_put_fence(obj); 4346 else 4347 ret = i915_gem_object_get_fence(obj); 4348 if (ret != 0) { 4349 cause = 50; 4350 goto unlock; 4351 } 4352 4353 if (i915_gem_object_is_inactive(obj)) 4354 list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list); 4355 4356 obj->fault_mappable = true; 4357 VM_OBJECT_LOCK(vm_obj); 4358 m = vm_phys_fictitious_to_vm_page(dev->agp->base + obj->gtt_offset + 4359 offset); 4360 if (m == NULL) { 4361 cause = 60; 4362 ret = -EFAULT; 4363 goto unlock; 4364 } 4365 KASSERT((m->flags & PG_FICTITIOUS) != 0, 4366 ("not fictitious %p", m)); 4367 KASSERT(m->wire_count == 1, ("wire_count not 1 %p", m)); 4368 4369 if ((m->flags & PG_BUSY) != 0) { 4370 mutex_unlock(&dev->struct_mutex); 4371 #if 0 /* XXX */ 4372 vm_page_sleep(m, "915pbs"); 4373 #endif 4374 goto retry; 4375 } 4376 m->valid = VM_PAGE_BITS_ALL; 4377 vm_page_insert(m, vm_obj, OFF_TO_IDX(offset)); 4378 have_page: 4379 *mres = m; 4380 vm_page_busy_try(m, false); 4381 4382 mutex_unlock(&dev->struct_mutex); 4383 if (oldm != NULL) { 4384 vm_page_free(oldm); 4385 } 4386 vm_object_pip_wakeup(vm_obj); 4387 return (VM_PAGER_OK); 4388 4389 unlock: 4390 mutex_unlock(&dev->struct_mutex); 4391 out: 4392 KASSERT(ret != 0, ("i915_gem_pager_fault: wrong return")); 4393 if (ret == -EAGAIN || ret == -EIO || ret == -EINTR) { 4394 goto unlocked_vmobj; 4395 } 4396 VM_OBJECT_LOCK(vm_obj); 4397 vm_object_pip_wakeup(vm_obj); 4398 return (VM_PAGER_ERROR); 4399 } 4400 4401 static void 4402 i915_gem_pager_dtor(void *handle) 4403 { 4404 struct drm_gem_object *obj; 4405 struct drm_device *dev; 4406 4407 obj = handle; 4408 dev = obj->dev; 4409 4410 mutex_lock(&dev->struct_mutex); 4411 drm_gem_free_mmap_offset(obj); 4412 i915_gem_release_mmap(to_intel_bo(obj)); 4413 drm_gem_object_unreference(obj); 4414 mutex_unlock(&dev->struct_mutex); 4415 } 4416 4417 struct cdev_pager_ops i915_gem_pager_ops = { 4418 .cdev_pg_fault = i915_gem_pager_fault, 4419 .cdev_pg_ctor = i915_gem_pager_ctor, 4420 .cdev_pg_dtor = i915_gem_pager_dtor 4421 }; 4422 4423 #define GEM_PARANOID_CHECK_GTT 0 4424 #if GEM_PARANOID_CHECK_GTT 4425 static void 4426 i915_gem_assert_pages_not_mapped(struct drm_device *dev, vm_page_t *ma, 4427 int page_count) 4428 { 4429 struct drm_i915_private *dev_priv; 4430 vm_paddr_t pa; 4431 unsigned long start, end; 4432 u_int i; 4433 int j; 4434 4435 dev_priv = dev->dev_private; 4436 start = OFF_TO_IDX(dev_priv->mm.gtt_start); 4437 end = OFF_TO_IDX(dev_priv->mm.gtt_end); 4438 for (i = start; i < end; i++) { 4439 pa = intel_gtt_read_pte_paddr(i); 4440 for (j = 0; j < page_count; j++) { 4441 if (pa == VM_PAGE_TO_PHYS(ma[j])) { 4442 panic("Page %p in GTT pte index %d pte %x", 4443 ma[i], i, intel_gtt_read_pte(i)); 4444 } 4445 } 4446 } 4447 obj->fence_dirty = false; 4448 } 4449 #endif 4450 4451 static int 4452 i915_gpu_is_active(struct drm_device *dev) 4453 { 4454 drm_i915_private_t *dev_priv = dev->dev_private; 4455 4456 return !list_empty(&dev_priv->mm.active_list); 4457 } 4458 4459 static void 4460 i915_gem_lowmem(void *arg) 4461 { 4462 struct drm_device *dev; 4463 struct drm_i915_private *dev_priv; 4464 struct drm_i915_gem_object *obj, *next; 4465 int cnt, cnt_fail, cnt_total; 4466 4467 dev = arg; 4468 dev_priv = dev->dev_private; 4469 4470 if (lockmgr(&dev->struct_mutex, LK_EXCLUSIVE|LK_NOWAIT)) 4471 return; 4472 4473 rescan: 4474 /* first scan for clean buffers */ 4475 i915_gem_retire_requests(dev); 4476 4477 cnt_total = cnt_fail = cnt = 0; 4478 4479 list_for_each_entry_safe(obj, next, &dev_priv->mm.inactive_list, 4480 mm_list) { 4481 if (i915_gem_object_is_purgeable(obj)) { 4482 if (i915_gem_object_unbind(obj) != 0) 4483 cnt_total++; 4484 } else 4485 cnt_total++; 4486 } 4487 4488 /* second pass, evict/count anything still on the inactive list */ 4489 list_for_each_entry_safe(obj, next, &dev_priv->mm.inactive_list, 4490 mm_list) { 4491 if (i915_gem_object_unbind(obj) == 0) 4492 cnt++; 4493 else 4494 cnt_fail++; 4495 } 4496 4497 if (cnt_fail > cnt_total / 100 && i915_gpu_is_active(dev)) { 4498 /* 4499 * We are desperate for pages, so as a last resort, wait 4500 * for the GPU to finish and discard whatever we can. 4501 * This has a dramatic impact to reduce the number of 4502 * OOM-killer events whilst running the GPU aggressively. 4503 */ 4504 if (i915_gpu_idle(dev) == 0) 4505 goto rescan; 4506 } 4507 mutex_unlock(&dev->struct_mutex); 4508 } 4509