/*
 * Copyright © 2008 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 * Copyright (c) 2011 The FreeBSD Foundation
 * All rights reserved.
 *
 * This software was developed by Konstantin Belousov under sponsorship from
 * the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: head/sys/dev/drm2/i915/i915_gem.c 253497 2013-07-20 13:52:40Z kib $
 */

#include <sys/resourcevar.h>
#include <sys/sfbuf.h>

#include <drm/drmP.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "intel_drv.h"
#include "intel_ringbuffer.h"
#include <linux/completion.h>
#include <linux/jiffies.h>
#include <linux/time.h>

static __must_check int i915_gem_object_flush_gpu_write_domain(struct drm_i915_gem_object *obj);
static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
static int i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
    unsigned alignment, bool map_and_fenceable);
static int i915_gem_phys_pwrite(struct drm_device *dev,
    struct drm_i915_gem_object *obj, uint64_t data_ptr, uint64_t offset,
    uint64_t size, struct drm_file *file_priv);

static void i915_gem_write_fence(struct drm_device *dev, int reg,
				 struct drm_i915_gem_object *obj);
static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
					 struct drm_i915_fence_reg *fence,
					 bool enable);

static uint32_t i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size,
    int tiling_mode);
static uint32_t i915_gem_get_gtt_alignment(struct drm_device *dev,
    uint32_t size, int tiling_mode);
static int i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj,
    int flags);
static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj);
static void i915_gem_object_truncate(struct drm_i915_gem_object *obj);

static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
{
	if (obj->tiling_mode)
		i915_gem_release_mmap(obj);

	/* As we do not have an associated fence register, we will force
	 * a tiling change if we ever need to acquire one.
	 */
	obj->fence_dirty = false;
	obj->fence_reg = I915_FENCE_REG_NONE;
}

static int i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj);
static bool i915_gem_object_is_inactive(struct drm_i915_gem_object *obj);
static int i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj);
static vm_page_t i915_gem_wire_page(vm_object_t object, vm_pindex_t pindex);
static void i915_gem_process_flushing_list(struct intel_ring_buffer *ring,
    uint32_t flush_domains);
static void i915_gem_reset_fences(struct drm_device *dev);
static void i915_gem_lowmem(void *arg);

static int i915_gem_obj_io(struct drm_device *dev, uint32_t handle, uint64_t data_ptr,
    uint64_t size, uint64_t offset, enum uio_rw rw, struct drm_file *file);

MALLOC_DEFINE(DRM_I915_GEM, "i915gem", "Allocations from i915 gem");
long i915_gem_wired_pages_cnt;

/* some bookkeeping */
static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
				  size_t size)
{

	dev_priv->mm.object_count++;
	dev_priv->mm.object_memory += size;
}

static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
				     size_t size)
{

	dev_priv->mm.object_count--;
	dev_priv->mm.object_memory -= size;
}

static int
i915_gem_wait_for_error(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct completion *x = &dev_priv->error_completion;
	int ret;

	if (!atomic_read(&dev_priv->mm.wedged))
		return 0;

	/*
	 * Only wait 10 seconds for the gpu reset to complete to avoid hanging
	 * userspace. If it takes that long something really bad is going on and
	 * we should simply try to bail out and fail as gracefully as possible.
	 */
	ret = wait_for_completion_interruptible_timeout(x, 10*hz);
	if (ret == 0) {
		DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
		return -EIO;
	} else if (ret < 0) {
		return ret;
	}

	if (atomic_read(&dev_priv->mm.wedged)) {
		/* GPU is hung, bump the completion count to account for
		 * the token we just consumed so that we never hit zero and
		 * end up waiting upon a subsequent completion event that
		 * will never happen.
		 */
		spin_lock(&x->wait.lock);
		x->done++;
		spin_unlock(&x->wait.lock);
	}
	return 0;
}

int i915_mutex_lock_interruptible(struct drm_device *dev)
{
	int ret;

	ret = i915_gem_wait_for_error(dev);
	if (ret != 0)
		return (ret);

	ret = lockmgr(&dev->dev_struct_lock, LK_EXCLUSIVE|LK_SLEEPFAIL);
	if (ret)
		return -EINTR;

	WARN_ON(i915_verify_lists(dev));
	return 0;
}

static inline bool
i915_gem_object_is_inactive(struct drm_i915_gem_object *obj)
{
	return !obj->active;
}

int
i915_gem_init_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_gem_init *args = data;

	if (drm_core_check_feature(dev, DRIVER_MODESET))
		return -ENODEV;

	if (args->gtt_start >= args->gtt_end ||
	    (args->gtt_end | args->gtt_start) & (PAGE_SIZE - 1))
		return -EINVAL;

	/* GEM with user mode setting was never supported on ilk and later. */
	if (INTEL_INFO(dev)->gen >= 5)
		return -ENODEV;

	/*
	 * XXXKIB. The second-time initialization should be guarded
	 * against.
	 */
	lockmgr(&dev->dev_lock, LK_EXCLUSIVE|LK_RETRY|LK_CANRECURSE);
	i915_gem_do_init(dev, args->gtt_start, args->gtt_end, args->gtt_end);
	lockmgr(&dev->dev_lock, LK_RELEASE);

	return 0;
}

int
i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
			    struct drm_file *file)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_get_aperture *args = data;
	struct drm_i915_gem_object *obj;
	size_t pinned;

	pinned = 0;
	DRM_LOCK(dev);
	list_for_each_entry(obj, &dev_priv->mm.gtt_list, gtt_list)
		if (obj->pin_count)
			pinned += obj->gtt_space->size;
	DRM_UNLOCK(dev);

	args->aper_size = dev_priv->mm.gtt_total;
	args->aper_available_size = args->aper_size - pinned;

	return 0;
}

static int
i915_gem_create(struct drm_file *file, struct drm_device *dev, uint64_t size,
    uint32_t *handle_p)
{
	struct drm_i915_gem_object *obj;
	uint32_t handle;
	int ret;

	size = roundup(size, PAGE_SIZE);
	if (size == 0)
		return (-EINVAL);

	obj = i915_gem_alloc_object(dev, size);
	if (obj == NULL)
		return (-ENOMEM);

	handle = 0;
	ret = drm_gem_handle_create(file, &obj->base, &handle);
	if (ret != 0) {
		drm_gem_object_release(&obj->base);
		i915_gem_info_remove_obj(dev->dev_private, obj->base.size);
		drm_free(obj, DRM_I915_GEM);
		return (-ret);
	}

	/* drop reference from allocate - handle holds it now */
	drm_gem_object_unreference(&obj->base);
	*handle_p = handle;
	return (0);
}

int
i915_gem_dumb_create(struct drm_file *file,
		     struct drm_device *dev,
		     struct drm_mode_create_dumb *args)
{

	/* have to work out size/pitch and return them */
	args->pitch = roundup2(args->width * ((args->bpp + 7) / 8), 64);
	args->size = args->pitch * args->height;
	return (i915_gem_create(file, dev, args->size, &args->handle));
}

int i915_gem_dumb_destroy(struct drm_file *file,
			  struct drm_device *dev,
			  uint32_t handle)
{

	return (drm_gem_handle_delete(file, handle));
}

/**
 * Creates a new mm object and returns a handle to it.
 */
int
i915_gem_create_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_gem_create *args = data;

	return (i915_gem_create(file, dev, args->size, &args->handle));
}

static int i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj)
{
	drm_i915_private_t *dev_priv;

	dev_priv = obj->base.dev->dev_private;
	return (dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 &&
	    obj->tiling_mode != I915_TILING_NONE);
}

/**
 * Reads data from the object referenced by handle.
 *
 * On error, the contents of *data are undefined.
 */
int
i915_gem_pread_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file)
{
	struct drm_i915_gem_pread *args;

	args = data;
	return (i915_gem_obj_io(dev, args->handle, args->data_ptr, args->size,
	    args->offset, UIO_READ, file));
}

/**
 * Writes data to the object referenced by handle.
 *
 * On error, the contents of the buffer that were to be modified are undefined.
 */
int
i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_gem_pwrite *args;

	args = data;
	return (i915_gem_obj_io(dev, args->handle, args->data_ptr, args->size,
	    args->offset, UIO_WRITE, file));
}

int
i915_gem_check_wedge(struct drm_i915_private *dev_priv,
		     bool interruptible)
{
	if (atomic_read(&dev_priv->mm.wedged)) {
		struct completion *x = &dev_priv->error_completion;
		bool recovery_complete;

		/* Give the error handler a chance to run. */
		spin_lock(&x->wait.lock);
		recovery_complete = x->done > 0;
		spin_unlock(&x->wait.lock);

		/* Non-interruptible callers can't handle -EAGAIN, hence return
		 * -EIO unconditionally for these. */
		if (!interruptible)
			return -EIO;

		/* Recovery complete, but still wedged means reset failure. */
		if (recovery_complete)
			return -EIO;

		return -EAGAIN;
	}

	return 0;
}

/*
 * Compare seqno against outstanding lazy request. Emit a request if they are
 * equal.
 */
static int
i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno)
{
	int ret;

	DRM_LOCK_ASSERT(ring->dev);

	ret = 0;
	if (seqno == ring->outstanding_lazy_request)
		ret = i915_add_request(ring, NULL, NULL);

	return ret;
}

/**
 * __wait_seqno - wait until execution of seqno has finished
 * @ring: the ring expected to report seqno
 * @seqno: duh!
 * @interruptible: do an interruptible wait (normally yes)
 * @timeout: in - how long to wait (NULL forever); out - how much time remaining
 *
 * Returns 0 if the seqno was found within the allotted time. Else returns the
 * errno with remaining time filled in timeout argument.
 */
static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
			bool interruptible, struct timespec *timeout)
{
	drm_i915_private_t *dev_priv = ring->dev->dev_private;
	struct timespec before, now, wait_time={1,0};
	unsigned long timeout_jiffies;
	long end;
	bool wait_forever = true;
	int ret;

	if (i915_seqno_passed(ring->get_seqno(ring, true), seqno))
		return 0;

	if (timeout != NULL) {
		wait_time = *timeout;
		wait_forever = false;
	}

	timeout_jiffies = timespec_to_jiffies(&wait_time);

	if (WARN_ON(!ring->irq_get(ring)))
		return -ENODEV;

	/* Record current time in case interrupted by signal, or wedged */
	getrawmonotonic(&before);

#define EXIT_COND \
	(i915_seqno_passed(ring->get_seqno(ring, false), seqno) || \
	atomic_read(&dev_priv->mm.wedged))
	do {
		if (interruptible)
			end = wait_event_interruptible_timeout(ring->irq_queue,
							       EXIT_COND,
							       timeout_jiffies);
		else
			end = wait_event_timeout(ring->irq_queue, EXIT_COND,
						 timeout_jiffies);

		ret = i915_gem_check_wedge(dev_priv, interruptible);
		if (ret)
			end = ret;
	} while (end == 0 && wait_forever);

	getrawmonotonic(&now);

	ring->irq_put(ring);
#undef EXIT_COND

	if (timeout) {
		struct timespec sleep_time = timespec_sub(now, before);
		*timeout = timespec_sub(*timeout, sleep_time);
	}

	switch (end) {
	case -EIO:
	case -EAGAIN: /* Wedged */
	case -ERESTARTSYS: /* Signal */
		return (int)end;
	case 0: /* Timeout */
		if (timeout)
			set_normalized_timespec(timeout, 0, 0);
		return -ETIMEDOUT;	/* -ETIME on Linux */
	default: /* Completed */
		WARN_ON(end < 0); /* We're not aware of other errors */
		return 0;
	}
}

/**
 * Waits for a sequence number to be signaled, and cleans up the
 * request and object lists appropriately for that event.
 */
int
i915_wait_seqno(struct intel_ring_buffer *ring, uint32_t seqno)
{
	drm_i915_private_t *dev_priv = ring->dev->dev_private;
	int ret = 0;

	BUG_ON(seqno == 0);

	ret = i915_gem_check_wedge(dev_priv, dev_priv->mm.interruptible);
	if (ret)
		return ret;

	ret = i915_gem_check_olr(ring, seqno);
	if (ret)
		return ret;

	ret = __wait_seqno(ring, seqno, dev_priv->mm.interruptible, NULL);

	return ret;
}

/**
 * Ensures that all rendering to the object has completed and the object is
 * safe to unbind from the GTT or access from the CPU.
 */
static __must_check int
i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
			       bool readonly)
{
	u32 seqno;
	int ret;

	/* This function only exists to support waiting for existing rendering,
	 * not for emitting required flushes.
	 */
	BUG_ON((obj->base.write_domain & I915_GEM_GPU_DOMAINS) != 0);

	/* If there is rendering queued on the buffer being evicted, wait for
	 * it.
	 */
	if (readonly)
		seqno = obj->last_write_seqno;
	else
		seqno = obj->last_read_seqno;
	if (seqno == 0)
		return 0;

	ret = i915_wait_seqno(obj->ring, seqno);
	if (ret)
		return ret;

	/* Manually manage the write flush as we may have not yet retired
	 * the buffer.
	 */
	if (obj->last_write_seqno &&
	    i915_seqno_passed(seqno, obj->last_write_seqno)) {
		obj->last_write_seqno = 0;
		obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
	}

	i915_gem_retire_requests_ring(obj->ring);
	return 0;
}

/**
 * Ensures that an object will eventually get non-busy by flushing any required
 * write domains, emitting any outstanding lazy request and retiring
 * completed requests.
 */
static int
i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
{
	int ret;

	if (obj->active) {
		ret = i915_gem_object_flush_gpu_write_domain(obj);
		if (ret)
			return ret;

		ret = i915_gem_check_olr(obj->ring, obj->last_read_seqno);
		if (ret)
			return ret;

		i915_gem_retire_requests_ring(obj->ring);
	}

	return 0;
}

/**
 * Called when user space prepares to use an object with the CPU, either
 * through the mmap ioctl's mapping or a GTT mapping.
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file)
{
	struct drm_i915_gem_set_domain *args = data;
	struct drm_i915_gem_object *obj;
	uint32_t read_domains = args->read_domains;
	uint32_t write_domain = args->write_domain;
	int ret;

	/* Only handle setting domains to types used by the CPU. */
	if (write_domain & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	if (read_domains & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	/* Having something in the write domain implies it's in the read
	 * domain, and only that read domain.  Enforce that in the request.
	 */
	if (write_domain != 0 && read_domains != write_domain)
		return -EINVAL;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	if (read_domains & I915_GEM_DOMAIN_GTT) {
		ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);

		/* Silently promote "you're not bound, there was nothing to do"
		 * to success, since the client was just asking us to
		 * make sure everything was done.
		 */
		if (ret == -EINVAL)
			ret = 0;
	} else {
		ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
	}

	drm_gem_object_unreference(&obj->base);
unlock:
	DRM_UNLOCK(dev);
	return ret;
}

/**
 * Called when user space has done writes to this buffer
 */
int
i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file)
{
	struct drm_i915_gem_sw_finish *args = data;
	struct drm_i915_gem_object *obj;
	int ret = 0;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret != 0)
		return (ret);
	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Pinned buffers may be scanout, so flush the cache */
	if (obj->pin_count != 0)
		i915_gem_object_flush_cpu_write_domain(obj);

	drm_gem_object_unreference(&obj->base);
unlock:
	DRM_UNLOCK(dev);
	return (ret);
}

/**
 * Maps the contents of an object, returning the address it is mapped
 * into.
 *
 * While the mapping holds a reference on the contents of the object, it doesn't
 * imply a ref on the object itself.
 */
int
i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_gem_mmap *args;
	struct drm_gem_object *obj;
	struct proc *p;
	vm_map_t map;
	vm_offset_t addr;
	vm_size_t size;
	int error, rv;

	args = data;

	obj = drm_gem_object_lookup(dev, file, args->handle);
	if (obj == NULL)
		return (-ENOENT);
	error = 0;
	if (args->size == 0)
		goto out;
	p = curproc;
	map = &p->p_vmspace->vm_map;
	size = round_page(args->size);
	PROC_LOCK(p);
	if (map->size + size > p->p_rlimit[RLIMIT_VMEM].rlim_cur) {
		PROC_UNLOCK(p);
		error = ENOMEM;
		goto out;
	}
	PROC_UNLOCK(p);

	addr = 0;
	vm_object_hold(obj->vm_obj);
	vm_object_reference_locked(obj->vm_obj);
	vm_object_drop(obj->vm_obj);
	DRM_UNLOCK(dev);
	rv = vm_map_find(map, obj->vm_obj, args->offset, &addr, args->size,
	    PAGE_SIZE, /* align */
	    TRUE, /* fitit */
	    VM_MAPTYPE_NORMAL, /* maptype */
	    VM_PROT_READ | VM_PROT_WRITE, /* prot */
	    VM_PROT_READ | VM_PROT_WRITE, /* max */
	    MAP_SHARED /* cow */);
	if (rv != KERN_SUCCESS) {
		vm_object_deallocate(obj->vm_obj);
		error = -vm_mmap_to_errno(rv);
	} else {
		args->addr_ptr = (uint64_t)addr;
	}
	DRM_LOCK(dev);
out:
	drm_gem_object_unreference(obj);
	return (error);
}

/**
 * i915_gem_release_mmap - remove physical page mappings
 * @obj: obj in question
 *
 * Preserve the reservation of the mmapping with the DRM core code, but
 * relinquish ownership of the pages back to the system.
 *
 * It is vital that we remove the page mapping if we have mapped a tiled
 * object through the GTT and then lose the fence register due to
 * resource pressure. Similarly if the object has been moved out of the
 * aperture, then pages mapped into userspace must be revoked. Removing the
 * mapping will then trigger a page fault on the next user access, allowing
 * fixup by i915_gem_fault().
 */
void
i915_gem_release_mmap(struct drm_i915_gem_object *obj)
{
	vm_object_t devobj;
	vm_page_t m;
	int i, page_count;

	if (!obj->fault_mappable)
		return;

	devobj = cdev_pager_lookup(obj);
	if (devobj != NULL) {
		page_count = OFF_TO_IDX(obj->base.size);

		VM_OBJECT_LOCK(devobj);
		for (i = 0; i < page_count; i++) {
			m = vm_page_lookup_busy_wait(devobj, i, TRUE, "915unm");
			if (m == NULL)
				continue;
			cdev_pager_free_page(devobj, m);
		}
		VM_OBJECT_UNLOCK(devobj);
		vm_object_deallocate(devobj);
	}

	obj->fault_mappable = false;
}

static uint32_t
i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
{
	uint32_t gtt_size;

	if (INTEL_INFO(dev)->gen >= 4 ||
	    tiling_mode == I915_TILING_NONE)
		return (size);

	/* Previous chips need a power-of-two fence region when tiling */
	if (INTEL_INFO(dev)->gen == 3)
		gtt_size = 1024*1024;
	else
		gtt_size = 512*1024;

	while (gtt_size < size)
		gtt_size <<= 1;

	return (gtt_size);
}

/**
 * i915_gem_get_gtt_alignment - return required GTT alignment for an object
 * @obj: object to check
 *
 * Return the required GTT alignment for an object, taking into account
 * potential fence register mapping.
 */
static uint32_t
i915_gem_get_gtt_alignment(struct drm_device *dev,
			   uint32_t size,
			   int tiling_mode)
{

	/*
	 * Minimum alignment is 4k (GTT page size), but might be greater
	 * if a fence register is needed for the object.
	 */
	if (INTEL_INFO(dev)->gen >= 4 ||
	    tiling_mode == I915_TILING_NONE)
		return (4096);

	/*
	 * Previous chips need to be aligned to the size of the smallest
	 * fence register that can contain the object.
	 */
	return (i915_gem_get_gtt_size(dev, size, tiling_mode));
}

/**
 * i915_gem_get_unfenced_gtt_alignment - return required GTT alignment for an
 *					 unfenced object
 * @dev: the device
 * @size: size of the object
 * @tiling_mode: tiling mode of the object
 *
 * Return the required GTT alignment for an object, only taking into account
 * unfenced tiled surface requirements.
 */
uint32_t
i915_gem_get_unfenced_gtt_alignment(struct drm_device *dev,
				    uint32_t size,
				    int tiling_mode)
{

	if (tiling_mode == I915_TILING_NONE)
		return (4096);

	/*
	 * Minimum alignment is 4k (GTT page size) for sane hw.
	 */
	if (INTEL_INFO(dev)->gen >= 4 || IS_G33(dev))
		return (4096);

	/*
	 * Previous hardware however needs to be aligned to a power-of-two
	 * tile height. The simplest method for determining this is to reuse
	 * the power-of-two object size.
	 */
	return (i915_gem_get_gtt_size(dev, size, tiling_mode));
}

int
i915_gem_mmap_gtt(struct drm_file *file,
		  struct drm_device *dev,
		  uint32_t handle,
		  uint64_t *offset)
{
	struct drm_i915_private *dev_priv;
	struct drm_i915_gem_object *obj;
	int ret;

	dev_priv = dev->dev_private;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret != 0)
		return (ret);

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	if (obj->base.size > dev_priv->mm.gtt_mappable_end) {
		ret = -E2BIG;
		goto out;
	}

	if (obj->madv != I915_MADV_WILLNEED) {
		DRM_ERROR("Attempting to mmap a purgeable buffer\n");
		ret = -EINVAL;
		goto out;
	}

	ret = drm_gem_create_mmap_offset(&obj->base);
	if (ret != 0)
		goto out;

	*offset = DRM_GEM_MAPPING_OFF(obj->base.map_list.key) |
	    DRM_GEM_MAPPING_KEY;
out:
	drm_gem_object_unreference(&obj->base);
unlock:
	DRM_UNLOCK(dev);
	return (ret);
}

/**
 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
 * @dev: DRM device
 * @data: GTT mapping ioctl data
 * @file: GEM object info
 *
 * Simply returns the fake offset to userspace so it can mmap it.
 * The mmap call will end up in drm_gem_mmap(), which will set things
 * up so we can get faults in the handler above.
 *
 * The fault handler will take care of binding the object into the GTT
 * (since it may have been evicted to make room for something), allocating
 * a fence register, and mapping the appropriate aperture address into
 * userspace.
 */
int
i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
			struct drm_file *file)
{
	struct drm_i915_private *dev_priv;
	struct drm_i915_gem_mmap_gtt *args = data;

	dev_priv = dev->dev_private;

	return (i915_gem_mmap_gtt(file, dev, args->handle, &args->offset));
}

/* Immediately discard the backing storage */
static void
i915_gem_object_truncate(struct drm_i915_gem_object *obj)
{
	vm_object_t vm_obj;

	vm_obj = obj->base.vm_obj;
	VM_OBJECT_LOCK(vm_obj);
	vm_object_page_remove(vm_obj, 0, 0, false);
	VM_OBJECT_UNLOCK(vm_obj);
	obj->madv = __I915_MADV_PURGED;
}

static inline int
i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj)
{
	return obj->madv == I915_MADV_DONTNEED;
}

static inline void vm_page_reference(vm_page_t m)
{
	vm_page_flag_set(m, PG_REFERENCED);
}

static void
i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
{
	vm_page_t m;
	int page_count, i;

	BUG_ON(obj->madv == __I915_MADV_PURGED);

	if (obj->tiling_mode != I915_TILING_NONE)
		i915_gem_object_save_bit_17_swizzle(obj);
	if (obj->madv == I915_MADV_DONTNEED)
		obj->dirty = 0;
	page_count = obj->base.size / PAGE_SIZE;
	VM_OBJECT_LOCK(obj->base.vm_obj);
#if GEM_PARANOID_CHECK_GTT
	i915_gem_assert_pages_not_mapped(obj->base.dev, obj->pages, page_count);
#endif
	for (i = 0; i < page_count; i++) {
		m = obj->pages[i];
		if (obj->dirty)
			vm_page_dirty(m);
		if (obj->madv == I915_MADV_WILLNEED)
			vm_page_reference(m);
		vm_page_busy_wait(obj->pages[i], FALSE, "i915gem");
		vm_page_unwire(obj->pages[i], 1);
		vm_page_wakeup(obj->pages[i]);
		atomic_add_long(&i915_gem_wired_pages_cnt, -1);
	}
	VM_OBJECT_UNLOCK(obj->base.vm_obj);
	obj->dirty = 0;
	drm_free(obj->pages, DRM_I915_GEM);
	obj->pages = NULL;
}

static int
i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj,
			      int flags)
{
	struct drm_device *dev;
	vm_object_t vm_obj;
	vm_page_t m;
	int page_count, i, j;

	dev = obj->base.dev;
	KASSERT(obj->pages == NULL, ("Obj already has pages"));
	page_count = obj->base.size / PAGE_SIZE;
	obj->pages = kmalloc(page_count * sizeof(vm_page_t), DRM_I915_GEM,
	    M_WAITOK);
	vm_obj = obj->base.vm_obj;
	VM_OBJECT_LOCK(vm_obj);
	for (i = 0; i < page_count; i++) {
		if ((obj->pages[i] = i915_gem_wire_page(vm_obj, i)) == NULL)
			goto failed;
	}
	VM_OBJECT_UNLOCK(vm_obj);
	if (i915_gem_object_needs_bit17_swizzle(obj))
		i915_gem_object_do_bit_17_swizzle(obj);
	return (0);

failed:
	for (j = 0; j < i; j++) {
		m = obj->pages[j];
		vm_page_busy_wait(m, FALSE, "i915gem");
		vm_page_unwire(m, 0);
		vm_page_wakeup(m);
		atomic_add_long(&i915_gem_wired_pages_cnt, -1);
	}
	VM_OBJECT_UNLOCK(vm_obj);
	drm_free(obj->pages, DRM_I915_GEM);
	obj->pages = NULL;
	return (-EIO);
}

void
i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
			       struct intel_ring_buffer *ring)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	u32 seqno = intel_ring_get_seqno(ring);

	BUG_ON(ring == NULL);
	obj->ring = ring;

	/* Add a reference if we're newly entering the active list. */
	if (!obj->active) {
		drm_gem_object_reference(&obj->base);
		obj->active = 1;
	}

	/* Move from whatever list we were on to the tail of execution. */
	list_move_tail(&obj->mm_list, &dev_priv->mm.active_list);
	list_move_tail(&obj->ring_list, &ring->active_list);

	obj->last_read_seqno = seqno;

	if (obj->fenced_gpu_access) {
		obj->last_fenced_seqno = seqno;

		/* Bump MRU to take account of the delayed flush */
		if (obj->fence_reg != I915_FENCE_REG_NONE) {
			struct drm_i915_fence_reg *reg;

			reg = &dev_priv->fence_regs[obj->fence_reg];
			list_move_tail(&reg->lru_list,
				       &dev_priv->mm.fence_list);
		}
	}
}

static void
i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;

	list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);

	BUG_ON(!list_empty(&obj->gpu_write_list));
	BUG_ON(obj->base.write_domain & ~I915_GEM_GPU_DOMAINS);
	BUG_ON(!obj->active);

	list_del_init(&obj->ring_list);
	obj->ring = NULL;

	obj->last_read_seqno = 0;
	obj->last_write_seqno = 0;
	obj->base.write_domain = 0;

	obj->last_fenced_seqno = 0;
	obj->fenced_gpu_access = false;

	obj->active = 0;
	drm_gem_object_unreference(&obj->base);

	WARN_ON(i915_verify_lists(dev));
}

static int
i915_gem_handle_seqno_wrap(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_ring_buffer *ring;
	int ret, i, j;

	/* The hardware uses various monotonic 32-bit counters, if we
	 * detect that they will wraparound we need to idle the GPU
	 * and reset those counters.
	 */
	ret = 0;
	for_each_ring(ring, dev_priv, i) {
		for (j = 0; j < ARRAY_SIZE(ring->sync_seqno); j++)
			ret |= ring->sync_seqno[j] != 0;
	}
	if (ret == 0)
		return ret;

	ret = i915_gpu_idle(dev);
	if (ret)
		return ret;

	i915_gem_retire_requests(dev);
	for_each_ring(ring, dev_priv, i) {
		for (j = 0; j < ARRAY_SIZE(ring->sync_seqno); j++)
			ring->sync_seqno[j] = 0;
	}

	return 0;
}

int
i915_gem_get_seqno(struct drm_device *dev, u32 *seqno)
{
	struct drm_i915_private *dev_priv = dev->dev_private;

	/* reserve 0 for non-seqno */
	if (dev_priv->next_seqno == 0) {
		int ret = i915_gem_handle_seqno_wrap(dev);
		if (ret)
			return ret;

		dev_priv->next_seqno = 1;
	}

	*seqno = dev_priv->next_seqno++;
	return 0;
}

int
i915_add_request(struct intel_ring_buffer *ring,
		 struct drm_file *file,
		 u32 *out_seqno)
{
	drm_i915_private_t *dev_priv = ring->dev->dev_private;
	struct drm_i915_gem_request *request;
	u32 request_ring_position;
	int was_empty;
	int ret;

	/*
	 * Emit any outstanding flushes - execbuf can fail to emit the flush
	 * after having emitted the batchbuffer command. Hence we need to fix
	 * things up similar to emitting the lazy request. The difference here
	 * is that the flush _must_ happen before the next request, no matter
	 * what.
	 */
	if (ring->gpu_caches_dirty) {
		ret = i915_gem_flush_ring(ring, 0, I915_GEM_GPU_DOMAINS);
		if (ret)
			return ret;

		ring->gpu_caches_dirty = false;
	}

	request = kmalloc(sizeof(*request), DRM_I915_GEM, M_WAITOK | M_ZERO);
	if (request == NULL)
		return -ENOMEM;

	/* Record the position of the start of the request so that
	 * should we detect the updated seqno part-way through the
	 * GPU processing the request, we never over-estimate the
	 * position of the head.
	 */
	request_ring_position = intel_ring_get_tail(ring);

	ret = ring->add_request(ring);
	if (ret) {
		kfree(request, DRM_I915_GEM);
		return ret;
	}

	request->seqno = intel_ring_get_seqno(ring);
	request->ring = ring;
	request->tail = request_ring_position;
	request->emitted_jiffies = jiffies;
	was_empty = list_empty(&ring->request_list);
	list_add_tail(&request->list, &ring->request_list);
	request->file_priv = NULL;

	if (file) {
		struct drm_i915_file_private *file_priv = file->driver_priv;

		spin_lock(&file_priv->mm.lock);
		request->file_priv = file_priv;
		list_add_tail(&request->client_list,
			      &file_priv->mm.request_list);
		spin_unlock(&file_priv->mm.lock);
	}

	ring->outstanding_lazy_request = 0;

	if (!dev_priv->mm.suspended) {
		if (i915_enable_hangcheck) {
			mod_timer(&dev_priv->hangcheck_timer,
				  round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES));
		}
		if (was_empty) {
			queue_delayed_work(dev_priv->wq,
					   &dev_priv->mm.retire_work,
					   round_jiffies_up_relative(hz));
			intel_mark_busy(dev_priv->dev);
		}
	}

	if (out_seqno)
		*out_seqno = request->seqno;
	return 0;
}

static inline void
i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
{
	struct drm_i915_file_private *file_priv = request->file_priv;

	if (!file_priv)
		return;

	DRM_LOCK_ASSERT(request->ring->dev);

	spin_lock(&file_priv->mm.lock);
	if (request->file_priv != NULL) {
		list_del(&request->client_list);
		request->file_priv = NULL;
	}
	spin_unlock(&file_priv->mm.lock);
}

static void
i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv,
			  struct intel_ring_buffer *ring)
{

	if (ring->dev != NULL)
		DRM_LOCK_ASSERT(ring->dev);

	while (!list_empty(&ring->request_list)) {
		struct drm_i915_gem_request *request;

		request = list_first_entry(&ring->request_list,
					   struct drm_i915_gem_request, list);

		list_del(&request->list);
		i915_gem_request_remove_from_client(request);
		drm_free(request, DRM_I915_GEM);
	}

	while (!list_empty(&ring->active_list)) {
		struct drm_i915_gem_object *obj;

		obj = list_first_entry(&ring->active_list,
				       struct drm_i915_gem_object, ring_list);

		list_del_init(&obj->gpu_write_list);
		i915_gem_object_move_to_inactive(obj);
	}
}

static void i915_gem_reset_fences(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int i;

	for (i = 0; i < dev_priv->num_fence_regs; i++) {
		struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];

		i915_gem_write_fence(dev, i, NULL);

		if (reg->obj)
			i915_gem_object_fence_lost(reg->obj);

		reg->pin_count = 0;
		reg->obj = NULL;
		INIT_LIST_HEAD(&reg->lru_list);
	}

	INIT_LIST_HEAD(&dev_priv->mm.fence_list);
}

void i915_gem_reset(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj;
	struct intel_ring_buffer *ring;
	int i;

	for_each_ring(ring, dev_priv, i)
		i915_gem_reset_ring_lists(dev_priv, ring);

	/* Move everything out of the GPU domains to ensure we do any
	 * necessary invalidation upon reuse.
	 */
	list_for_each_entry(obj,
			    &dev_priv->mm.inactive_list,
			    mm_list)
	{
		obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS;
	}

	/* The fence registers are invalidated so clear them out */
	i915_gem_reset_fences(dev);
}

/**
 * This function clears the request list as sequence numbers are passed.
 */
void
i915_gem_retire_requests_ring(struct intel_ring_buffer *ring)
{
	uint32_t seqno;

	if (list_empty(&ring->request_list))
		return;

	WARN_ON(i915_verify_lists(ring->dev));

	seqno = ring->get_seqno(ring, true);

	while (!list_empty(&ring->request_list)) {
		struct drm_i915_gem_request *request;

		request = list_first_entry(&ring->request_list,
					   struct drm_i915_gem_request,
					   list);

		if (!i915_seqno_passed(seqno, request->seqno))
			break;

		/* We know the GPU must have read the request to have
		 * sent us the seqno + interrupt, so use the position
		 * of tail of the request to update the last known position
		 * of the GPU head.
		 */
		ring->last_retired_head = request->tail;

		list_del(&request->list);
		i915_gem_request_remove_from_client(request);
		kfree(request, DRM_I915_GEM);
	}

	/* Move any buffers on the active list that are no longer referenced
	 * by the ringbuffer to the flushing/inactive lists as appropriate.
	 */
	while (!list_empty(&ring->active_list)) {
		struct drm_i915_gem_object *obj;

		obj = list_first_entry(&ring->active_list,
				       struct drm_i915_gem_object,
				       ring_list);

		if (!i915_seqno_passed(seqno, obj->last_read_seqno))
			break;

		i915_gem_object_move_to_inactive(obj);
	}

	if (unlikely(ring->trace_irq_seqno &&
		     i915_seqno_passed(seqno, ring->trace_irq_seqno))) {
		ring->irq_put(ring);
		ring->trace_irq_seqno = 0;
	}
}

void
i915_gem_retire_requests(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct intel_ring_buffer *ring;
	int i;

	for_each_ring(ring, dev_priv, i)
		i915_gem_retire_requests_ring(ring);
}

static void
i915_gem_retire_work_handler(struct work_struct *work)
{
	drm_i915_private_t *dev_priv;
	struct drm_device *dev;
	struct intel_ring_buffer *ring;
	bool idle;
	int i;

	dev_priv = container_of(work, drm_i915_private_t,
				mm.retire_work.work);
	dev = dev_priv->dev;

	/* Come back later if the device is busy... */
	if (lockmgr(&dev->dev_struct_lock, LK_EXCLUSIVE|LK_NOWAIT)) {
		queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work,
				   round_jiffies_up_relative(hz));
		return;
	}

	i915_gem_retire_requests(dev);

	/* Send a periodic flush down the ring so we don't hold onto GEM
	 * objects indefinitely.
	 */
	idle = true;
	for_each_ring(ring, dev_priv, i) {
		if (ring->gpu_caches_dirty)
			i915_add_request(ring, NULL, NULL);

		idle &= list_empty(&ring->request_list);
	}

	if (!dev_priv->mm.suspended && !idle)
		queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work,
				   round_jiffies_up_relative(hz));
	if (idle)
		intel_mark_idle(dev);

	DRM_UNLOCK(dev);
}

/**
 * i915_gem_object_sync - sync an object to a ring.
 *
 * @obj: object which may be in use on another ring.
 * @to: ring we wish to use the object on. May be NULL.
 *
 * This code is meant to abstract object synchronization with the GPU.
 * Calling with NULL implies synchronizing the object with the CPU
 * rather than a particular GPU ring.
 *
 * Returns 0 if successful, else propagates up the lower layer error.
 */
int
i915_gem_object_sync(struct drm_i915_gem_object *obj,
		     struct intel_ring_buffer *to)
{
	struct intel_ring_buffer *from = obj->ring;
	u32 seqno;
	int ret, idx;

	if (from == NULL || to == from)
		return 0;

	if (to == NULL || !i915_semaphore_is_enabled(obj->base.dev))
		return i915_gem_object_wait_rendering(obj, false);

	idx = intel_ring_sync_index(from, to);

	seqno = obj->last_read_seqno;
	if (seqno <= from->sync_seqno[idx])
		return 0;

	ret = i915_gem_check_olr(obj->ring, seqno);
	if (ret)
		return ret;

	ret = to->sync_to(to, from, seqno);
	if (!ret)
		from->sync_seqno[idx] = seqno;

	return ret;
}

static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
{
	u32 old_write_domain, old_read_domains;

	/* Act as a barrier for all accesses through the GTT */
	cpu_mfence();

	/* Force a pagefault for domain tracking on next user access */
	i915_gem_release_mmap(obj);

	if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
		return;

	old_read_domains = obj->base.read_domains;
	old_write_domain = obj->base.write_domain;

	obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT;
	obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT;
}

/**
 * Unbinds an object from the GTT aperture.
 */
int
i915_gem_object_unbind(struct drm_i915_gem_object *obj)
{
	drm_i915_private_t *dev_priv = obj->base.dev->dev_private;
	int ret = 0;

	if (obj->gtt_space == NULL)
		return 0;

	if (obj->pin_count != 0) {
		DRM_ERROR("Attempting to unbind pinned buffer\n");
		return -EINVAL;
	}

	ret = i915_gem_object_finish_gpu(obj);
	if (ret)
		return ret;
	/* Continue on if we fail due to EIO, the GPU is hung so we
	 * should be safe and we need to cleanup or else we might
	 * cause memory corruption through use-after-free.
	 */

	i915_gem_object_finish_gtt(obj);

	/* Move the object to the CPU domain to ensure that
	 * any possible CPU writes while it's not in the GTT
	 * are flushed when we go to remap it.
	 */
	if (ret == 0)
		ret = i915_gem_object_set_to_cpu_domain(obj, 1);
	if (ret == -ERESTART || ret == -EINTR)
		return ret;
	if (ret) {
		/* In the event of a disaster, abandon all caches and
		 * hope for the best.
		 */
		i915_gem_clflush_object(obj);
		obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
	}

	/* release the fence reg _after_ flushing */
	ret = i915_gem_object_put_fence(obj);
	if (ret)
		return ret;

	if (obj->has_global_gtt_mapping)
		i915_gem_gtt_unbind_object(obj);
	if (obj->has_aliasing_ppgtt_mapping) {
		i915_ppgtt_unbind_object(dev_priv->mm.aliasing_ppgtt, obj);
		obj->has_aliasing_ppgtt_mapping = 0;
	}
	i915_gem_gtt_finish_object(obj);

	i915_gem_object_put_pages_gtt(obj);

	list_del_init(&obj->gtt_list);
	list_del_init(&obj->mm_list);
	/* Avoid an unnecessary call to unbind on rebind. */
	obj->map_and_fenceable = true;

	drm_mm_put_block(obj->gtt_space);
	obj->gtt_space = NULL;
	obj->gtt_offset = 0;

	if (i915_gem_object_is_purgeable(obj))
		i915_gem_object_truncate(obj);

	return ret;
}

int i915_gpu_idle(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct intel_ring_buffer *ring;
	int ret, i;

	/* Flush everything onto the inactive list. */
	for_each_ring(ring, dev_priv, i) {
		ret = intel_ring_idle(ring);
		if (ret)
			return ret;
	}

	return 0;
}

static void sandybridge_write_fence_reg(struct drm_device *dev, int reg,
					struct drm_i915_gem_object *obj)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	uint64_t val;

	if (obj) {
		u32 size = obj->gtt_space->size;

		val = (uint64_t)((obj->gtt_offset + size - 4096) &
				 0xfffff000) << 32;
		val |= obj->gtt_offset & 0xfffff000;
		val |= (uint64_t)((obj->stride / 128) - 1) <<
			SANDYBRIDGE_FENCE_PITCH_SHIFT;

		if (obj->tiling_mode == I915_TILING_Y)
			val |= 1 << I965_FENCE_TILING_Y_SHIFT;
		val |= I965_FENCE_REG_VALID;
	} else
		val = 0;

	I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + reg * 8, val);
	POSTING_READ(FENCE_REG_SANDYBRIDGE_0 + reg * 8);
}

static void i965_write_fence_reg(struct drm_device *dev, int reg,
				 struct drm_i915_gem_object *obj)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	uint64_t val;

	if (obj) {
		u32 size = obj->gtt_space->size;

		val = (uint64_t)((obj->gtt_offset + size - 4096) &
				 0xfffff000) << 32;
		val |= obj->gtt_offset & 0xfffff000;
		val |= ((obj->stride / 128) - 1) << I965_FENCE_PITCH_SHIFT;
		if (obj->tiling_mode == I915_TILING_Y)
			val |= 1 << I965_FENCE_TILING_Y_SHIFT;
		val |= I965_FENCE_REG_VALID;
	} else
		val = 0;

	I915_WRITE64(FENCE_REG_965_0 + reg * 8, val);
	POSTING_READ(FENCE_REG_965_0 + reg * 8);
}

static void i915_write_fence_reg(struct drm_device *dev, int reg,
				 struct drm_i915_gem_object *obj)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	u32 val;

	if (obj) {
		u32 size = obj->gtt_space->size;
		int pitch_val;
		int tile_width;

		WARN((obj->gtt_offset & ~I915_FENCE_START_MASK) ||
		     (size & -size) != size ||
		     (obj->gtt_offset & (size - 1)),
		     "object 0x%08x [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n",
		     obj->gtt_offset, obj->map_and_fenceable, size);

		if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))
			tile_width = 128;
		else
			tile_width = 512;

		/* Note: pitch better be a power of two tile widths */
		pitch_val = obj->stride / tile_width;
		pitch_val = ffs(pitch_val) - 1;

		val = obj->gtt_offset;
		if (obj->tiling_mode == I915_TILING_Y)
			val |= 1 << I830_FENCE_TILING_Y_SHIFT;
		val |= I915_FENCE_SIZE_BITS(size);
		val |= pitch_val << I830_FENCE_PITCH_SHIFT;
		val |= I830_FENCE_REG_VALID;
	} else
		val = 0;

	if (reg < 8)
		reg = FENCE_REG_830_0 + reg * 4;
	else
		reg = FENCE_REG_945_8 + (reg - 8) * 4;

	I915_WRITE(reg, val);
	POSTING_READ(reg);
}

static void i830_write_fence_reg(struct drm_device *dev, int reg,
				 struct drm_i915_gem_object *obj)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	uint32_t val;

	if (obj) {
		u32 size = obj->gtt_space->size;
		uint32_t pitch_val;

		WARN((obj->gtt_offset & ~I830_FENCE_START_MASK) ||
		     (size & -size) != size ||
		     (obj->gtt_offset & (size - 1)),
		     "object 0x%08x not 512K or pot-size 0x%08x aligned\n",
		     obj->gtt_offset, size);

		pitch_val = obj->stride / 128;
		pitch_val = ffs(pitch_val) - 1;

		val = obj->gtt_offset;
		if (obj->tiling_mode == I915_TILING_Y)
			val |= 1 << I830_FENCE_TILING_Y_SHIFT;
		val |= I830_FENCE_SIZE_BITS(size);
		val |= pitch_val << I830_FENCE_PITCH_SHIFT;
		val |= I830_FENCE_REG_VALID;
	} else
		val = 0;

	I915_WRITE(FENCE_REG_830_0 + reg * 4, val);
	POSTING_READ(FENCE_REG_830_0 + reg * 4);
}

static void i915_gem_write_fence(struct drm_device *dev, int reg,
				 struct drm_i915_gem_object *obj)
{
	switch (INTEL_INFO(dev)->gen) {
	case 7:
	case 6: sandybridge_write_fence_reg(dev, reg, obj); break;
	case 5:
	case 4: i965_write_fence_reg(dev, reg, obj); break;
	case 3: i915_write_fence_reg(dev, reg, obj); break;
	case 2: i830_write_fence_reg(dev, reg, obj); break;
	default: break;
	}
}

static inline int fence_number(struct drm_i915_private *dev_priv,
			       struct drm_i915_fence_reg *fence)
{
	return fence - dev_priv->fence_regs;
}

static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
					 struct drm_i915_fence_reg *fence,
					 bool enable)
{
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
	int reg = fence_number(dev_priv, fence);

	i915_gem_write_fence(obj->base.dev, reg, enable ? obj : NULL);

	if (enable) {
		obj->fence_reg = reg;
		fence->obj = obj;
		list_move_tail(&fence->lru_list, &dev_priv->mm.fence_list);
	} else {
		obj->fence_reg = I915_FENCE_REG_NONE;
		fence->obj = NULL;
		list_del_init(&fence->lru_list);
	}
}

static int
i915_gem_object_flush_fence(struct drm_i915_gem_object *obj)
{
	int ret;

	if (obj->fenced_gpu_access) {
		if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) {
			ret = i915_gem_flush_ring(obj->ring,
						  0, obj->base.write_domain);
			if (ret)
				return ret;
		}

		obj->fenced_gpu_access = false;
	}

	if (obj->last_fenced_seqno) {
		ret = i915_wait_seqno(obj->ring,
				      obj->last_fenced_seqno);
		if (ret)
			return ret;

		obj->last_fenced_seqno = 0;
	}

	/* Ensure that all CPU reads are completed before installing a fence
	 * and all writes before removing the fence.
	 */
	if (obj->base.read_domains & I915_GEM_DOMAIN_GTT)
		cpu_mfence();

	return 0;
}

int
i915_gem_object_put_fence(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
	int ret;

	ret = i915_gem_object_flush_fence(obj);
	if (ret)
		return ret;

	if (obj->fence_reg == I915_FENCE_REG_NONE)
		return 0;

	i915_gem_object_update_fence(obj,
				     &dev_priv->fence_regs[obj->fence_reg],
				     false);
	i915_gem_object_fence_lost(obj);

	return 0;
}

static struct drm_i915_fence_reg *
i915_find_fence_reg(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_fence_reg *reg, *avail;
	int i;

	/* First try to find a free reg */
	avail = NULL;
	for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) {
		reg = &dev_priv->fence_regs[i];
		if (!reg->obj)
			return reg;

		if (!reg->pin_count)
			avail = reg;
	}

	if (avail == NULL)
		return NULL;

	/* None available, try to steal one or wait for a user to finish */
	list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) {
		if (reg->pin_count)
			continue;

		return reg;
	}

	return NULL;
}

/**
 * i915_gem_object_get_fence - set up fencing for an object
 * @obj: object to map through a fence reg
 *
 * When mapping objects through the GTT, userspace wants to be able to write
 * to them without having to worry about swizzling if the object is tiled.
 * This function walks the fence regs looking for a free one for @obj,
 * stealing one if it can't find any.
 *
 * It then sets up the reg based on the object's properties: address, pitch
 * and tiling format.
 *
 * For an untiled surface, this removes any existing fence.
 */
int
i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	bool enable = obj->tiling_mode != I915_TILING_NONE;
	struct drm_i915_fence_reg *reg;
	int ret;

	/* Have we updated the tiling parameters upon the object and so
	 * will need to serialise the write to the associated fence register?
	 */
	if (obj->fence_dirty) {
		ret = i915_gem_object_flush_fence(obj);
		if (ret)
			return ret;
	}

	/* Just update our place in the LRU if our fence is getting reused. */
	if (obj->fence_reg != I915_FENCE_REG_NONE) {
		reg = &dev_priv->fence_regs[obj->fence_reg];
		if (!obj->fence_dirty) {
			list_move_tail(&reg->lru_list,
				       &dev_priv->mm.fence_list);
			return 0;
		}
	} else if (enable) {
		reg = i915_find_fence_reg(dev);
		if (reg == NULL)
			return -EDEADLK;

		if (reg->obj) {
			struct drm_i915_gem_object *old = reg->obj;

			ret = i915_gem_object_flush_fence(old);
			if (ret)
				return ret;

			i915_gem_object_fence_lost(old);
		}
	} else
		return 0;

	i915_gem_object_update_fence(obj, reg, enable);
	obj->fence_dirty = false;

	return 0;
}

static int
i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
			    unsigned alignment, bool map_and_fenceable)
{
	struct drm_device *dev;
	struct drm_i915_private *dev_priv;
	struct drm_mm_node *free_space;
	uint32_t size, fence_size, fence_alignment, unfenced_alignment;
	bool mappable, fenceable;
	int ret;

	dev = obj->base.dev;
	dev_priv = dev->dev_private;

	if (obj->madv != I915_MADV_WILLNEED) {
		DRM_ERROR("Attempting to bind a purgeable object\n");
		return (-EINVAL);
	}

	fence_size = i915_gem_get_gtt_size(dev, obj->base.size,
	    obj->tiling_mode);
	fence_alignment = i915_gem_get_gtt_alignment(dev, obj->base.size,
	    obj->tiling_mode);
	unfenced_alignment = i915_gem_get_unfenced_gtt_alignment(dev,
	    obj->base.size, obj->tiling_mode);
	if (alignment == 0)
		alignment = map_and_fenceable ? fence_alignment :
		    unfenced_alignment;
	if (map_and_fenceable && (alignment & (fence_alignment - 1)) != 0) {
		DRM_ERROR("Invalid object alignment requested %u\n", alignment);
		return (-EINVAL);
	}

	size = map_and_fenceable ? fence_size : obj->base.size;

	/* If the object is bigger than the entire aperture, reject it early
	 * before evicting everything in a vain attempt to find space.
	 */
	if (obj->base.size > (map_and_fenceable ?
	    dev_priv->mm.gtt_mappable_end : dev_priv->mm.gtt_total)) {
		DRM_ERROR(
		    "Attempting to bind an object larger than the aperture\n");
		return (-E2BIG);
	}

search_free:
	if (map_and_fenceable)
		free_space = drm_mm_search_free_in_range(
		    &dev_priv->mm.gtt_space, size, alignment, 0,
		    dev_priv->mm.gtt_mappable_end, 0);
	else
		free_space = drm_mm_search_free(&dev_priv->mm.gtt_space,
		    size, alignment, 0);
	if (free_space != NULL) {
		int color = 0;
		if (map_and_fenceable)
			obj->gtt_space = drm_mm_get_block_range_generic(
			    free_space, size, alignment, color, 0,
			    dev_priv->mm.gtt_mappable_end, 1);
		else
			obj->gtt_space = drm_mm_get_block_generic(free_space,
			    size, alignment, color, 1);
	}
	if (obj->gtt_space == NULL) {
		ret = i915_gem_evict_something(dev, size, alignment,
		    map_and_fenceable);
		if (ret != 0)
			return (ret);
		goto search_free;
	}

	/*
	 * NOTE: i915_gem_object_get_pages_gtt() cannot
	 * return ENOMEM, since we used VM_ALLOC_RETRY.
	 */
	ret = i915_gem_object_get_pages_gtt(obj, 0);
	if (ret != 0) {
		drm_mm_put_block(obj->gtt_space);
		obj->gtt_space = NULL;
		return (ret);
	}

	i915_gem_gtt_bind_object(obj, obj->cache_level);
	if (ret != 0) {
		i915_gem_object_put_pages_gtt(obj);
		drm_mm_put_block(obj->gtt_space);
		obj->gtt_space = NULL;
		if (i915_gem_evict_everything(dev))
			return (ret);
		goto search_free;
	}

	list_add_tail(&obj->gtt_list, &dev_priv->mm.gtt_list);
	list_add_tail(&obj->mm_list, &dev_priv->mm.inactive_list);

	obj->gtt_offset = obj->gtt_space->start;

	fenceable =
	    obj->gtt_space->size == fence_size &&
	    (obj->gtt_space->start & (fence_alignment - 1)) == 0;

	mappable =
	    obj->gtt_offset + obj->base.size <= dev_priv->mm.gtt_mappable_end;
	obj->map_and_fenceable = mappable && fenceable;

	return (0);
}

void
i915_gem_clflush_object(struct drm_i915_gem_object *obj)
{

	/* If we don't have a page list set up, then we're not pinned
	 * to GPU, and we can ignore the cache flush because it'll happen
	 * again at bind time.
	 */
	if (obj->pages == NULL)
		return;

	/* If the GPU is snooping the contents of the CPU cache,
	 * we do not need to manually clear the CPU cache lines.  However,
	 * the caches are only snooped when the render cache is
	 * flushed/invalidated.  As we always have to emit invalidations
	 * and flushes when moving into and out of the RENDER domain, correct
	 * snooping behaviour occurs naturally as the result of our domain
	 * tracking.
	 */
	if (obj->cache_level != I915_CACHE_NONE)
		return;

	drm_clflush_pages(obj->pages, obj->base.size / PAGE_SIZE);
}

/** Flushes the GTT write domain for the object if it's dirty. */
static void
i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
{
	uint32_t old_write_domain;

	if (obj->base.write_domain != I915_GEM_DOMAIN_GTT)
		return;

	/* No actual flushing is required for the GTT write domain.  Writes
	 * to it immediately go to main memory as far as we know, so there's
	 * no chipset flush.  It also doesn't land in render cache.
	 *
	 * However, we do have to enforce the order so that all writes through
	 * the GTT land before any writes to the device, such as updates to
	 * the GATT itself.
	 */
	cpu_sfence();

	old_write_domain = obj->base.write_domain;
	obj->base.write_domain = 0;
}

/** Flushes the CPU write domain for the object if it's dirty. */
static void
i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
{
	uint32_t old_write_domain;

	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
		return;

	i915_gem_clflush_object(obj);
	intel_gtt_chipset_flush();
	old_write_domain = obj->base.write_domain;
	obj->base.write_domain = 0;
}

static int
i915_gem_object_flush_gpu_write_domain(struct drm_i915_gem_object *obj)
{

	if ((obj->base.write_domain & I915_GEM_GPU_DOMAINS) == 0)
		return (0);
	return (i915_gem_flush_ring(obj->ring, 0, obj->base.write_domain));
}

/**
 * Moves a single object to the GTT read, and possibly write domain.
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
2062 */ 2063 int 2064 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) 2065 { 2066 drm_i915_private_t *dev_priv = obj->base.dev->dev_private; 2067 uint32_t old_write_domain, old_read_domains; 2068 int ret; 2069 2070 /* Not valid to be called on unbound objects. */ 2071 if (obj->gtt_space == NULL) 2072 return -EINVAL; 2073 2074 if (obj->base.write_domain == I915_GEM_DOMAIN_GTT) 2075 return 0; 2076 2077 ret = i915_gem_object_flush_gpu_write_domain(obj); 2078 if (ret) 2079 return ret; 2080 2081 ret = i915_gem_object_wait_rendering(obj, !write); 2082 if (ret) 2083 return ret; 2084 2085 i915_gem_object_flush_cpu_write_domain(obj); 2086 2087 old_write_domain = obj->base.write_domain; 2088 old_read_domains = obj->base.read_domains; 2089 2090 /* It should now be out of any other write domains, and we can update 2091 * the domain values for our changes. 2092 */ 2093 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0); 2094 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 2095 if (write) { 2096 obj->base.read_domains = I915_GEM_DOMAIN_GTT; 2097 obj->base.write_domain = I915_GEM_DOMAIN_GTT; 2098 obj->dirty = 1; 2099 } 2100 2101 /* And bump the LRU for this access */ 2102 if (i915_gem_object_is_inactive(obj)) 2103 list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list); 2104 2105 return 0; 2106 } 2107 2108 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, 2109 enum i915_cache_level cache_level) 2110 { 2111 struct drm_device *dev = obj->base.dev; 2112 drm_i915_private_t *dev_priv = dev->dev_private; 2113 int ret; 2114 2115 if (obj->cache_level == cache_level) 2116 return 0; 2117 2118 if (obj->pin_count) { 2119 DRM_DEBUG("can not change the cache level of pinned objects\n"); 2120 return -EBUSY; 2121 } 2122 2123 if (obj->gtt_space) { 2124 ret = i915_gem_object_finish_gpu(obj); 2125 if (ret != 0) 2126 return (ret); 2127 2128 i915_gem_object_finish_gtt(obj); 2129 2130 /* Before SandyBridge, you could not use tiling or fence 2131 * registers with snooped memory, so relinquish any fences 2132 * currently pointing to our region in the aperture. 2133 */ 2134 if (INTEL_INFO(obj->base.dev)->gen < 6) { 2135 ret = i915_gem_object_put_fence(obj); 2136 if (ret) 2137 return ret; 2138 } 2139 2140 if (obj->has_global_gtt_mapping) 2141 i915_gem_gtt_bind_object(obj, cache_level); 2142 if (obj->has_aliasing_ppgtt_mapping) 2143 i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt, 2144 obj, cache_level); 2145 } 2146 2147 if (cache_level == I915_CACHE_NONE) { 2148 u32 old_read_domains, old_write_domain; 2149 2150 /* If we're coming from LLC cached, then we haven't 2151 * actually been tracking whether the data is in the 2152 * CPU cache or not, since we only allow one bit set 2153 * in obj->write_domain and have been skipping the clflushes. 2154 * Just set it to the CPU cache for now. 2155 */ 2156 KASSERT((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) == 0, 2157 ("obj %p in CPU write domain", obj)); 2158 KASSERT((obj->base.read_domains & ~I915_GEM_DOMAIN_CPU) == 0, 2159 ("obj %p in CPU read domain", obj)); 2160 2161 old_read_domains = obj->base.read_domains; 2162 old_write_domain = obj->base.write_domain; 2163 2164 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 2165 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 2166 2167 } 2168 2169 obj->cache_level = cache_level; 2170 return 0; 2171 } 2172 2173 /* 2174 * Prepare buffer for display plane (scanout, cursors, etc). 
2175 * Can be called from an uninterruptible phase (modesetting) and allows 2176 * any flushes to be pipelined (for pageflips). 2177 */ 2178 int 2179 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, 2180 u32 alignment, 2181 struct intel_ring_buffer *pipelined) 2182 { 2183 u32 old_read_domains, old_write_domain; 2184 int ret; 2185 2186 ret = i915_gem_object_flush_gpu_write_domain(obj); 2187 if (ret) 2188 return ret; 2189 2190 if (pipelined != obj->ring) { 2191 ret = i915_gem_object_sync(obj, pipelined); 2192 if (ret) 2193 return ret; 2194 } 2195 2196 /* The display engine is not coherent with the LLC cache on gen6. As 2197 * a result, we make sure that the pinning that is about to occur is 2198 * done with uncached PTEs. This is lowest common denominator for all 2199 * chipsets. 2200 * 2201 * However for gen6+, we could do better by using the GFDT bit instead 2202 * of uncaching, which would allow us to flush all the LLC-cached data 2203 * with that bit in the PTE to main memory with just one PIPE_CONTROL. 2204 */ 2205 ret = i915_gem_object_set_cache_level(obj, I915_CACHE_NONE); 2206 if (ret) 2207 return ret; 2208 2209 /* As the user may map the buffer once pinned in the display plane 2210 * (e.g. libkms for the bootup splash), we have to ensure that we 2211 * always use map_and_fenceable for all scanout buffers. 2212 */ 2213 ret = i915_gem_object_pin(obj, alignment, true); 2214 if (ret) 2215 return ret; 2216 2217 i915_gem_object_flush_cpu_write_domain(obj); 2218 2219 old_write_domain = obj->base.write_domain; 2220 old_read_domains = obj->base.read_domains; 2221 2222 /* It should now be out of any other write domains, and we can update 2223 * the domain values for our changes. 2224 */ 2225 obj->base.write_domain = 0; 2226 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 2227 2228 return 0; 2229 } 2230 2231 int 2232 i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj) 2233 { 2234 int ret; 2235 2236 if ((obj->base.read_domains & I915_GEM_GPU_DOMAINS) == 0) 2237 return 0; 2238 2239 if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) { 2240 ret = i915_gem_flush_ring(obj->ring, 0, obj->base.write_domain); 2241 if (ret) 2242 return ret; 2243 } 2244 2245 ret = i915_gem_object_wait_rendering(obj, false); 2246 if (ret) 2247 return ret; 2248 2249 /* Ensure that we invalidate the GPU's caches and TLBs. */ 2250 obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS; 2251 return 0; 2252 } 2253 2254 /** 2255 * Moves a single object to the CPU read, and possibly write domain. 2256 * 2257 * This function returns when the move is complete, including waiting on 2258 * flushes to occur. 2259 */ 2260 int 2261 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) 2262 { 2263 uint32_t old_write_domain, old_read_domains; 2264 int ret; 2265 2266 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) 2267 return 0; 2268 2269 ret = i915_gem_object_flush_gpu_write_domain(obj); 2270 if (ret) 2271 return ret; 2272 2273 ret = i915_gem_object_wait_rendering(obj, !write); 2274 if (ret) 2275 return ret; 2276 2277 i915_gem_object_flush_gtt_write_domain(obj); 2278 2279 old_write_domain = obj->base.write_domain; 2280 old_read_domains = obj->base.read_domains; 2281 2282 /* Flush the CPU cache if it's still invalid. 
*/ 2283 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) { 2284 i915_gem_clflush_object(obj); 2285 2286 obj->base.read_domains |= I915_GEM_DOMAIN_CPU; 2287 } 2288 2289 /* It should now be out of any other write domains, and we can update 2290 * the domain values for our changes. 2291 */ 2292 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0); 2293 2294 /* If we're writing through the CPU, then the GPU read domains will 2295 * need to be invalidated at next use. 2296 */ 2297 if (write) { 2298 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 2299 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 2300 } 2301 2302 return 0; 2303 } 2304 2305 /* Throttle our rendering by waiting until the ring has completed our requests 2306 * emitted over 20 msec ago. 2307 * 2308 * Note that if we were to use the current jiffies each time around the loop, 2309 * we wouldn't escape the function with any frames outstanding if the time to 2310 * render a frame was over 20ms. 2311 * 2312 * This should get us reasonable parallelism between CPU and GPU but also 2313 * relatively low latency when blocking on a particular request to finish. 2314 */ 2315 static int 2316 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) 2317 { 2318 struct drm_i915_private *dev_priv = dev->dev_private; 2319 struct drm_i915_file_private *file_priv = file->driver_priv; 2320 unsigned long recent_enough = ticks - (20 * hz / 1000); 2321 struct drm_i915_gem_request *request; 2322 struct intel_ring_buffer *ring = NULL; 2323 u32 seqno = 0; 2324 int ret; 2325 2326 if (atomic_read(&dev_priv->mm.wedged)) 2327 return -EIO; 2328 2329 spin_lock(&file_priv->mm.lock); 2330 list_for_each_entry(request, &file_priv->mm.request_list, client_list) { 2331 if (time_after_eq(request->emitted_jiffies, recent_enough)) 2332 break; 2333 2334 ring = request->ring; 2335 seqno = request->seqno; 2336 } 2337 spin_unlock(&file_priv->mm.lock); 2338 2339 if (seqno == 0) 2340 return 0; 2341 2342 ret = __wait_seqno(ring, seqno, true, NULL); 2343 2344 if (ret == 0) 2345 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0); 2346 2347 return ret; 2348 } 2349 2350 int 2351 i915_gem_object_pin(struct drm_i915_gem_object *obj, 2352 uint32_t alignment, 2353 bool map_and_fenceable) 2354 { 2355 int ret; 2356 2357 BUG_ON(obj->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT); 2358 2359 if (obj->gtt_space != NULL) { 2360 if ((alignment && obj->gtt_offset & (alignment - 1)) || 2361 (map_and_fenceable && !obj->map_and_fenceable)) { 2362 WARN(obj->pin_count, 2363 "bo is already pinned with incorrect alignment:" 2364 " offset=%x, req.alignment=%x, req.map_and_fenceable=%d," 2365 " obj->map_and_fenceable=%d\n", 2366 obj->gtt_offset, alignment, 2367 map_and_fenceable, 2368 obj->map_and_fenceable); 2369 ret = i915_gem_object_unbind(obj); 2370 if (ret) 2371 return ret; 2372 } 2373 } 2374 2375 if (obj->gtt_space == NULL) { 2376 ret = i915_gem_object_bind_to_gtt(obj, alignment, 2377 map_and_fenceable); 2378 if (ret) 2379 return ret; 2380 } 2381 2382 if (!obj->has_global_gtt_mapping && map_and_fenceable) 2383 i915_gem_gtt_bind_object(obj, obj->cache_level); 2384 2385 obj->pin_count++; 2386 obj->pin_mappable |= map_and_fenceable; 2387 2388 return 0; 2389 } 2390 2391 void 2392 i915_gem_object_unpin(struct drm_i915_gem_object *obj) 2393 { 2394 BUG_ON(obj->pin_count == 0); 2395 BUG_ON(obj->gtt_space == NULL); 2396 2397 if (--obj->pin_count == 0) 2398 obj->pin_mappable = false; 2399 } 2400 2401 int 2402 i915_gem_pin_ioctl(struct drm_device *dev, void *data, 2403 
struct drm_file *file) 2404 { 2405 struct drm_i915_gem_pin *args; 2406 struct drm_i915_gem_object *obj; 2407 struct drm_gem_object *gobj; 2408 int ret; 2409 2410 args = data; 2411 2412 ret = i915_mutex_lock_interruptible(dev); 2413 if (ret != 0) 2414 return ret; 2415 2416 gobj = drm_gem_object_lookup(dev, file, args->handle); 2417 if (gobj == NULL) { 2418 ret = -ENOENT; 2419 goto unlock; 2420 } 2421 obj = to_intel_bo(gobj); 2422 2423 if (obj->madv != I915_MADV_WILLNEED) { 2424 DRM_ERROR("Attempting to pin a purgeable buffer\n"); 2425 ret = -EINVAL; 2426 goto out; 2427 } 2428 2429 if (obj->pin_filp != NULL && obj->pin_filp != file) { 2430 DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n", 2431 args->handle); 2432 ret = -EINVAL; 2433 goto out; 2434 } 2435 2436 obj->user_pin_count++; 2437 obj->pin_filp = file; 2438 if (obj->user_pin_count == 1) { 2439 ret = i915_gem_object_pin(obj, args->alignment, true); 2440 if (ret != 0) 2441 goto out; 2442 } 2443 2444 /* XXX - flush the CPU caches for pinned objects 2445 * as the X server doesn't manage domains yet 2446 */ 2447 i915_gem_object_flush_cpu_write_domain(obj); 2448 args->offset = obj->gtt_offset; 2449 out: 2450 drm_gem_object_unreference(&obj->base); 2451 unlock: 2452 DRM_UNLOCK(dev); 2453 return (ret); 2454 } 2455 2456 int 2457 i915_gem_unpin_ioctl(struct drm_device *dev, void *data, 2458 struct drm_file *file) 2459 { 2460 struct drm_i915_gem_pin *args; 2461 struct drm_i915_gem_object *obj; 2462 int ret; 2463 2464 args = data; 2465 ret = i915_mutex_lock_interruptible(dev); 2466 if (ret != 0) 2467 return (ret); 2468 2469 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 2470 if (&obj->base == NULL) { 2471 ret = -ENOENT; 2472 goto unlock; 2473 } 2474 2475 if (obj->pin_filp != file) { 2476 DRM_ERROR("Not pinned by caller in i915_gem_pin_ioctl(): %d\n", 2477 args->handle); 2478 ret = -EINVAL; 2479 goto out; 2480 } 2481 obj->user_pin_count--; 2482 if (obj->user_pin_count == 0) { 2483 obj->pin_filp = NULL; 2484 i915_gem_object_unpin(obj); 2485 } 2486 2487 out: 2488 drm_gem_object_unreference(&obj->base); 2489 unlock: 2490 DRM_UNLOCK(dev); 2491 return (ret); 2492 } 2493 2494 int 2495 i915_gem_busy_ioctl(struct drm_device *dev, void *data, 2496 struct drm_file *file) 2497 { 2498 struct drm_i915_gem_busy *args = data; 2499 struct drm_i915_gem_object *obj; 2500 int ret; 2501 2502 ret = i915_mutex_lock_interruptible(dev); 2503 if (ret) 2504 return ret; 2505 2506 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 2507 if (&obj->base == NULL) { 2508 ret = -ENOENT; 2509 goto unlock; 2510 } 2511 2512 /* Count all active objects as busy, even if they are currently not used 2513 * by the gpu. Users of this interface expect objects to eventually 2514 * become non-busy without any further actions, therefore emit any 2515 * necessary flushes here. 
2516 */ 2517 ret = i915_gem_object_flush_active(obj); 2518 2519 args->busy = obj->active; 2520 if (obj->ring) { 2521 args->busy |= intel_ring_flag(obj->ring) << 17; 2522 } 2523 2524 drm_gem_object_unreference(&obj->base); 2525 unlock: 2526 DRM_UNLOCK(dev); 2527 return ret; 2528 } 2529 2530 int 2531 i915_gem_throttle_ioctl(struct drm_device *dev, void *data, 2532 struct drm_file *file_priv) 2533 { 2534 2535 return (i915_gem_ring_throttle(dev, file_priv)); 2536 } 2537 2538 int 2539 i915_gem_madvise_ioctl(struct drm_device *dev, void *data, 2540 struct drm_file *file_priv) 2541 { 2542 struct drm_i915_gem_madvise *args = data; 2543 struct drm_i915_gem_object *obj; 2544 int ret; 2545 2546 switch (args->madv) { 2547 case I915_MADV_DONTNEED: 2548 case I915_MADV_WILLNEED: 2549 break; 2550 default: 2551 return -EINVAL; 2552 } 2553 2554 ret = i915_mutex_lock_interruptible(dev); 2555 if (ret) 2556 return ret; 2557 2558 obj = to_intel_bo(drm_gem_object_lookup(dev, file_priv, args->handle)); 2559 if (&obj->base == NULL) { 2560 ret = -ENOENT; 2561 goto unlock; 2562 } 2563 2564 if (obj->pin_count) { 2565 ret = -EINVAL; 2566 goto out; 2567 } 2568 2569 if (obj->madv != __I915_MADV_PURGED) 2570 obj->madv = args->madv; 2571 2572 /* if the object is no longer attached, discard its backing storage */ 2573 if (i915_gem_object_is_purgeable(obj) && obj->pages == NULL) 2574 i915_gem_object_truncate(obj); 2575 2576 args->retained = obj->madv != __I915_MADV_PURGED; 2577 2578 out: 2579 drm_gem_object_unreference(&obj->base); 2580 unlock: 2581 DRM_UNLOCK(dev); 2582 return ret; 2583 } 2584 2585 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev, 2586 size_t size) 2587 { 2588 struct drm_i915_private *dev_priv; 2589 struct drm_i915_gem_object *obj; 2590 2591 dev_priv = dev->dev_private; 2592 2593 obj = kmalloc(sizeof(*obj), DRM_I915_GEM, M_WAITOK | M_ZERO); 2594 2595 if (drm_gem_object_init(dev, &obj->base, size) != 0) { 2596 drm_free(obj, DRM_I915_GEM); 2597 return (NULL); 2598 } 2599 2600 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 2601 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 2602 2603 if (HAS_LLC(dev)) 2604 obj->cache_level = I915_CACHE_LLC; 2605 else 2606 obj->cache_level = I915_CACHE_NONE; 2607 obj->base.driver_private = NULL; 2608 obj->fence_reg = I915_FENCE_REG_NONE; 2609 INIT_LIST_HEAD(&obj->mm_list); 2610 INIT_LIST_HEAD(&obj->gtt_list); 2611 INIT_LIST_HEAD(&obj->ring_list); 2612 INIT_LIST_HEAD(&obj->exec_list); 2613 INIT_LIST_HEAD(&obj->gpu_write_list); 2614 obj->madv = I915_MADV_WILLNEED; 2615 /* Avoid an unnecessary call to unbind on the first bind. 
*/ 2616 obj->map_and_fenceable = true; 2617 2618 i915_gem_info_add_obj(dev_priv, size); 2619 2620 return (obj); 2621 } 2622 2623 int i915_gem_init_object(struct drm_gem_object *obj) 2624 { 2625 BUG(); 2626 2627 return 0; 2628 } 2629 2630 void i915_gem_free_object(struct drm_gem_object *gem_obj) 2631 { 2632 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); 2633 struct drm_device *dev = obj->base.dev; 2634 drm_i915_private_t *dev_priv = dev->dev_private; 2635 2636 if (obj->phys_obj) 2637 i915_gem_detach_phys_object(dev, obj); 2638 2639 obj->pin_count = 0; 2640 if (WARN_ON(i915_gem_object_unbind(obj) == -ERESTARTSYS)) { 2641 bool was_interruptible; 2642 2643 was_interruptible = dev_priv->mm.interruptible; 2644 dev_priv->mm.interruptible = false; 2645 2646 WARN_ON(i915_gem_object_unbind(obj)); 2647 2648 dev_priv->mm.interruptible = was_interruptible; 2649 } 2650 2651 drm_gem_free_mmap_offset(&obj->base); 2652 2653 drm_gem_object_release(&obj->base); 2654 i915_gem_info_remove_obj(dev_priv, obj->base.size); 2655 2656 drm_free(obj->bit_17, DRM_I915_GEM); 2657 drm_free(obj, DRM_I915_GEM); 2658 } 2659 2660 int 2661 i915_gem_do_init(struct drm_device *dev, unsigned long start, 2662 unsigned long mappable_end, unsigned long end) 2663 { 2664 drm_i915_private_t *dev_priv; 2665 unsigned long mappable; 2666 int error; 2667 2668 dev_priv = dev->dev_private; 2669 mappable = min(end, mappable_end) - start; 2670 2671 drm_mm_init(&dev_priv->mm.gtt_space, start, end - start); 2672 2673 dev_priv->mm.gtt_start = start; 2674 dev_priv->mm.gtt_mappable_end = mappable_end; 2675 dev_priv->mm.gtt_end = end; 2676 dev_priv->mm.gtt_total = end - start; 2677 dev_priv->mm.mappable_gtt_total = mappable; 2678 2679 /* Take over this portion of the GTT */ 2680 intel_gtt_clear_range(start / PAGE_SIZE, (end-start) / PAGE_SIZE); 2681 device_printf(dev->dev, 2682 "taking over the fictitious range 0x%lx-0x%lx\n", 2683 dev->agp->base + start, dev->agp->base + start + mappable); 2684 error = -vm_phys_fictitious_reg_range(dev->agp->base + start, 2685 dev->agp->base + start + mappable, VM_MEMATTR_WRITE_COMBINING); 2686 return (error); 2687 } 2688 2689 int 2690 i915_gem_idle(struct drm_device *dev) 2691 { 2692 drm_i915_private_t *dev_priv = dev->dev_private; 2693 int ret; 2694 2695 DRM_LOCK(dev); 2696 2697 if (dev_priv->mm.suspended) { 2698 DRM_UNLOCK(dev); 2699 return 0; 2700 } 2701 2702 ret = i915_gpu_idle(dev); 2703 if (ret) { 2704 DRM_UNLOCK(dev); 2705 return ret; 2706 } 2707 i915_gem_retire_requests(dev); 2708 2709 /* Under UMS, be paranoid and evict. */ 2710 if (!drm_core_check_feature(dev, DRIVER_MODESET)) 2711 i915_gem_evict_everything(dev); 2712 2713 i915_gem_reset_fences(dev); 2714 2715 /* Hack! Don't let anybody do execbuf while we don't control the chip. 2716 * We need to replace this with a semaphore, or something. 2717 * And not confound mm.suspended! 2718 */ 2719 dev_priv->mm.suspended = 1; 2720 del_timer_sync(&dev_priv->hangcheck_timer); 2721 2722 i915_kernel_lost_context(dev); 2723 i915_gem_cleanup_ringbuffer(dev); 2724 2725 DRM_UNLOCK(dev); 2726 2727 /* Cancel the retire work handler, which should be idle now. 
*/ 2728 cancel_delayed_work_sync(&dev_priv->mm.retire_work); 2729 2730 return 0; 2731 } 2732 2733 void i915_gem_l3_remap(struct drm_device *dev) 2734 { 2735 drm_i915_private_t *dev_priv = dev->dev_private; 2736 u32 misccpctl; 2737 int i; 2738 2739 if (!HAS_L3_GPU_CACHE(dev)) 2740 return; 2741 2742 if (!dev_priv->l3_parity.remap_info) 2743 return; 2744 2745 misccpctl = I915_READ(GEN7_MISCCPCTL); 2746 I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE); 2747 POSTING_READ(GEN7_MISCCPCTL); 2748 2749 for (i = 0; i < GEN7_L3LOG_SIZE; i += 4) { 2750 u32 remap = I915_READ(GEN7_L3LOG_BASE + i); 2751 if (remap && remap != dev_priv->l3_parity.remap_info[i/4]) 2752 DRM_DEBUG("0x%x was already programmed to %x\n", 2753 GEN7_L3LOG_BASE + i, remap); 2754 if (remap && !dev_priv->l3_parity.remap_info[i/4]) 2755 DRM_DEBUG_DRIVER("Clearing remapped register\n"); 2756 I915_WRITE(GEN7_L3LOG_BASE + i, dev_priv->l3_parity.remap_info[i/4]); 2757 } 2758 2759 /* Make sure all the writes land before disabling dop clock gating */ 2760 POSTING_READ(GEN7_L3LOG_BASE); 2761 2762 I915_WRITE(GEN7_MISCCPCTL, misccpctl); 2763 } 2764 2765 void 2766 i915_gem_init_swizzling(struct drm_device *dev) 2767 { 2768 drm_i915_private_t *dev_priv; 2769 2770 dev_priv = dev->dev_private; 2771 2772 if (INTEL_INFO(dev)->gen < 5 || 2773 dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE) 2774 return; 2775 2776 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) | 2777 DISP_TILE_SURFACE_SWIZZLING); 2778 2779 if (IS_GEN5(dev)) 2780 return; 2781 2782 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL); 2783 if (IS_GEN6(dev)) 2784 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB)); 2785 else 2786 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB)); 2787 } 2788 2789 static bool 2790 intel_enable_blt(struct drm_device *dev) 2791 { 2792 int revision; 2793 2794 if (!HAS_BLT(dev)) 2795 return false; 2796 2797 /* The blitter was dysfunctional on early prototypes */ 2798 revision = pci_read_config(dev->dev, PCIR_REVID, 1); 2799 if (IS_GEN6(dev) && revision < 8) { 2800 DRM_INFO("BLT not supported on this pre-production hardware;" 2801 " graphics performance will be degraded.\n"); 2802 return false; 2803 } 2804 2805 return true; 2806 } 2807 2808 int 2809 i915_gem_init_hw(struct drm_device *dev) 2810 { 2811 drm_i915_private_t *dev_priv = dev->dev_private; 2812 int ret; 2813 2814 if (IS_HASWELL(dev) && (I915_READ(0x120010) == 1)) 2815 I915_WRITE(0x9008, I915_READ(0x9008) | 0xf0000); 2816 2817 i915_gem_l3_remap(dev); 2818 2819 i915_gem_init_swizzling(dev); 2820 2821 ret = intel_init_render_ring_buffer(dev); 2822 if (ret) 2823 return ret; 2824 2825 if (HAS_BSD(dev)) { 2826 ret = intel_init_bsd_ring_buffer(dev); 2827 if (ret) 2828 goto cleanup_render_ring; 2829 } 2830 2831 if (intel_enable_blt(dev)) { 2832 ret = intel_init_blt_ring_buffer(dev); 2833 if (ret) 2834 goto cleanup_bsd_ring; 2835 } 2836 2837 dev_priv->next_seqno = 1; 2838 2839 /* 2840 * XXX: There was some w/a described somewhere suggesting loading 2841 * contexts before PPGTT. 
2842 */ 2843 #if 0 /* XXX: HW context support */ 2844 i915_gem_context_init(dev); 2845 #endif 2846 i915_gem_init_ppgtt(dev); 2847 2848 return 0; 2849 2850 cleanup_bsd_ring: 2851 intel_cleanup_ring_buffer(&dev_priv->ring[VCS]); 2852 cleanup_render_ring: 2853 intel_cleanup_ring_buffer(&dev_priv->ring[RCS]); 2854 return ret; 2855 } 2856 2857 static bool 2858 intel_enable_ppgtt(struct drm_device *dev) 2859 { 2860 if (i915_enable_ppgtt >= 0) 2861 return i915_enable_ppgtt; 2862 2863 /* Disable ppgtt on SNB if VT-d is on. */ 2864 if (INTEL_INFO(dev)->gen == 6 && intel_iommu_enabled) 2865 return false; 2866 2867 return true; 2868 } 2869 2870 int i915_gem_init(struct drm_device *dev) 2871 { 2872 struct drm_i915_private *dev_priv = dev->dev_private; 2873 unsigned long prealloc_size, gtt_size, mappable_size; 2874 int ret; 2875 2876 prealloc_size = dev_priv->mm.gtt->stolen_size; 2877 gtt_size = dev_priv->mm.gtt->gtt_total_entries << PAGE_SHIFT; 2878 mappable_size = dev_priv->mm.gtt->gtt_mappable_entries << PAGE_SHIFT; 2879 2880 /* Basic memrange allocator for stolen space */ 2881 drm_mm_init(&dev_priv->mm.stolen, 0, prealloc_size); 2882 2883 DRM_LOCK(dev); 2884 if (intel_enable_ppgtt(dev) && HAS_ALIASING_PPGTT(dev)) { 2885 /* PPGTT pdes are stolen from global gtt ptes, so shrink the 2886 * aperture accordingly when using aliasing ppgtt. */ 2887 gtt_size -= I915_PPGTT_PD_ENTRIES*PAGE_SIZE; 2888 /* For paranoia keep the guard page in between. */ 2889 gtt_size -= PAGE_SIZE; 2890 2891 i915_gem_do_init(dev, 0, mappable_size, gtt_size); 2892 2893 ret = i915_gem_init_aliasing_ppgtt(dev); 2894 if (ret) { 2895 DRM_UNLOCK(dev); 2896 return ret; 2897 } 2898 } else { 2899 /* Let GEM Manage all of the aperture. 2900 * 2901 * However, leave one page at the end still bound to the scratch 2902 * page. There are a number of places where the hardware 2903 * apparently prefetches past the end of the object, and we've 2904 * seen multiple hangs with the GPU head pointer stuck in a 2905 * batchbuffer bound at the last page of the aperture. One page 2906 * should be enough to keep any prefetching inside of the 2907 * aperture. 2908 */ 2909 i915_gem_do_init(dev, 0, mappable_size, gtt_size - PAGE_SIZE); 2910 } 2911 2912 ret = i915_gem_init_hw(dev); 2913 DRM_UNLOCK(dev); 2914 if (ret != 0) { 2915 i915_gem_cleanup_aliasing_ppgtt(dev); 2916 return (ret); 2917 } 2918 2919 #if 0 2920 /* Try to set up FBC with a reasonable compressed buffer size */ 2921 if (I915_HAS_FBC(dev) && i915_powersave) { 2922 int cfb_size; 2923 2924 /* Leave 1M for line length buffer & misc. */ 2925 2926 /* Try to get a 32M buffer... */ 2927 if (prealloc_size > (36*1024*1024)) 2928 cfb_size = 32*1024*1024; 2929 else /* fall back to 7/8 of the stolen space */ 2930 cfb_size = prealloc_size * 7 / 8; 2931 i915_setup_compression(dev, cfb_size); 2932 } 2933 #endif 2934 2935 /* Allow hardware batchbuffers unless told otherwise, but not for KMS. 
*/ 2936 if (!drm_core_check_feature(dev, DRIVER_MODESET)) 2937 dev_priv->dri1.allow_batchbuffer = 1; 2938 return 0; 2939 } 2940 2941 void 2942 i915_gem_cleanup_ringbuffer(struct drm_device *dev) 2943 { 2944 drm_i915_private_t *dev_priv; 2945 int i; 2946 2947 dev_priv = dev->dev_private; 2948 for (i = 0; i < I915_NUM_RINGS; i++) 2949 intel_cleanup_ring_buffer(&dev_priv->ring[i]); 2950 } 2951 2952 int 2953 i915_gem_entervt_ioctl(struct drm_device *dev, void *data, 2954 struct drm_file *file_priv) 2955 { 2956 drm_i915_private_t *dev_priv = dev->dev_private; 2957 int ret; 2958 2959 if (drm_core_check_feature(dev, DRIVER_MODESET)) 2960 return 0; 2961 2962 if (atomic_read(&dev_priv->mm.wedged)) { 2963 DRM_ERROR("Reenabling wedged hardware, good luck\n"); 2964 atomic_set(&dev_priv->mm.wedged, 0); 2965 } 2966 2967 DRM_LOCK(dev); 2968 dev_priv->mm.suspended = 0; 2969 2970 ret = i915_gem_init_hw(dev); 2971 if (ret != 0) { 2972 DRM_UNLOCK(dev); 2973 return ret; 2974 } 2975 2976 KASSERT(list_empty(&dev_priv->mm.active_list), ("active list")); 2977 BUG_ON(!list_empty(&dev_priv->mm.inactive_list)); 2978 DRM_UNLOCK(dev); 2979 2980 ret = drm_irq_install(dev); 2981 if (ret) 2982 goto cleanup_ringbuffer; 2983 2984 return 0; 2985 2986 cleanup_ringbuffer: 2987 DRM_LOCK(dev); 2988 i915_gem_cleanup_ringbuffer(dev); 2989 dev_priv->mm.suspended = 1; 2990 DRM_UNLOCK(dev); 2991 2992 return ret; 2993 } 2994 2995 int 2996 i915_gem_leavevt_ioctl(struct drm_device *dev, void *data, 2997 struct drm_file *file_priv) 2998 { 2999 3000 if (drm_core_check_feature(dev, DRIVER_MODESET)) 3001 return 0; 3002 3003 drm_irq_uninstall(dev); 3004 return (i915_gem_idle(dev)); 3005 } 3006 3007 void 3008 i915_gem_lastclose(struct drm_device *dev) 3009 { 3010 int ret; 3011 3012 if (drm_core_check_feature(dev, DRIVER_MODESET)) 3013 return; 3014 3015 ret = i915_gem_idle(dev); 3016 if (ret != 0) 3017 DRM_ERROR("failed to idle hardware: %d\n", ret); 3018 } 3019 3020 static void 3021 init_ring_lists(struct intel_ring_buffer *ring) 3022 { 3023 3024 INIT_LIST_HEAD(&ring->active_list); 3025 INIT_LIST_HEAD(&ring->request_list); 3026 INIT_LIST_HEAD(&ring->gpu_write_list); 3027 } 3028 3029 void 3030 i915_gem_load(struct drm_device *dev) 3031 { 3032 int i; 3033 drm_i915_private_t *dev_priv = dev->dev_private; 3034 3035 INIT_LIST_HEAD(&dev_priv->mm.active_list); 3036 INIT_LIST_HEAD(&dev_priv->mm.inactive_list); 3037 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 3038 INIT_LIST_HEAD(&dev_priv->mm.gtt_list); 3039 for (i = 0; i < I915_NUM_RINGS; i++) 3040 init_ring_lists(&dev_priv->ring[i]); 3041 for (i = 0; i < I915_MAX_NUM_FENCES; i++) 3042 INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list); 3043 INIT_DELAYED_WORK(&dev_priv->mm.retire_work, 3044 i915_gem_retire_work_handler); 3045 init_completion(&dev_priv->error_completion); 3046 3047 /* On GEN3 we really need to make sure the ARB C3 LP bit is set */ 3048 if (IS_GEN3(dev)) { 3049 I915_WRITE(MI_ARB_STATE, 3050 _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE)); 3051 } 3052 3053 dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL; 3054 3055 /* Old X drivers will take 0-2 for front, back, depth buffers */ 3056 if (!drm_core_check_feature(dev, DRIVER_MODESET)) 3057 dev_priv->fence_reg_start = 3; 3058 3059 if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev)) 3060 dev_priv->num_fence_regs = 16; 3061 else 3062 dev_priv->num_fence_regs = 8; 3063 3064 /* Initialize fence registers to zero */ 3065 i915_gem_reset_fences(dev); 3066 3067 i915_gem_detect_bit_6_swizzle(dev); 3068 
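	/*
	 * The remainder of i915_gem_load() sets up the pending-flip wait queue
	 * and the default interruptible state.  The Linux inactive shrinker is
	 * compiled out on this port (#if 0 below); a vm_lowmem eventhandler
	 * running i915_gem_lowmem() is registered instead so inactive objects
	 * can be evicted under memory pressure.
	 */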
init_waitqueue_head(&dev_priv->pending_flip_queue); 3069 3070 dev_priv->mm.interruptible = true; 3071 3072 #if 0 3073 dev_priv->mm.inactive_shrinker.shrink = i915_gem_inactive_shrink; 3074 dev_priv->mm.inactive_shrinker.seeks = DEFAULT_SEEKS; 3075 register_shrinker(&dev_priv->mm.inactive_shrinker); 3076 #else 3077 dev_priv->mm.i915_lowmem = EVENTHANDLER_REGISTER(vm_lowmem, 3078 i915_gem_lowmem, dev, EVENTHANDLER_PRI_ANY); 3079 #endif 3080 } 3081 3082 static int 3083 i915_gem_init_phys_object(struct drm_device *dev, int id, int size, int align) 3084 { 3085 drm_i915_private_t *dev_priv; 3086 struct drm_i915_gem_phys_object *phys_obj; 3087 int ret; 3088 3089 dev_priv = dev->dev_private; 3090 if (dev_priv->mm.phys_objs[id - 1] != NULL || size == 0) 3091 return (0); 3092 3093 phys_obj = kmalloc(sizeof(struct drm_i915_gem_phys_object), DRM_I915_GEM, 3094 M_WAITOK | M_ZERO); 3095 3096 phys_obj->id = id; 3097 3098 phys_obj->handle = drm_pci_alloc(dev, size, align, ~0); 3099 if (phys_obj->handle == NULL) { 3100 ret = -ENOMEM; 3101 goto free_obj; 3102 } 3103 pmap_change_attr((vm_offset_t)phys_obj->handle->vaddr, 3104 size / PAGE_SIZE, PAT_WRITE_COMBINING); 3105 3106 dev_priv->mm.phys_objs[id - 1] = phys_obj; 3107 3108 return (0); 3109 3110 free_obj: 3111 drm_free(phys_obj, DRM_I915_GEM); 3112 return (ret); 3113 } 3114 3115 static void 3116 i915_gem_free_phys_object(struct drm_device *dev, int id) 3117 { 3118 drm_i915_private_t *dev_priv; 3119 struct drm_i915_gem_phys_object *phys_obj; 3120 3121 dev_priv = dev->dev_private; 3122 if (dev_priv->mm.phys_objs[id - 1] == NULL) 3123 return; 3124 3125 phys_obj = dev_priv->mm.phys_objs[id - 1]; 3126 if (phys_obj->cur_obj != NULL) 3127 i915_gem_detach_phys_object(dev, phys_obj->cur_obj); 3128 3129 drm_pci_free(dev, phys_obj->handle); 3130 drm_free(phys_obj, DRM_I915_GEM); 3131 dev_priv->mm.phys_objs[id - 1] = NULL; 3132 } 3133 3134 void 3135 i915_gem_free_all_phys_object(struct drm_device *dev) 3136 { 3137 int i; 3138 3139 for (i = I915_GEM_PHYS_CURSOR_0; i <= I915_MAX_PHYS_OBJECT; i++) 3140 i915_gem_free_phys_object(dev, i); 3141 } 3142 3143 void 3144 i915_gem_detach_phys_object(struct drm_device *dev, 3145 struct drm_i915_gem_object *obj) 3146 { 3147 vm_page_t m; 3148 struct sf_buf *sf; 3149 char *vaddr, *dst; 3150 int i, page_count; 3151 3152 if (obj->phys_obj == NULL) 3153 return; 3154 vaddr = obj->phys_obj->handle->vaddr; 3155 3156 page_count = obj->base.size / PAGE_SIZE; 3157 VM_OBJECT_LOCK(obj->base.vm_obj); 3158 for (i = 0; i < page_count; i++) { 3159 m = i915_gem_wire_page(obj->base.vm_obj, i); 3160 if (m == NULL) 3161 continue; /* XXX */ 3162 3163 VM_OBJECT_UNLOCK(obj->base.vm_obj); 3164 sf = sf_buf_alloc(m); 3165 if (sf != NULL) { 3166 dst = (char *)sf_buf_kva(sf); 3167 memcpy(dst, vaddr + IDX_TO_OFF(i), PAGE_SIZE); 3168 sf_buf_free(sf); 3169 } 3170 drm_clflush_pages(&m, 1); 3171 3172 VM_OBJECT_LOCK(obj->base.vm_obj); 3173 vm_page_reference(m); 3174 vm_page_dirty(m); 3175 vm_page_busy_wait(m, FALSE, "i915gem"); 3176 vm_page_unwire(m, 0); 3177 vm_page_wakeup(m); 3178 atomic_add_long(&i915_gem_wired_pages_cnt, -1); 3179 } 3180 VM_OBJECT_UNLOCK(obj->base.vm_obj); 3181 intel_gtt_chipset_flush(); 3182 3183 obj->phys_obj->cur_obj = NULL; 3184 obj->phys_obj = NULL; 3185 } 3186 3187 int 3188 i915_gem_attach_phys_object(struct drm_device *dev, 3189 struct drm_i915_gem_object *obj, 3190 int id, 3191 int align) 3192 { 3193 drm_i915_private_t *dev_priv; 3194 vm_page_t m; 3195 struct sf_buf *sf; 3196 char *dst, *src; 3197 int i, page_count, ret; 3198 3199 if 
(id > I915_MAX_PHYS_OBJECT) 3200 return (-EINVAL); 3201 3202 if (obj->phys_obj != NULL) { 3203 if (obj->phys_obj->id == id) 3204 return (0); 3205 i915_gem_detach_phys_object(dev, obj); 3206 } 3207 3208 dev_priv = dev->dev_private; 3209 if (dev_priv->mm.phys_objs[id - 1] == NULL) { 3210 ret = i915_gem_init_phys_object(dev, id, obj->base.size, align); 3211 if (ret != 0) { 3212 DRM_ERROR("failed to init phys object %d size: %zu\n", 3213 id, obj->base.size); 3214 return (ret); 3215 } 3216 } 3217 3218 /* bind to the object */ 3219 obj->phys_obj = dev_priv->mm.phys_objs[id - 1]; 3220 obj->phys_obj->cur_obj = obj; 3221 3222 page_count = obj->base.size / PAGE_SIZE; 3223 3224 VM_OBJECT_LOCK(obj->base.vm_obj); 3225 ret = 0; 3226 for (i = 0; i < page_count; i++) { 3227 m = i915_gem_wire_page(obj->base.vm_obj, i); 3228 if (m == NULL) { 3229 ret = -EIO; 3230 break; 3231 } 3232 VM_OBJECT_UNLOCK(obj->base.vm_obj); 3233 sf = sf_buf_alloc(m); 3234 src = (char *)sf_buf_kva(sf); 3235 dst = (char *)obj->phys_obj->handle->vaddr + IDX_TO_OFF(i); 3236 memcpy(dst, src, PAGE_SIZE); 3237 sf_buf_free(sf); 3238 3239 VM_OBJECT_LOCK(obj->base.vm_obj); 3240 3241 vm_page_reference(m); 3242 vm_page_busy_wait(m, FALSE, "i915gem"); 3243 vm_page_unwire(m, 0); 3244 vm_page_wakeup(m); 3245 atomic_add_long(&i915_gem_wired_pages_cnt, -1); 3246 } 3247 VM_OBJECT_UNLOCK(obj->base.vm_obj); 3248 3249 return (0); 3250 } 3251 3252 static int 3253 i915_gem_phys_pwrite(struct drm_device *dev, struct drm_i915_gem_object *obj, 3254 uint64_t data_ptr, uint64_t offset, uint64_t size, 3255 struct drm_file *file_priv) 3256 { 3257 char *user_data, *vaddr; 3258 int ret; 3259 3260 vaddr = (char *)obj->phys_obj->handle->vaddr + offset; 3261 user_data = (char *)(uintptr_t)data_ptr; 3262 3263 if (copyin_nofault(user_data, vaddr, size) != 0) { 3264 /* The physical object once assigned is fixed for the lifetime 3265 * of the obj, so we can safely drop the lock and continue 3266 * to access vaddr. 3267 */ 3268 DRM_UNLOCK(dev); 3269 ret = -copyin(user_data, vaddr, size); 3270 DRM_LOCK(dev); 3271 if (ret != 0) 3272 return (ret); 3273 } 3274 3275 intel_gtt_chipset_flush(); 3276 return (0); 3277 } 3278 3279 void 3280 i915_gem_release(struct drm_device *dev, struct drm_file *file) 3281 { 3282 struct drm_i915_file_private *file_priv; 3283 struct drm_i915_gem_request *request; 3284 3285 file_priv = file->driver_priv; 3286 3287 /* Clean up our request list when the client is going away, so that 3288 * later retire_requests won't dereference our soon-to-be-gone 3289 * file_priv. 
3290 */ 3291 spin_lock(&file_priv->mm.lock); 3292 while (!list_empty(&file_priv->mm.request_list)) { 3293 request = list_first_entry(&file_priv->mm.request_list, 3294 struct drm_i915_gem_request, 3295 client_list); 3296 list_del(&request->client_list); 3297 request->file_priv = NULL; 3298 } 3299 spin_unlock(&file_priv->mm.lock); 3300 } 3301 3302 static int 3303 i915_gem_swap_io(struct drm_device *dev, struct drm_i915_gem_object *obj, 3304 uint64_t data_ptr, uint64_t size, uint64_t offset, enum uio_rw rw, 3305 struct drm_file *file) 3306 { 3307 vm_object_t vm_obj; 3308 vm_page_t m; 3309 struct sf_buf *sf; 3310 vm_offset_t mkva; 3311 vm_pindex_t obj_pi; 3312 int cnt, do_bit17_swizzling, length, obj_po, ret, swizzled_po; 3313 3314 if (obj->gtt_offset != 0 && rw == UIO_READ) 3315 do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); 3316 else 3317 do_bit17_swizzling = 0; 3318 3319 obj->dirty = 1; 3320 vm_obj = obj->base.vm_obj; 3321 ret = 0; 3322 3323 VM_OBJECT_LOCK(vm_obj); 3324 vm_object_pip_add(vm_obj, 1); 3325 while (size > 0) { 3326 obj_pi = OFF_TO_IDX(offset); 3327 obj_po = offset & PAGE_MASK; 3328 3329 m = i915_gem_wire_page(vm_obj, obj_pi); 3330 VM_OBJECT_UNLOCK(vm_obj); 3331 3332 sf = sf_buf_alloc(m); 3333 mkva = sf_buf_kva(sf); 3334 length = min(size, PAGE_SIZE - obj_po); 3335 while (length > 0) { 3336 if (do_bit17_swizzling && 3337 (VM_PAGE_TO_PHYS(m) & (1 << 17)) != 0) { 3338 cnt = roundup2(obj_po + 1, 64); 3339 cnt = min(cnt - obj_po, length); 3340 swizzled_po = obj_po ^ 64; 3341 } else { 3342 cnt = length; 3343 swizzled_po = obj_po; 3344 } 3345 if (rw == UIO_READ) 3346 ret = -copyout_nofault( 3347 (char *)mkva + swizzled_po, 3348 (void *)(uintptr_t)data_ptr, cnt); 3349 else 3350 ret = -copyin_nofault( 3351 (void *)(uintptr_t)data_ptr, 3352 (char *)mkva + swizzled_po, cnt); 3353 if (ret != 0) 3354 break; 3355 data_ptr += cnt; 3356 size -= cnt; 3357 length -= cnt; 3358 offset += cnt; 3359 obj_po += cnt; 3360 } 3361 sf_buf_free(sf); 3362 VM_OBJECT_LOCK(vm_obj); 3363 if (rw == UIO_WRITE) 3364 vm_page_dirty(m); 3365 vm_page_reference(m); 3366 vm_page_busy_wait(m, FALSE, "i915gem"); 3367 vm_page_unwire(m, 1); 3368 vm_page_wakeup(m); 3369 atomic_add_long(&i915_gem_wired_pages_cnt, -1); 3370 3371 if (ret != 0) 3372 break; 3373 } 3374 vm_object_pip_wakeup(vm_obj); 3375 VM_OBJECT_UNLOCK(vm_obj); 3376 3377 return (ret); 3378 } 3379 3380 static int 3381 i915_gem_gtt_write(struct drm_device *dev, struct drm_i915_gem_object *obj, 3382 uint64_t data_ptr, uint64_t size, uint64_t offset, struct drm_file *file) 3383 { 3384 vm_offset_t mkva; 3385 int ret; 3386 3387 /* 3388 * Pass the unaligned physical address and size to pmap_mapdev_attr() 3389 * so it can properly calculate whether an extra page needs to be 3390 * mapped or not to cover the requested range. The function will 3391 * add the page offset into the returned mkva for us. 
3392 */ 3393 mkva = (vm_offset_t)pmap_mapdev_attr(dev->agp->base + obj->gtt_offset + 3394 offset, size, PAT_WRITE_COMBINING); 3395 ret = -copyin_nofault((void *)(uintptr_t)data_ptr, (char *)mkva, size); 3396 pmap_unmapdev(mkva, size); 3397 return (ret); 3398 } 3399 3400 static int 3401 i915_gem_obj_io(struct drm_device *dev, uint32_t handle, uint64_t data_ptr, 3402 uint64_t size, uint64_t offset, enum uio_rw rw, struct drm_file *file) 3403 { 3404 struct drm_i915_gem_object *obj; 3405 vm_page_t *ma; 3406 vm_offset_t start, end; 3407 int npages, ret; 3408 3409 if (size == 0) 3410 return (0); 3411 start = trunc_page(data_ptr); 3412 end = round_page(data_ptr + size); 3413 npages = howmany(end - start, PAGE_SIZE); 3414 ma = kmalloc(npages * sizeof(vm_page_t), DRM_I915_GEM, M_WAITOK | 3415 M_ZERO); 3416 npages = vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map, 3417 (vm_offset_t)data_ptr, size, 3418 (rw == UIO_READ ? VM_PROT_WRITE : 0 ) | VM_PROT_READ, ma, npages); 3419 if (npages == -1) { 3420 ret = -EFAULT; 3421 goto free_ma; 3422 } 3423 3424 ret = i915_mutex_lock_interruptible(dev); 3425 if (ret != 0) 3426 goto unlocked; 3427 3428 obj = to_intel_bo(drm_gem_object_lookup(dev, file, handle)); 3429 if (&obj->base == NULL) { 3430 ret = -ENOENT; 3431 goto unlock; 3432 } 3433 if (offset > obj->base.size || size > obj->base.size - offset) { 3434 ret = -EINVAL; 3435 goto out; 3436 } 3437 3438 if (rw == UIO_READ) { 3439 ret = i915_gem_swap_io(dev, obj, data_ptr, size, offset, 3440 UIO_READ, file); 3441 } else { 3442 if (obj->phys_obj) { 3443 ret = i915_gem_phys_pwrite(dev, obj, data_ptr, offset, 3444 size, file); 3445 } else if (obj->gtt_space && 3446 obj->base.write_domain != I915_GEM_DOMAIN_CPU) { 3447 ret = i915_gem_object_pin(obj, 0, true); 3448 if (ret != 0) 3449 goto out; 3450 ret = i915_gem_object_set_to_gtt_domain(obj, true); 3451 if (ret != 0) 3452 goto out_unpin; 3453 ret = i915_gem_object_put_fence(obj); 3454 if (ret != 0) 3455 goto out_unpin; 3456 ret = i915_gem_gtt_write(dev, obj, data_ptr, size, 3457 offset, file); 3458 out_unpin: 3459 i915_gem_object_unpin(obj); 3460 } else { 3461 ret = i915_gem_object_set_to_cpu_domain(obj, true); 3462 if (ret != 0) 3463 goto out; 3464 ret = i915_gem_swap_io(dev, obj, data_ptr, size, offset, 3465 UIO_WRITE, file); 3466 } 3467 } 3468 out: 3469 drm_gem_object_unreference(&obj->base); 3470 unlock: 3471 DRM_UNLOCK(dev); 3472 unlocked: 3473 vm_page_unhold_pages(ma, npages); 3474 free_ma: 3475 drm_free(ma, DRM_I915_GEM); 3476 return (ret); 3477 } 3478 3479 static int 3480 i915_gem_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot, 3481 vm_ooffset_t foff, struct ucred *cred, u_short *color) 3482 { 3483 3484 *color = 0; /* XXXKIB */ 3485 return (0); 3486 } 3487 3488 int i915_intr_pf; 3489 3490 static int 3491 i915_gem_pager_fault(vm_object_t vm_obj, vm_ooffset_t offset, int prot, 3492 vm_page_t *mres) 3493 { 3494 struct drm_gem_object *gem_obj; 3495 struct drm_i915_gem_object *obj; 3496 struct drm_device *dev; 3497 drm_i915_private_t *dev_priv; 3498 vm_page_t m, oldm; 3499 int cause, ret; 3500 bool write; 3501 3502 gem_obj = vm_obj->handle; 3503 obj = to_intel_bo(gem_obj); 3504 dev = obj->base.dev; 3505 dev_priv = dev->dev_private; 3506 #if 0 3507 write = (prot & VM_PROT_WRITE) != 0; 3508 #else 3509 write = true; 3510 #endif 3511 vm_object_pip_add(vm_obj, 1); 3512 3513 /* 3514 * Remove the placeholder page inserted by vm_fault() from the 3515 * object before dropping the object lock. 
If 3516 * i915_gem_release_mmap() is active in parallel on this gem 3517 * object, then it owns the drm device sx and might find the 3518 * placeholder already. Then, since the page is busy, 3519 * i915_gem_release_mmap() sleeps waiting for the busy state 3520 * of the page cleared. We will be not able to acquire drm 3521 * device lock until i915_gem_release_mmap() is able to make a 3522 * progress. 3523 */ 3524 if (*mres != NULL) { 3525 oldm = *mres; 3526 vm_page_remove(oldm); 3527 *mres = NULL; 3528 } else 3529 oldm = NULL; 3530 retry: 3531 VM_OBJECT_UNLOCK(vm_obj); 3532 unlocked_vmobj: 3533 cause = ret = 0; 3534 m = NULL; 3535 3536 if (i915_intr_pf) { 3537 ret = i915_mutex_lock_interruptible(dev); 3538 if (ret != 0) { 3539 cause = 10; 3540 goto out; 3541 } 3542 } else 3543 DRM_LOCK(dev); 3544 3545 /* 3546 * Since the object lock was dropped, other thread might have 3547 * faulted on the same GTT address and instantiated the 3548 * mapping for the page. Recheck. 3549 */ 3550 VM_OBJECT_LOCK(vm_obj); 3551 m = vm_page_lookup(vm_obj, OFF_TO_IDX(offset)); 3552 if (m != NULL) { 3553 if ((m->flags & PG_BUSY) != 0) { 3554 DRM_UNLOCK(dev); 3555 #if 0 /* XXX */ 3556 vm_page_sleep(m, "915pee"); 3557 #endif 3558 goto retry; 3559 } 3560 goto have_page; 3561 } else 3562 VM_OBJECT_UNLOCK(vm_obj); 3563 3564 /* Now bind it into the GTT if needed */ 3565 if (!obj->map_and_fenceable) { 3566 ret = i915_gem_object_unbind(obj); 3567 if (ret != 0) { 3568 cause = 20; 3569 goto unlock; 3570 } 3571 } 3572 if (!obj->gtt_space) { 3573 ret = i915_gem_object_bind_to_gtt(obj, 0, true); 3574 if (ret != 0) { 3575 cause = 30; 3576 goto unlock; 3577 } 3578 3579 ret = i915_gem_object_set_to_gtt_domain(obj, write); 3580 if (ret != 0) { 3581 cause = 40; 3582 goto unlock; 3583 } 3584 } 3585 3586 if (obj->tiling_mode == I915_TILING_NONE) 3587 ret = i915_gem_object_put_fence(obj); 3588 else 3589 ret = i915_gem_object_get_fence(obj); 3590 if (ret != 0) { 3591 cause = 50; 3592 goto unlock; 3593 } 3594 3595 if (i915_gem_object_is_inactive(obj)) 3596 list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list); 3597 3598 obj->fault_mappable = true; 3599 VM_OBJECT_LOCK(vm_obj); 3600 m = vm_phys_fictitious_to_vm_page(dev->agp->base + obj->gtt_offset + 3601 offset); 3602 if (m == NULL) { 3603 cause = 60; 3604 ret = -EFAULT; 3605 goto unlock; 3606 } 3607 KASSERT((m->flags & PG_FICTITIOUS) != 0, 3608 ("not fictitious %p", m)); 3609 KASSERT(m->wire_count == 1, ("wire_count not 1 %p", m)); 3610 3611 if ((m->flags & PG_BUSY) != 0) { 3612 DRM_UNLOCK(dev); 3613 #if 0 /* XXX */ 3614 vm_page_sleep(m, "915pbs"); 3615 #endif 3616 goto retry; 3617 } 3618 m->valid = VM_PAGE_BITS_ALL; 3619 vm_page_insert(m, vm_obj, OFF_TO_IDX(offset)); 3620 have_page: 3621 *mres = m; 3622 vm_page_busy_try(m, false); 3623 3624 DRM_UNLOCK(dev); 3625 if (oldm != NULL) { 3626 vm_page_free(oldm); 3627 } 3628 vm_object_pip_wakeup(vm_obj); 3629 return (VM_PAGER_OK); 3630 3631 unlock: 3632 DRM_UNLOCK(dev); 3633 out: 3634 KASSERT(ret != 0, ("i915_gem_pager_fault: wrong return")); 3635 if (ret == -EAGAIN || ret == -EIO || ret == -EINTR) { 3636 goto unlocked_vmobj; 3637 } 3638 VM_OBJECT_LOCK(vm_obj); 3639 vm_object_pip_wakeup(vm_obj); 3640 return (VM_PAGER_ERROR); 3641 } 3642 3643 static void 3644 i915_gem_pager_dtor(void *handle) 3645 { 3646 struct drm_gem_object *obj; 3647 struct drm_device *dev; 3648 3649 obj = handle; 3650 dev = obj->dev; 3651 3652 DRM_LOCK(dev); 3653 drm_gem_free_mmap_offset(obj); 3654 i915_gem_release_mmap(to_intel_bo(obj)); 3655 
drm_gem_object_unreference(obj); 3656 DRM_UNLOCK(dev); 3657 } 3658 3659 struct cdev_pager_ops i915_gem_pager_ops = { 3660 .cdev_pg_fault = i915_gem_pager_fault, 3661 .cdev_pg_ctor = i915_gem_pager_ctor, 3662 .cdev_pg_dtor = i915_gem_pager_dtor 3663 }; 3664 3665 #define GEM_PARANOID_CHECK_GTT 0 3666 #if GEM_PARANOID_CHECK_GTT 3667 static void 3668 i915_gem_assert_pages_not_mapped(struct drm_device *dev, vm_page_t *ma, 3669 int page_count) 3670 { 3671 struct drm_i915_private *dev_priv; 3672 vm_paddr_t pa; 3673 unsigned long start, end; 3674 u_int i; 3675 int j; 3676 3677 dev_priv = dev->dev_private; 3678 start = OFF_TO_IDX(dev_priv->mm.gtt_start); 3679 end = OFF_TO_IDX(dev_priv->mm.gtt_end); 3680 for (i = start; i < end; i++) { 3681 pa = intel_gtt_read_pte_paddr(i); 3682 for (j = 0; j < page_count; j++) { 3683 if (pa == VM_PAGE_TO_PHYS(ma[j])) { 3684 panic("Page %p in GTT pte index %d pte %x", 3685 ma[i], i, intel_gtt_read_pte(i)); 3686 } 3687 } 3688 } 3689 } 3690 #endif 3691 3692 static void 3693 i915_gem_process_flushing_list(struct intel_ring_buffer *ring, 3694 uint32_t flush_domains) 3695 { 3696 struct drm_i915_gem_object *obj, *next; 3697 uint32_t old_write_domain; 3698 3699 list_for_each_entry_safe(obj, next, &ring->gpu_write_list, 3700 gpu_write_list) { 3701 if (obj->base.write_domain & flush_domains) { 3702 old_write_domain = obj->base.write_domain; 3703 obj->base.write_domain = 0; 3704 list_del_init(&obj->gpu_write_list); 3705 i915_gem_object_move_to_active(obj, ring); 3706 } 3707 } 3708 } 3709 3710 #define VM_OBJECT_LOCK_ASSERT_OWNED(object) 3711 3712 static vm_page_t 3713 i915_gem_wire_page(vm_object_t object, vm_pindex_t pindex) 3714 { 3715 vm_page_t m; 3716 int rv; 3717 3718 VM_OBJECT_LOCK_ASSERT_OWNED(object); 3719 m = vm_page_grab(object, pindex, VM_ALLOC_NORMAL | VM_ALLOC_RETRY); 3720 if (m->valid != VM_PAGE_BITS_ALL) { 3721 if (vm_pager_has_page(object, pindex)) { 3722 rv = vm_pager_get_page(object, &m, 1); 3723 m = vm_page_lookup(object, pindex); 3724 if (m == NULL) 3725 return (NULL); 3726 if (rv != VM_PAGER_OK) { 3727 vm_page_free(m); 3728 return (NULL); 3729 } 3730 } else { 3731 pmap_zero_page(VM_PAGE_TO_PHYS(m)); 3732 m->valid = VM_PAGE_BITS_ALL; 3733 m->dirty = 0; 3734 } 3735 } 3736 vm_page_wire(m); 3737 vm_page_wakeup(m); 3738 atomic_add_long(&i915_gem_wired_pages_cnt, 1); 3739 return (m); 3740 } 3741 3742 int 3743 i915_gem_flush_ring(struct intel_ring_buffer *ring, uint32_t invalidate_domains, 3744 uint32_t flush_domains) 3745 { 3746 int ret; 3747 3748 if (((invalidate_domains | flush_domains) & I915_GEM_GPU_DOMAINS) == 0) 3749 return 0; 3750 3751 ret = ring->flush(ring, invalidate_domains, flush_domains); 3752 if (ret) 3753 return ret; 3754 3755 if (flush_domains & I915_GEM_GPU_DOMAINS) 3756 i915_gem_process_flushing_list(ring, flush_domains); 3757 return 0; 3758 } 3759 3760 static int 3761 i915_gpu_is_active(struct drm_device *dev) 3762 { 3763 drm_i915_private_t *dev_priv = dev->dev_private; 3764 3765 return !list_empty(&dev_priv->mm.active_list); 3766 } 3767 3768 static void 3769 i915_gem_lowmem(void *arg) 3770 { 3771 struct drm_device *dev; 3772 struct drm_i915_private *dev_priv; 3773 struct drm_i915_gem_object *obj, *next; 3774 int cnt, cnt_fail, cnt_total; 3775 3776 dev = arg; 3777 dev_priv = dev->dev_private; 3778 3779 if (lockmgr(&dev->dev_struct_lock, LK_EXCLUSIVE|LK_NOWAIT)) 3780 return; 3781 3782 rescan: 3783 /* first scan for clean buffers */ 3784 i915_gem_retire_requests(dev); 3785 3786 cnt_total = cnt_fail = cnt = 0; 3787 3788 
list_for_each_entry_safe(obj, next, &dev_priv->mm.inactive_list, 3789 mm_list) { 3790 if (i915_gem_object_is_purgeable(obj)) { 3791 if (i915_gem_object_unbind(obj) != 0) 3792 cnt_total++; 3793 } else 3794 cnt_total++; 3795 } 3796 3797 /* second pass, evict/count anything still on the inactive list */
3798 list_for_each_entry_safe(obj, next, &dev_priv->mm.inactive_list, 3799 mm_list) { 3800 if (i915_gem_object_unbind(obj) == 0) 3801 cnt++; 3802 else 3803 cnt_fail++; 3804 } 3805
3806 if (cnt_fail > cnt_total / 100 && i915_gpu_is_active(dev)) { 3807 /* 3808 * We are desperate for pages, so as a last resort, wait 3809 * for the GPU to finish and discard whatever we can. 3810 * This dramatically reduces the number of OOM-killer 3811 * events whilst running the GPU aggressively. 3812 */ 3813 if (i915_gpu_idle(dev) == 0) 3814 goto rescan; 3815 } 3816 DRM_UNLOCK(dev); 3817 } 3818
3819 void 3820 i915_gem_unload(struct drm_device *dev) 3821 { 3822 struct drm_i915_private *dev_priv; 3823 3824 dev_priv = dev->dev_private; 3825 EVENTHANDLER_DEREGISTER(vm_lowmem, dev_priv->mm.i915_lowmem); 3826 } 3827