/*	$OpenBSD: i915_gem.c,v 1.70 2014/02/13 23:11:05 kettenis Exp $	*/
/*
 * Copyright (c) 2008-2009 Owain G. Ainsworth <oga@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
/*
 * Copyright © 2008 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */

#include <dev/pci/drm/drmP.h>
#include <dev/pci/drm/drm.h>
#include <dev/pci/drm/i915_drm.h>
#include "i915_drv.h"
#include "i915_trace.h"
#include "intel_drv.h"

#include <machine/pmap.h>

#include <sys/queue.h>
#include <sys/task.h>
#include <sys/time.h>

static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
static __must_check int i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
						    unsigned alignment,
						    bool map_and_fenceable,
						    bool nonblocking);
static int i915_gem_phys_pwrite(struct drm_device *dev,
				struct drm_i915_gem_object *obj,
				struct drm_i915_gem_pwrite *args,
				struct drm_file *file);

static void i915_gem_write_fence(struct drm_device *dev, int reg,
				 struct drm_i915_gem_object *obj);
static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
					 struct drm_i915_fence_reg *fence,
					 bool enable);

#ifdef notyet
static int i915_gem_inactive_shrink(struct shrinker *shrinker,
				    struct shrink_control *sc);
static long i915_gem_purge(struct drm_i915_private *dev_priv, long target);
static void i915_gem_shrink_all(struct drm_i915_private *dev_priv);
#endif
static void i915_gem_object_truncate(struct drm_i915_gem_object *obj);

static inline int timespec_to_jiffies(const struct timespec *);
static inline int timespec_valid(const struct timespec *);
static struct timespec ns_to_timespec(const int64_t);
static inline int64_t timespec_to_ns(const struct timespec *);

extern int ticks;

static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
{
	if (obj->tiling_mode)
		i915_gem_release_mmap(obj);

	/* As we do not have an associated fence register, we will force
	 * a tiling change if we ever need to acquire one.
	 */
	obj->fence_dirty = false;
	obj->fence_reg = I915_FENCE_REG_NONE;
}

/* some bookkeeping */
static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
				  size_t size)
{
	dev_priv->mm.object_count++;
	dev_priv->mm.object_memory += size;
}

static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
				     size_t size)
{
	dev_priv->mm.object_count--;
	dev_priv->mm.object_memory -= size;
}

static int
i915_gem_wait_for_error(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret;

	if (!atomic_read(&dev_priv->mm.wedged))
		return 0;

	/*
	 * Only wait 10 seconds for the gpu reset to complete to avoid hanging
	 * userspace. If it takes that long something really bad is going on and
	 * we should simply try to bail out and fail as gracefully as possible.
	 */
	mtx_enter(&dev_priv->error_completion_lock);
	while (dev_priv->error_completion == 0) {
		ret = -msleep(&dev_priv->error_completion,
		    &dev_priv->error_completion_lock, PCATCH, "915wco", 10*hz);
		if (ret) {
			mtx_leave(&dev_priv->error_completion_lock);
			return ret;
		}
	}
	mtx_leave(&dev_priv->error_completion_lock);

	if (atomic_read(&dev_priv->mm.wedged)) {
		/* GPU is hung, bump the completion count to account for
		 * the token we just consumed so that we never hit zero and
		 * end up waiting upon a subsequent completion event that
		 * will never happen.
		 */
		mtx_enter(&dev_priv->error_completion_lock);
		dev_priv->error_completion++;
		mtx_leave(&dev_priv->error_completion_lock);
	}
	return 0;
}

int i915_mutex_lock_interruptible(struct drm_device *dev)
{
	int ret;

	ret = i915_gem_wait_for_error(dev);
	if (ret)
		return ret;

	ret = -rw_enter(&dev->dev_lock, RW_WRITE | RW_INTR);
	if (ret)
		return ret;

	WARN_ON(i915_verify_lists(dev));
	return 0;
}

static inline bool
i915_gem_object_is_inactive(struct drm_i915_gem_object *obj)
{
	return obj->gtt_space && !obj->active;
}

int
i915_gem_init_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_gem_init *args = data;

	if (drm_core_check_feature(dev, DRIVER_MODESET))
		return -ENODEV;

	if (args->gtt_start >= args->gtt_end ||
	    (args->gtt_end | args->gtt_start) & (PAGE_SIZE - 1))
		return -EINVAL;

	/* GEM with user mode setting was never supported on ilk and later. */
	if (INTEL_INFO(dev)->gen >= 5)
		return -ENODEV;

	DRM_LOCK();
	i915_gem_init_global_gtt(dev, args->gtt_start,
				 args->gtt_end, args->gtt_end);
	DRM_UNLOCK();

	return 0;
}

int
i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
			    struct drm_file *file)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_get_aperture *args = data;
	struct drm_i915_gem_object *obj;
	size_t pinned;

	pinned = 0;
	DRM_LOCK();
	list_for_each_entry(obj, &dev_priv->mm.bound_list, gtt_list)
		if (obj->pin_count)
			pinned += obj->gtt_space->size;
	DRM_UNLOCK();

	args->aper_size = dev_priv->mm.gtt_total;
	args->aper_available_size = args->aper_size - pinned;

	return 0;
}

static int
i915_gem_create(struct drm_file *file,
		struct drm_device *dev,
		uint64_t size,
		uint32_t *handle_p)
{
	struct drm_i915_gem_object *obj;
	int ret;
	u32 handle;

	size = round_page(size);
	if (size == 0)
		return -EINVAL;

	/* Allocate the new object */
	obj = i915_gem_alloc_object(dev, size);
	if (obj == NULL)
		return -ENOMEM;

	ret = drm_gem_handle_create(file, &obj->base, &handle);
	if (ret) {
		drm_gem_object_release(&obj->base);
		i915_gem_info_remove_obj(dev->dev_private, obj->base.size);
		pool_put(&dev->objpl, obj);
		return ret;
	}

	/* drop reference from allocate - handle holds it now */
	drm_gem_object_unreference(&obj->base);
	trace_i915_gem_object_create(obj);

	*handle_p = handle;
	return 0;
}

int
i915_gem_dumb_create(struct drm_file *file,
		     struct drm_device *dev,
		     struct drm_mode_create_dumb *args)
{
	/* have to work out size/pitch and return them */
	args->pitch = roundup2(args->width * ((args->bpp + 7) / 8), 64);
	args->size = args->pitch * args->height;
	return i915_gem_create(file, dev,
			       args->size, &args->handle);
}

int i915_gem_dumb_destroy(struct drm_file *file,
			  struct drm_device *dev,
			  uint32_t handle)
{
	return drm_gem_handle_delete(file, handle);
}
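/*
 * Worked example (illustrative values, not taken from real callers): for a
 * 1024x768, 32bpp dumb buffer, i915_gem_dumb_create() above computes
 *
 *	bytes per pixel = (32 + 7) / 8           = 4
 *	pitch           = roundup2(1024 * 4, 64) = 4096
 *	size            = 4096 * 768             = 3145728 bytes (3 MiB)
 *
 * i.e. every scanline is padded out to a 64-byte multiple before the total
 * object size is derived from it.
 */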
/**
 * Creates a new mm object and returns a handle to it.
 */
int
i915_gem_create_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_gem_create *args = data;

	return i915_gem_create(file, dev,
			       args->size, &args->handle);
}

static int i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj)
{
	drm_i915_private_t *dev_priv = obj->base.dev->dev_private;

	return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 &&
		obj->tiling_mode != I915_TILING_NONE;
}

#define offset_in_page(off) ((off) & PAGE_MASK)

static void *
kmap(struct vm_page *pg)
{
	vaddr_t va;

#if defined (__HAVE_PMAP_DIRECT)
	va = pmap_map_direct(pg);
#else
	va = uvm_km_valloc_wait(phys_map, PAGE_SIZE);
	pmap_kenter_pa(va, VM_PAGE_TO_PHYS(pg), VM_PROT_READ|VM_PROT_WRITE);
	pmap_update(pmap_kernel());
#endif
	return (void *)va;
}

static void
kunmap(void *addr)
{
	vaddr_t va = (vaddr_t)addr;

#if defined (__HAVE_PMAP_DIRECT)
	pmap_unmap_direct(va);
#else
	pmap_kremove(va, PAGE_SIZE);
	pmap_update(pmap_kernel());
	uvm_km_free_wakeup(phys_map, va, PAGE_SIZE);
#endif
}

static inline void *
kmap_atomic(struct vm_page *pg)
{
	vaddr_t va;

#if defined (__HAVE_PMAP_DIRECT)
	va = pmap_map_direct(pg);
#else
	extern vaddr_t pmap_tmpmap_pa(paddr_t);
	va = pmap_tmpmap_pa(VM_PAGE_TO_PHYS(pg));
#endif
	return (void *)va;
}

static inline void
kunmap_atomic(void *addr)
{
#if defined (__HAVE_PMAP_DIRECT)
	pmap_unmap_direct((vaddr_t)addr);
#else
	extern void pmap_tmpunmap_pa(void);
	pmap_tmpunmap_pa();
#endif
}

static inline void
drm_clflush_virt_range(void *addr, size_t len)
{
	pmap_flush_cache((vaddr_t)addr, len);
}

static inline unsigned long
__copy_to_user(void *to, const void *from, unsigned len)
{
	if (copyout(from, to, len))
		return len;
	return 0;
}

static inline unsigned long
__copy_to_user_inatomic(void *to, const void *from, unsigned len)
{
	struct cpu_info *ci = curcpu();
	int error;

	ci->ci_inatomic = 1;
	error = copyout(from, to, len);
	ci->ci_inatomic = 0;

	return (error ? len : 0);
}

static inline int
__copy_to_user_swizzled(char __user *cpu_vaddr,
			const char *gpu_vaddr, int gpu_offset,
			int length)
{
	int ret, cpu_offset = 0;

	while (length > 0) {
		int cacheline_end = roundup2(gpu_offset + 1, 64);
		int this_length = min(cacheline_end - gpu_offset, length);
		int swizzled_gpu_offset = gpu_offset ^ 64;

		ret = __copy_to_user(cpu_vaddr + cpu_offset,
				     gpu_vaddr + swizzled_gpu_offset,
				     this_length);
		if (ret)
			return ret + length;

		cpu_offset += this_length;
		gpu_offset += this_length;
		length -= this_length;
	}

	return 0;
}

static inline unsigned long
__copy_from_user(void *to, const void *from, unsigned len)
{
	if (copyin(from, to, len))
		return len;
	return 0;
}

static inline unsigned long
__copy_from_user_inatomic_nocache(void *to, const void *from, unsigned len)
{
	struct cpu_info *ci = curcpu();
	int error;

	ci->ci_inatomic = 1;
	error = copyin(from, to, len);
	ci->ci_inatomic = 0;

	return (error ? len : 0);
}
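/*
 * Note on the swizzled copy helpers (explanatory, based on the code above and
 * below): when bit-6 swizzling depends on physical address bit 17, data in a
 * page whose bit 17 is set lives in the "other" 64-byte half of each 128-byte
 * block. The helpers therefore walk the buffer in chunks that never cross a
 * 64-byte boundary and XOR the GPU offset with 64, e.g. offset 0x10 is copied
 * from 0x50 and offset 0x70 from 0x30, which swaps the two halves of every
 * 128-byte block.
 */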
static inline int
__copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
			  const char __user *cpu_vaddr,
			  int length)
{
	int ret, cpu_offset = 0;

	while (length > 0) {
		int cacheline_end = roundup2(gpu_offset + 1, 64);
		int this_length = min(cacheline_end - gpu_offset, length);
		int swizzled_gpu_offset = gpu_offset ^ 64;

		ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
				       cpu_vaddr + cpu_offset,
				       this_length);
		if (ret)
			return ret + length;

		cpu_offset += this_length;
		gpu_offset += this_length;
		length -= this_length;
	}

	return 0;
}

/* Per-page copy function for the shmem pread fastpath.
 * Flushes invalid cachelines before reading the target if
 * needs_clflush is set. */
static int
shmem_pread_fast(struct vm_page *page, int shmem_page_offset, int page_length,
		 char __user *user_data,
		 bool page_do_bit17_swizzling, bool needs_clflush)
{
	char *vaddr;
	int ret;

	if (unlikely(page_do_bit17_swizzling))
		return -EINVAL;

	vaddr = kmap_atomic(page);
	if (needs_clflush)
		drm_clflush_virt_range(vaddr + shmem_page_offset,
				       page_length);
	ret = __copy_to_user_inatomic(user_data,
				      vaddr + shmem_page_offset,
				      page_length);
	kunmap_atomic(vaddr);

	return ret ? -EFAULT : 0;
}

#define round_up(x, y) ((((x) + ((y) - 1)) / (y)) * (y))
#define round_down(x, y) (((x) / (y)) * (y))

static void
shmem_clflush_swizzled_range(char *addr, unsigned long length,
			     bool swizzled)
{
	if (unlikely(swizzled)) {
		unsigned long start = (unsigned long) addr;
		unsigned long end = (unsigned long) addr + length;

		/* For swizzling simply ensure that we always flush both
		 * channels. Lame, but simple and it works. Swizzled
		 * pwrite/pread is far from a hotpath - current userspace
		 * doesn't use it at all. */
		start = round_down(start, 128);
		end = round_up(end, 128);

		drm_clflush_virt_range((void *)start, end - start);
	} else {
		drm_clflush_virt_range(addr, length);
	}

}

/* Only difference to the fast-path function is that this can handle bit17
 * and uses non-atomic copy and kmap functions. */
static int
shmem_pread_slow(struct vm_page *page, int shmem_page_offset, int page_length,
		 char __user *user_data,
		 bool page_do_bit17_swizzling, bool needs_clflush)
{
	char *vaddr;
	int ret;

	vaddr = kmap(page);
	if (needs_clflush)
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
					     page_length,
					     page_do_bit17_swizzling);

	if (page_do_bit17_swizzling)
		ret = __copy_to_user_swizzled(user_data,
					      vaddr, shmem_page_offset,
					      page_length);
	else
		ret = __copy_to_user(user_data,
				     vaddr + shmem_page_offset,
				     page_length);
	kunmap(vaddr);

	return ret ? -EFAULT : 0;
}
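/*
 * Illustrative example of the flushing above: shmem_clflush_swizzled_range()
 * widens a swizzled flush to 128-byte alignment so that both 64-byte channels
 * are covered. For a 32-byte range starting at 0x1234 it flushes from
 * round_down(0x1234, 128) = 0x1200 up to round_up(0x1254, 128) = 0x1280,
 * i.e. one full 128-byte block instead of just the bytes that were touched.
 */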
static int
i915_gem_shmem_pread(struct drm_device *dev,
		     struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pread *args,
		     struct drm_file *file)
{
	char __user *user_data;
	ssize_t remain;
	off_t offset;
	int shmem_page_offset, page_length, ret = 0;
	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
	int hit_slowpath = 0;
	int needs_clflush = 0;
	int i;

	user_data = (char __user *) (uintptr_t) args->data_ptr;
	remain = args->size;

	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);

	if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) {
		/* If we're not in the cpu read domain, set ourself into the gtt
		 * read domain and manually flush cachelines (if required). This
		 * optimizes for the case when the gpu will dirty the data
		 * anyway again before the next pread happens. */
		if (obj->cache_level == I915_CACHE_NONE)
			needs_clflush = 1;
		if (obj->gtt_space) {
			ret = i915_gem_object_set_to_gtt_domain(obj, false);
			if (ret)
				return ret;
		}
	}

	ret = i915_gem_object_get_pages(obj);
	if (ret)
		return ret;

	i915_gem_object_pin_pages(obj);

	offset = args->offset;

	for (i = 0; i < (obj->base.size >> PAGE_SHIFT); i++) {
		struct vm_page *page;

		if (i < offset >> PAGE_SHIFT)
			continue;

		if (remain <= 0)
			break;

		/* Operation in this page
		 *
		 * shmem_page_offset = offset within page in shmem file
		 * page_length = bytes to copy for this page
		 */
		shmem_page_offset = offset_in_page(offset);
		page_length = remain;
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - shmem_page_offset;

#ifdef __linux__
		page = sg_page(sg);
		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
			(page_to_phys(page) & (1 << 17)) != 0;
#else
		page = obj->pages[i];
		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
			(VM_PAGE_TO_PHYS(page) & (1 << 17)) != 0;
#endif

		ret = shmem_pread_fast(page, shmem_page_offset, page_length,
				       user_data, page_do_bit17_swizzling,
				       needs_clflush);
		if (ret == 0)
			goto next_page;

		hit_slowpath = 1;
		DRM_UNLOCK();

#ifdef __linux__
		if (!prefaulted) {
			ret = fault_in_multipages_writeable(user_data, remain);
			/* Userspace is tricking us, but we've already clobbered
			 * its pages with the prefault and promised to write the
			 * data up to the first fault. Hence ignore any errors
			 * and just continue. */
			(void)ret;
			prefaulted = 1;
		}
#endif

		ret = shmem_pread_slow(page, shmem_page_offset, page_length,
				       user_data, page_do_bit17_swizzling,
				       needs_clflush);

		DRM_LOCK();

next_page:
#ifdef __linux__
		mark_page_accessed(page);
#endif

		if (ret)
			goto out;

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

out:
	i915_gem_object_unpin_pages(obj);

	if (hit_slowpath) {
		/* Fixup: Kill any reinstated backing storage pages */
		if (obj->madv == __I915_MADV_PURGED)
			i915_gem_object_truncate(obj);
	}

	return ret;
}
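/*
 * In short, the pread path above tries shmem_pread_fast() for each page with
 * the DRM lock held, using an atomic kmap and a non-faulting copy. Only when
 * that fails does it drop the lock and retry with shmem_pread_slow(), which
 * may sleep and can also handle bit-17 swizzled pages; hit_slowpath then
 * makes sure any backing pages reinstated for an already-purged object are
 * truncated again.
 */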
/**
 * Reads data from the object referenced by handle.
 *
 * On error, the contents of *data are undefined.
 */
int
i915_gem_pread_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file)
{
	struct drm_i915_gem_pread *args = data;
	struct drm_i915_gem_object *obj;
	int ret = 0;

	if (args->size == 0)
		return 0;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Bounds check source. */
	if (args->offset > obj->base.size ||
	    args->size > obj->base.size - args->offset) {
		ret = -EINVAL;
		goto out;
	}

	trace_i915_gem_object_pread(obj, args->offset, args->size);

	ret = i915_gem_shmem_pread(dev, obj, args, file);

out:
	drm_gem_object_unreference(&obj->base);
unlock:
	DRM_UNLOCK();
	return ret;
}

#ifdef __linux__
/* This is the fast write path which cannot handle
 * page faults in the source data
 */

static inline int
fast_user_write(struct io_mapping *mapping,
		loff_t page_base, int page_offset,
		char __user *user_data,
		int length)
{
	void __iomem *vaddr_atomic;
	void *vaddr;
	unsigned long unwritten;

	vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
	/* We can use the cpu mem copy function because this is X86. */
	vaddr = (void __force*)vaddr_atomic + page_offset;
	unwritten = __copy_from_user_inatomic_nocache(vaddr,
						      user_data, length);
	io_mapping_unmap_atomic(vaddr_atomic);
	return unwritten;
}
#endif

/**
 * This is the fast pwrite path, where we copy the data directly from the
 * user into the GTT, uncached.
 */
static int
i915_gem_gtt_pwrite_fast(struct drm_device *dev,
			 struct drm_i915_gem_object *obj,
			 struct drm_i915_gem_pwrite *args,
			 struct drm_file *file)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	bus_space_handle_t bsh;
	bus_addr_t offset;
	bus_size_t size;
	char *vaddr;
	int ret;

	ret = i915_gem_object_pin(obj, 0, true, true);
	if (ret)
		goto out;

	ret = i915_gem_object_set_to_gtt_domain(obj, true);
	if (ret)
		goto out_unpin;

	ret = i915_gem_object_put_fence(obj);
	if (ret)
		goto out_unpin;

	offset = obj->gtt_offset + args->offset;
	size = round_page(offset + args->size) - trunc_page(offset);

	if ((ret = agp_map_subregion(dev_priv->agph,
	    trunc_page(offset), size, &bsh)) != 0)
		goto out_unpin;
	vaddr = bus_space_vaddr(dev_priv->bst, bsh);
	if (vaddr == NULL) {
		ret = -EFAULT;
		goto out_unmap;
	}

	ret = -copyin((char *)(uintptr_t)args->data_ptr,
	    vaddr + (offset & PAGE_MASK), args->size);

out_unmap:
	agp_unmap_subregion(dev_priv->agph, bsh, size);

out_unpin:
	i915_gem_object_unpin(obj);
out:
	return ret;
}
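/*
 * On OpenBSD the GTT fast path above maps the relevant aperture pages with
 * agp_map_subregion() and copies straight from userspace with copyin(); any
 * fault or mapping failure surfaces as -EFAULT, which the pwrite ioctl below
 * treats as a cue to fall back to the page-by-page shmem path.
 */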
/* Per-page copy function for the shmem pwrite fastpath.
 * Flushes invalid cachelines before writing to the target if
 * needs_clflush_before is set and flushes out any written cachelines after
 * writing if needs_clflush is set. */
static int
shmem_pwrite_fast(struct vm_page *page, int shmem_page_offset, int page_length,
		  char __user *user_data,
		  bool page_do_bit17_swizzling,
		  bool needs_clflush_before,
		  bool needs_clflush_after)
{
	char *vaddr;
	int ret;

	if (unlikely(page_do_bit17_swizzling))
		return -EINVAL;

	vaddr = kmap_atomic(page);
	if (needs_clflush_before)
		drm_clflush_virt_range(vaddr + shmem_page_offset,
				       page_length);
	ret = __copy_from_user_inatomic_nocache(vaddr + shmem_page_offset,
						user_data,
						page_length);
	if (needs_clflush_after)
		drm_clflush_virt_range(vaddr + shmem_page_offset,
				       page_length);
	kunmap_atomic(vaddr);

	return ret ? -EFAULT : 0;
}

/* Only difference to the fast-path function is that this can handle bit17
 * and uses non-atomic copy and kmap functions. */
static int
shmem_pwrite_slow(struct vm_page *page, int shmem_page_offset, int page_length,
		  char __user *user_data,
		  bool page_do_bit17_swizzling,
		  bool needs_clflush_before,
		  bool needs_clflush_after)
{
	char *vaddr;
	int ret;

	vaddr = kmap(page);
	if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
					     page_length,
					     page_do_bit17_swizzling);
	if (page_do_bit17_swizzling)
		ret = __copy_from_user_swizzled(vaddr, shmem_page_offset,
						user_data,
						page_length);
	else
		ret = __copy_from_user(vaddr + shmem_page_offset,
				       user_data,
				       page_length);
	if (needs_clflush_after)
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
					     page_length,
					     page_do_bit17_swizzling);
	kunmap(vaddr);

	return ret ? -EFAULT : 0;
}

static int
i915_gem_shmem_pwrite(struct drm_device *dev,
		      struct drm_i915_gem_object *obj,
		      struct drm_i915_gem_pwrite *args,
		      struct drm_file *file)
{
	ssize_t remain;
	off_t offset;
	char __user *user_data;
	int shmem_page_offset, page_length, ret = 0;
	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
	int hit_slowpath = 0;
	int needs_clflush_after = 0;
	int needs_clflush_before = 0;
	int i;

	user_data = (char __user *) (uintptr_t) args->data_ptr;
	remain = args->size;

	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);

	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
		/* If we're not in the cpu write domain, set ourself into the gtt
		 * write domain and manually flush cachelines (if required). This
		 * optimizes for the case when the gpu will use the data
		 * right away and we therefore have to clflush anyway. */
		if (obj->cache_level == I915_CACHE_NONE)
			needs_clflush_after = 1;
		if (obj->gtt_space) {
			ret = i915_gem_object_set_to_gtt_domain(obj, true);
			if (ret)
				return ret;
		}
	}
	/* Same trick applies for invalidate partially written cachelines before
	 * writing. */
	if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)
	    && obj->cache_level == I915_CACHE_NONE)
		needs_clflush_before = 1;

	ret = i915_gem_object_get_pages(obj);
	if (ret)
		return ret;

	i915_gem_object_pin_pages(obj);

	offset = args->offset;
	obj->dirty = 1;

	for (i = 0; i < (obj->base.size >> PAGE_SHIFT); i++) {
		struct vm_page *page;
		int partial_cacheline_write;

		if (i < offset >> PAGE_SHIFT)
			continue;

		if (remain <= 0)
			break;

		/* Operation in this page
		 *
		 * shmem_page_offset = offset within page in shmem file
		 * page_length = bytes to copy for this page
		 */
		shmem_page_offset = offset_in_page(offset);

		page_length = remain;
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - shmem_page_offset;

		/* If we don't overwrite a cacheline completely we need to be
		 * careful to have up-to-date data by first clflushing. Don't
		 * overcomplicate things and flush the entire patch. */
		partial_cacheline_write = needs_clflush_before &&
			((shmem_page_offset | page_length)
				& (curcpu()->ci_cflushsz - 1));

#ifdef __linux__
		page = sg_page(sg);
		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
			(page_to_phys(page) & (1 << 17)) != 0;
#else
		page = obj->pages[i];
		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
			(VM_PAGE_TO_PHYS(page) & (1 << 17)) != 0;
#endif

		ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
					user_data, page_do_bit17_swizzling,
					partial_cacheline_write,
					needs_clflush_after);
		if (ret == 0)
			goto next_page;

		hit_slowpath = 1;
		DRM_UNLOCK();
		ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
					user_data, page_do_bit17_swizzling,
					partial_cacheline_write,
					needs_clflush_after);

		DRM_LOCK();

next_page:
#ifdef __linux__
		set_page_dirty(page);
		mark_page_accessed(page);
#else
		atomic_clearbits_int(&page->pg_flags, PG_CLEAN);
#endif

		if (ret)
			goto out;

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

out:
	i915_gem_object_unpin_pages(obj);

	if (hit_slowpath) {
		/* Fixup: Kill any reinstated backing storage pages */
		if (obj->madv == __I915_MADV_PURGED)
			i915_gem_object_truncate(obj);
		/* and flush dirty cachelines in case the object isn't in the cpu write
		 * domain anymore. */
		if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
			i915_gem_clflush_object(obj);
			i915_gem_chipset_flush(dev);
		}
	}

	if (needs_clflush_after)
		i915_gem_chipset_flush(dev);

	return ret;
}
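/*
 * Example of the partial-cacheline rule above: with a 64-byte flush size,
 * a write of 0x40 bytes at page offset 0x20 sets partial_cacheline_write
 * because (0x20 | 0x40) & 0x3f is non-zero, so the destination cachelines
 * are clflushed before the copy to avoid mixing stale bytes into the lines
 * that are only partially overwritten.
 */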
/**
 * Writes data to the object referenced by handle.
 *
 * On error, the contents of the buffer that were to be modified are undefined.
 */
int
i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_gem_pwrite *args = data;
	struct drm_i915_gem_object *obj;
	int ret;

	if (args->size == 0)
		return 0;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Bounds check destination. */
	if (args->offset > obj->base.size ||
	    args->size > obj->base.size - args->offset) {
		ret = -EINVAL;
		goto out;
	}

	trace_i915_gem_object_pwrite(obj, args->offset, args->size);

	ret = -EFAULT;
	/* We can only do the GTT pwrite on untiled buffers, as otherwise
	 * it would end up going through the fenced access, and we'll get
	 * different detiling behavior between reading and writing.
	 * pread/pwrite currently are reading and writing from the CPU
	 * perspective, requiring manual detiling by the client.
	 */
	if (obj->phys_obj) {
		ret = i915_gem_phys_pwrite(dev, obj, args, file);
		goto out;
	}

	if (obj->cache_level == I915_CACHE_NONE &&
	    obj->tiling_mode == I915_TILING_NONE &&
	    obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
		ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
		/* Note that the gtt paths might fail with non-page-backed user
		 * pointers (e.g. gtt mappings when moving data between
		 * textures). Fallback to the shmem path in that case. */
	}

	if (ret == -EFAULT || ret == -ENOSPC)
		ret = i915_gem_shmem_pwrite(dev, obj, args, file);

out:
	drm_gem_object_unreference(&obj->base);
unlock:
	DRM_UNLOCK();
	return ret;
}

int
i915_gem_check_wedge(struct drm_i915_private *dev_priv,
		     bool interruptible)
{
	if (atomic_read(&dev_priv->mm.wedged)) {
		bool recovery_complete;

		/* Give the error handler a chance to run. */
		mtx_enter(&dev_priv->error_completion_lock);
		recovery_complete = dev_priv->error_completion > 0;
		mtx_leave(&dev_priv->error_completion_lock);

		/* Non-interruptible callers can't handle -EAGAIN, hence return
		 * -EIO unconditionally for these. */
		if (!interruptible)
			return -EIO;

		/* Recovery complete, but still wedged means reset failure. */
		if (recovery_complete)
			return -EIO;

		return -EAGAIN;
	}

	return 0;
}

/*
 * Compare seqno against outstanding lazy request. Emit a request if they are
 * equal.
 */
static int
i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno)
{
	int ret;

	// BUG_ON(!mutex_is_locked(&ring->dev->struct_mutex));

	ret = 0;
	if (seqno == ring->outstanding_lazy_request)
		ret = i915_add_request(ring, NULL, NULL);

	return ret;
}
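/*
 * The "outstanding lazy request" check above matters because requests are
 * emitted lazily: if the seqno a caller is about to wait on is still the
 * ring's outstanding_lazy_request, nothing has actually been submitted yet,
 * so i915_gem_check_olr() emits the request first and only then lets the
 * caller block on it.
 */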
/**
 * __wait_seqno - wait until execution of seqno has finished
 * @ring: the ring expected to report seqno
 * @seqno: duh!
 * @interruptible: do an interruptible wait (normally yes)
 * @timeout: in - how long to wait (NULL forever); out - how much time remaining
 *
 * Returns 0 if the seqno was found within the allotted time. Else returns the
 * errno with remaining time filled in timeout argument.
 */
static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
			bool interruptible, struct timespec *timeout)
{
	drm_i915_private_t *dev_priv = ring->dev->dev_private;
	struct timespec before, now, wait_time={1,0};
	struct timespec sleep_time;
	unsigned long timeout_jiffies;
	long end;
	bool wait_forever = true;
	int ret;

	if (i915_seqno_passed(ring->get_seqno(ring, true), seqno))
		return 0;

	trace_i915_gem_request_wait_begin(ring, seqno);

	if (timeout != NULL) {
		wait_time = *timeout;
		wait_forever = false;
	}

	timeout_jiffies = timespec_to_jiffies(&wait_time);

	if (WARN_ON(!ring->irq_get(ring)))
		return -ENODEV;

	/* Record current time in case interrupted by signal, or wedged. */
	nanouptime(&before);

#define EXIT_COND \
	(i915_seqno_passed(ring->get_seqno(ring, false), seqno) || \
	atomic_read(&dev_priv->mm.wedged))
	do {
		end = timeout_jiffies;
		mtx_enter(&dev_priv->irq_lock);
		do {
			if (EXIT_COND) {
				ret = 0;
				break;
			}
			ret = msleep(ring, &dev_priv->irq_lock,
			    PZERO | (interruptible ? PCATCH : 0),
			    "gemwt", end);
			nanouptime(&now);
			timespecsub(&now, &before, &sleep_time);
			if (timespeccmp(&sleep_time, &wait_time, >=)) {
				end = 0;
				break;
			}
			end = timeout_jiffies -
			    timespec_to_jiffies(&sleep_time);
		} while (ret == 0);
		mtx_leave(&dev_priv->irq_lock);
		switch (ret) {
		case 0:
			break;
		case ERESTART:
			end = -ERESTARTSYS;
			break;
		case EWOULDBLOCK:
			end = 0;
			break;
		default:
			end = -ret;
			break;
		}

		ret = i915_gem_check_wedge(dev_priv, interruptible);
		if (ret)
			end = ret;
	} while (end == 0 && wait_forever);

	nanouptime(&now);

	ring->irq_put(ring);
	trace_i915_gem_request_wait_end(ring, seqno);
#undef EXIT_COND

	if (timeout) {
		timespecsub(&now, &before, &sleep_time);
		timespecsub(timeout, &sleep_time, timeout);
	}

	switch (end) {
	case -EIO:
	case -EAGAIN: /* Wedged */
	case -ERESTARTSYS: /* Signal */
		return (int)end;
	case 0: /* Timeout */
		if (timeout)
			timeout->tv_sec = timeout->tv_nsec = 0;
		return -ETIMEDOUT;
	default: /* Completed */
		WARN_ON(end < 0); /* We're not aware of other errors */
		return 0;
	}
}

/**
 * Waits for a sequence number to be signaled, and cleans up the
 * request and object lists appropriately for that event.
 */
int
i915_wait_seqno(struct intel_ring_buffer *ring, uint32_t seqno)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	bool interruptible = dev_priv->mm.interruptible;
	int ret;

	// BUG_ON(!mutex_is_locked(&dev->struct_mutex));
	BUG_ON(seqno == 0);

	ret = i915_gem_check_wedge(dev_priv, interruptible);
	if (ret)
		return ret;

	ret = i915_gem_check_olr(ring, seqno);
	if (ret)
		return ret;

	return __wait_seqno(ring, seqno, interruptible, NULL);
}

/**
 * Ensures that all rendering to the object has completed and the object is
 * safe to unbind from the GTT or access from the CPU.
 */
static __must_check int
i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
			       bool readonly)
{
	struct intel_ring_buffer *ring = obj->ring;
	u32 seqno;
	int ret;

	seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
	if (seqno == 0)
		return 0;

	ret = i915_wait_seqno(ring, seqno);
	if (ret)
		return ret;

	i915_gem_retire_requests_ring(ring);

	/* Manually manage the write flush as we may have not yet
	 * retired the buffer.
	 */
	if (obj->last_write_seqno &&
	    i915_seqno_passed(seqno, obj->last_write_seqno)) {
		obj->last_write_seqno = 0;
		obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
	}

	return 0;
}

/* A nonblocking variant of the above wait. This is a highly dangerous routine
 * as the object state may change during this call.
 */
static __must_check int
i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
					    bool readonly)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_ring_buffer *ring = obj->ring;
	u32 seqno;
	int ret;

	rw_assert_wrlock(&dev->dev_lock);
	BUG_ON(!dev_priv->mm.interruptible);

	seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
	if (seqno == 0)
		return 0;

	ret = i915_gem_check_wedge(dev_priv, true);
	if (ret)
		return ret;

	ret = i915_gem_check_olr(ring, seqno);
	if (ret)
		return ret;

	DRM_UNLOCK();
	ret = __wait_seqno(ring, seqno, true, NULL);
	DRM_LOCK();

	i915_gem_retire_requests_ring(ring);

	/* Manually manage the write flush as we may have not yet
	 * retired the buffer.
	 */
	if (ret == 0 &&
	    obj->last_write_seqno &&
	    i915_seqno_passed(seqno, obj->last_write_seqno)) {
		obj->last_write_seqno = 0;
		obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
	}

	return ret;
}

/**
 * Called when user space prepares to use an object with the CPU, either
 * through the mmap ioctl's mapping or a GTT mapping.
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file)
{
	struct drm_i915_gem_set_domain *args = data;
	struct drm_i915_gem_object *obj;
	uint32_t read_domains = args->read_domains;
	uint32_t write_domain = args->write_domain;
	int ret;

	/* Only handle setting domains to types used by the CPU. */
	if (write_domain & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	if (read_domains & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	/* Having something in the write domain implies it's in the read
	 * domain, and only that read domain. Enforce that in the request.
	 */
	if (write_domain != 0 && read_domains != write_domain)
		return -EINVAL;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Try to flush the object off the GPU without holding the lock.
	 * We will repeat the flush holding the lock in the normal manner
	 * to catch cases where we are gazumped.
	 */
	ret = i915_gem_object_wait_rendering__nonblocking(obj, !write_domain);
	if (ret)
		goto unref;

	if (read_domains & I915_GEM_DOMAIN_GTT) {
		ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);

		/* Silently promote "you're not bound, there was nothing to do"
		 * to success, since the client was just asking us to
		 * make sure everything was done.
		 */
		if (ret == -EINVAL)
			ret = 0;
	} else {
		ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
	}

unref:
	drm_gem_object_unreference(&obj->base);
unlock:
	DRM_UNLOCK();
	return ret;
}

/**
 * Called when user space has done writes to this buffer
 */
int
i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file)
{
	struct drm_i915_gem_sw_finish *args = data;
	struct drm_i915_gem_object *obj;
	int ret = 0;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Pinned buffers may be scanout, so flush the cache */
	if (obj->pin_count)
		i915_gem_object_flush_cpu_write_domain(obj);

	drm_gem_object_unreference(&obj->base);
unlock:
	DRM_UNLOCK();
	return ret;
}

/**
 * Maps the contents of an object, returning the address it is mapped
 * into.
 *
 * While the mapping holds a reference on the contents of the object, it doesn't
 * imply a ref on the object itself.
 */
int
i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_gem_mmap *args = data;
	struct drm_gem_object *obj;
	vaddr_t addr;
	vsize_t size;
	int ret;

	size = round_page(args->size);
	if (size == 0)
		return -EINVAL;

	if (args->offset + size < args->offset)
		return -EINVAL;
	if (args->offset & PAGE_MASK)
		return -EINVAL;

	obj = drm_gem_object_lookup(dev, file, args->handle);
	if (obj == NULL)
		return -ENOENT;

	addr = 0;
	ret = -uvm_map(&curproc->p_vmspace->vm_map, &addr, size,
	    obj->uao, args->offset, 0, UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW,
	    UVM_INH_SHARE, UVM_ADV_RANDOM, 0));
	if (ret == 0)
		uao_reference(obj->uao);
	drm_gem_object_unreference_unlocked(obj);
	if (ret)
		return ret;

	args->addr_ptr = (uint64_t) addr;

	return 0;
}

int
i915_gem_fault(struct drm_gem_object *gem_obj, struct uvm_faultinfo *ufi,
    off_t offset, vaddr_t vaddr, vm_page_t *pps, int npages, int centeridx,
    vm_prot_t access_type, int flags)
{
	struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
	struct drm_device *dev = obj->base.dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	paddr_t paddr;
	int lcv, ret;
	int write = !!(access_type & VM_PROT_WRITE);
	vm_prot_t mapprot;
	boolean_t locked = TRUE;

	dev_priv->entries++;

	/*
	 * If we already own the lock, we must be doing a copyin or
	 * copyout in one of the fast paths.  Return failure such that
	 * we fall back on the slow path.
	 */
	if (!obj->base.map || RWLOCK_OWNER(&dev->dev_lock) == curproc) {
		uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap,
		    &obj->base.uobj, NULL);
		dev_priv->entries--;
		return (VM_PAGER_BAD);
	}

	offset -= obj->base.map->ext;

	if (rw_enter(&dev->dev_lock, RW_NOSLEEP | RW_WRITE) != 0) {
		uvmfault_unlockall(ufi, NULL, &obj->base.uobj, NULL);
		DRM_LOCK();
		locked = uvmfault_relock(ufi);
	}
	if (!locked) {
		DRM_UNLOCK();
		dev_priv->entries--;
		return (VM_PAGER_REFAULT);
	}

	/* Now bind it into the GTT if needed */
	ret = i915_gem_object_pin(obj, 0, true, false);
	if (ret)
		goto unlock;

	ret = i915_gem_object_set_to_gtt_domain(obj, write);
	if (ret)
		goto unpin;

	ret = i915_gem_object_get_fence(obj);
	if (ret)
		goto unpin;

	obj->fault_mappable = true;

	mapprot = ufi->entry->protection;
	/*
	 * if it's only a read fault, we only put ourselves into the gtt
	 * read domain, so make sure we fault again and set ourselves to write.
	 * this prevents us needing userland to do domain management and get
	 * it wrong, and makes us fully coherent with the gpu re mmap.
	 */
	if (write == 0)
		mapprot &= ~VM_PROT_WRITE;
	/* XXX try and be more efficient when we do this */
	for (lcv = 0 ; lcv < npages ; lcv++, offset += PAGE_SIZE,
	    vaddr += PAGE_SIZE) {
		if ((flags & PGO_ALLPAGES) == 0 && lcv != centeridx)
			continue;

		if (pps[lcv] == PGO_DONTCARE)
			continue;

		paddr = dev->agp->base + obj->gtt_offset + offset;

		if (pmap_enter(ufi->orig_map->pmap, vaddr, paddr,
		    mapprot, PMAP_CANFAIL | mapprot) != 0) {
			i915_gem_object_unpin(obj);
			uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap,
			    NULL, NULL);
			DRM_UNLOCK();
			dev_priv->entries--;
			pmap_update(ufi->orig_map->pmap);
			uvm_wait("intelflt");
			return (VM_PAGER_REFAULT);
		}
	}
unpin:
	i915_gem_object_unpin(obj);
unlock:
	uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, NULL, NULL);
	DRM_UNLOCK();
	dev_priv->entries--;
	pmap_update(ufi->orig_map->pmap);

	switch (ret) {
	case -EIO:
		/* If this -EIO is due to a gpu hang, give the reset code a
		 * chance to clean up the mess. Otherwise return the proper
		 * SIGBUS. */
		if (!atomic_read(&dev_priv->mm.wedged))
			return VM_PAGER_ERROR;
	case -EAGAIN:
		/* Give the error handler a chance to run and move the
		 * objects off the GPU active list. Next time we service the
		 * fault, we should be able to transition the page into the
		 * GTT without touching the GPU (and so avoid further
		 * EIO/EAGAIN). If the GPU is wedged, then there is no issue
		 * with coherency, just lost writes.
		 */
#if 0
		set_need_resched();
#endif
	case 0:
	case -ERESTART:
	case -EINTR:
	case -EBUSY:
		/*
		 * EBUSY is ok: this just means that another thread
		 * already did the job.
		 */
		return VM_PAGER_OK;
	case -ENOMEM:
		return VM_PAGER_ERROR;
	case -ENOSPC:
		return VM_PAGER_ERROR;
	default:
		WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret);
		return VM_PAGER_ERROR;
	}
}

/**
 * i915_gem_release_mmap - remove physical page mappings
 * @obj: obj in question
 *
 * Preserve the reservation of the mmapping with the DRM core code, but
 * relinquish ownership of the pages back to the system.
 *
 * It is vital that we remove the page mapping if we have mapped a tiled
 * object through the GTT and then lose the fence register due to
 * resource pressure. Similarly if the object has been moved out of the
 * aperture, then pages mapped into userspace must be revoked. Removing the
 * mapping will then trigger a page fault on the next user access, allowing
 * fixup by i915_gem_fault().
 */
void
i915_gem_release_mmap(struct drm_i915_gem_object *obj)
{
	struct inteldrm_softc *dev_priv = obj->base.dev->dev_private;
	struct vm_page *pg;

	if (!obj->fault_mappable)
		return;

	for (pg = &dev_priv->pgs[atop(obj->gtt_offset)];
	     pg != &dev_priv->pgs[atop(obj->gtt_offset + obj->base.size)];
	     pg++)
		pmap_page_protect(pg, VM_PROT_NONE);

	obj->fault_mappable = false;
}

static uint32_t
i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
{
	uint32_t gtt_size;

	if (INTEL_INFO(dev)->gen >= 4 ||
	    tiling_mode == I915_TILING_NONE)
		return size;

	/* Previous chips need a power-of-two fence region when tiling */
	if (INTEL_INFO(dev)->gen == 3)
		gtt_size = 1024*1024;
	else
		gtt_size = 512*1024;

	while (gtt_size < size)
		gtt_size <<= 1;

	return gtt_size;
}

/**
 * i915_gem_get_gtt_alignment - return required GTT alignment for an object
 * @obj: object to check
 *
 * Return the required GTT alignment for an object, taking into account
 * potential fence register mapping.
 */
static uint32_t
i915_gem_get_gtt_alignment(struct drm_device *dev,
			   uint32_t size,
			   int tiling_mode)
{
	/*
	 * Minimum alignment is 4k (GTT page size), but might be greater
	 * if a fence register is needed for the object.
	 */
	if (INTEL_INFO(dev)->gen >= 4 ||
	    tiling_mode == I915_TILING_NONE)
		return 4096;

	/*
	 * Previous chips need to be aligned to the size of the smallest
	 * fence register that can contain the object.
	 */
	return i915_gem_get_gtt_size(dev, size, tiling_mode);
}
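/*
 * Illustrative fence sizing from i915_gem_get_gtt_size() above: a tiled
 * 1.5 MiB object on gen3 needs a 2 MiB fence region (1 MiB doubled once),
 * while the same object on gen2 also needs 2 MiB (512 KiB doubled twice);
 * on gen4+ no power-of-two padding is applied and the object size is used
 * directly.
 */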
/**
 * i915_gem_get_unfenced_gtt_alignment - return required GTT alignment for an
 *					 unfenced object
 * @dev: the device
 * @size: size of the object
 * @tiling_mode: tiling mode of the object
 *
 * Return the required GTT alignment for an object, only taking into account
 * unfenced tiled surface requirements.
 */
uint32_t
i915_gem_get_unfenced_gtt_alignment(struct drm_device *dev,
				    uint32_t size,
				    int tiling_mode)
{
	/*
	 * Minimum alignment is 4k (GTT page size) for sane hw.
	 */
	if (INTEL_INFO(dev)->gen >= 4 || IS_G33(dev) ||
	    tiling_mode == I915_TILING_NONE)
		return 4096;

	/* Previous hardware however needs to be aligned to a power-of-two
	 * tile height. The simplest method for determining this is to reuse
	 * the power-of-tile object size.
	 */
	return i915_gem_get_gtt_size(dev, size, tiling_mode);
}

static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
{
#if 0
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
#endif
	int ret;

	if (obj->base.map)
		return 0;

#if 0
	dev_priv->mm.shrinker_no_lock_stealing = true;
#endif

	ret = drm_gem_create_mmap_offset(&obj->base);
#if 0
	if (ret != -ENOSPC)
		goto out;

	/* Badly fragmented mmap space? The only way we can recover
	 * space is by destroying unwanted objects. We can't randomly release
	 * mmap_offsets as userspace expects them to be persistent for the
	 * lifetime of the objects. The closest we can is to release the
	 * offsets on purgeable objects by truncating it and marking it purged,
	 * which prevents userspace from ever using that object again.
	 */
	i915_gem_purge(dev_priv, obj->base.size >> PAGE_SHIFT);
	ret = drm_gem_create_mmap_offset(&obj->base);
	if (ret != -ENOSPC)
		goto out;

	i915_gem_shrink_all(dev_priv);
	ret = drm_gem_create_mmap_offset(&obj->base);
out:
	dev_priv->mm.shrinker_no_lock_stealing = false;
#endif

	return ret;
}

static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
{
	if (!obj->base.map)
		return;

	drm_gem_free_mmap_offset(&obj->base);
}

int
i915_gem_mmap_gtt(struct drm_file *file,
		  struct drm_device *dev,
		  uint32_t handle,
		  uint64_t *offset)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj;
	int ret;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	if (obj->base.size > dev_priv->mm.gtt_mappable_end) {
		ret = -E2BIG;
		goto out;
	}

	if (obj->madv != I915_MADV_WILLNEED) {
		DRM_ERROR("Attempting to mmap a purgeable buffer\n");
		ret = -EINVAL;
		goto out;
	}

	ret = i915_gem_object_create_mmap_offset(obj);
	if (ret)
		goto out;

	*offset = (u64)obj->base.map->ext;

out:
	drm_gem_object_unreference(&obj->base);
unlock:
	DRM_UNLOCK();
	return ret;
}

/**
 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
 * @dev: DRM device
 * @data: GTT mapping ioctl data
 * @file: GEM object info
 *
 * Simply returns the fake offset to userspace so it can mmap it.
 * The mmap call will end up in drm_gem_mmap(), which will set things
 * up so we can get faults in the handler above.
 *
 * The fault handler will take care of binding the object into the GTT
 * (since it may have been evicted to make room for something), allocating
 * a fence register, and mapping the appropriate aperture address into
 * userspace.
 */
int
i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
			struct drm_file *file)
{
	struct drm_i915_gem_mmap_gtt *args = data;

	return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
}

/* Immediately discard the backing storage */
static void
i915_gem_object_truncate(struct drm_i915_gem_object *obj)
{
	i915_gem_object_free_mmap_offset(obj);

	obj->base.uao->pgops->pgo_flush(obj->base.uao, 0, obj->base.size,
	    PGO_ALLPAGES | PGO_FREE);

	obj->madv = __I915_MADV_PURGED;
}

// i915_gem_object_is_purgeable

static void
i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
{
	int page_count = obj->base.size / PAGE_SIZE;
#ifdef __linux__
	struct scatterlist *sg;
#endif
	int ret, i;

	BUG_ON(obj->madv == __I915_MADV_PURGED);

	ret = i915_gem_object_set_to_cpu_domain(obj, true);
	if (ret) {
		/* In the event of a disaster, abandon all caches and
		 * hope for the best.
		 */
		WARN_ON(ret != -EIO);
		i915_gem_clflush_object(obj);
		obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
	}

	if (i915_gem_object_needs_bit17_swizzle(obj))
		i915_gem_object_save_bit_17_swizzle(obj);

	if (obj->madv == I915_MADV_DONTNEED)
		obj->dirty = 0;

#ifdef __linux__
	for_each_sg(obj->pages->sgl, sg, page_count, i) {
		struct page *page = sg_page(sg);

		if (obj->dirty)
			set_page_dirty(page);

		if (obj->madv == I915_MADV_WILLNEED)
			mark_page_accessed(page);

		page_cache_release(page);
	}
#else
	for (i = 0; i < page_count; i++) {
		struct vm_page *page = obj->pages[i];

		if (obj->dirty)
			atomic_clearbits_int(&page->pg_flags, PG_CLEAN);
	}
	uvm_objunwire(obj->base.uao, 0, obj->base.size);
#endif
	obj->dirty = 0;

#ifdef __linux__
	sg_free_table(obj->pages);
#endif
	kfree(obj->pages);
}

static int
i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
{
	const struct drm_i915_gem_object_ops *ops = obj->ops;

	if (obj->pages == NULL)
		return 0;

	BUG_ON(obj->gtt_space);

	if (obj->pages_pin_count)
		return -EBUSY;

	/* ->put_pages might need to allocate memory for the bit17 swizzle
	 * array, hence protect them from being reaped by removing them from gtt
	 * lists early. */
	list_del(&obj->gtt_list);

	ops->put_pages(obj);
	obj->pages = NULL;

	if (i915_gem_object_is_purgeable(obj))
		i915_gem_object_truncate(obj);

	return 0;
}

#ifdef notyet
static long
__i915_gem_shrink(struct drm_i915_private *dev_priv, long target,
		  bool purgeable_only)
{
	struct drm_i915_gem_object *obj, *next;
	long count = 0;

	list_for_each_entry_safe(obj, next,
				 &dev_priv->mm.unbound_list,
				 gtt_list) {
		if ((i915_gem_object_is_purgeable(obj) || !purgeable_only) &&
		    i915_gem_object_put_pages(obj) == 0) {
			count += obj->base.size >> PAGE_SHIFT;
			if (count >= target)
				return count;
		}
	}

	list_for_each_entry_safe(obj, next,
				 &dev_priv->mm.inactive_list,
				 mm_list) {
		if ((i915_gem_object_is_purgeable(obj) || !purgeable_only) &&
		    i915_gem_object_unbind(obj) == 0 &&
		    i915_gem_object_put_pages(obj) == 0) {
			count += obj->base.size >> PAGE_SHIFT;
			if (count >= target)
				return count;
		}
	}

	return count;
}

static long
i915_gem_purge(struct drm_i915_private *dev_priv, long target)
{
	return __i915_gem_shrink(dev_priv, target, true);
}

static void
i915_gem_shrink_all(struct drm_i915_private *dev_priv)
{
	struct drm_i915_gem_object *obj, *next;

	i915_gem_evict_everything(dev_priv->dev);

	list_for_each_entry_safe(obj, next, &dev_priv->mm.unbound_list, gtt_list)
		i915_gem_object_put_pages(obj);
}
#endif /* notyet */

static int
i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
{
#ifdef __linux__
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
	int page_count, i;
	struct address_space *mapping;
	struct sg_table *st;
	struct scatterlist *sg;
	struct page *page;
	gfp_t gfp;
#else
	int page_count, i;
	struct vm_page **st;
	struct pglist plist;
	struct vm_page *page;
#endif

	/* Assert that the object is not currently in any GPU domain. As it
	 * wasn't in the GTT, there shouldn't be any way it could have been in
	 * a GPU cache
	 */
	BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
	BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);

#ifdef __linux__
	st = kmalloc(sizeof(*st), GFP_KERNEL);
	if (st == NULL)
		return -ENOMEM;

	page_count = obj->base.size / PAGE_SIZE;
	if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
		sg_free_table(st);
		kfree(st);
		return -ENOMEM;
	}

	/* Get the list of pages out of our struct file.  They'll be pinned
	 * at this point until we release them.
	 *
	 * Fail silently without starting the shrinker
	 */
	mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
	gfp = mapping_gfp_mask(mapping);
	gfp |= __GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD;
	gfp &= ~(__GFP_IO | __GFP_WAIT);
	for_each_sg(st->sgl, sg, page_count, i) {
		page = shmem_read_mapping_page_gfp(mapping, i, gfp);
		if (IS_ERR(page)) {
			i915_gem_purge(dev_priv, page_count);
			page = shmem_read_mapping_page_gfp(mapping, i, gfp);
		}
		if (IS_ERR(page)) {
			/* We've tried hard to allocate the memory by reaping
			 * our own buffer, now let the real VM do its job and
			 * go down in flames if truly OOM.
2014 */ 2015 gfp &= ~(__GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD); 2016 gfp |= __GFP_IO | __GFP_WAIT; 2017 2018 i915_gem_shrink_all(dev_priv); 2019 page = shmem_read_mapping_page_gfp(mapping, i, gfp); 2020 if (IS_ERR(page)) 2021 goto err_pages; 2022 2023 gfp |= __GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD; 2024 gfp &= ~(__GFP_IO | __GFP_WAIT); 2025 } 2026 2027 sg_set_page(sg, page, PAGE_SIZE, 0); 2028 } 2029 2030 obj->pages = st; 2031 #else 2032 page_count = obj->base.size / PAGE_SIZE; 2033 st = malloc(page_count * sizeof(struct vm_page *), M_DRM, 2034 M_WAITOK | M_CANFAIL); 2035 if (st == NULL) 2036 return -ENOMEM; 2037 2038 TAILQ_INIT(&plist); 2039 if (uvm_objwire(obj->base.uao, 0, obj->base.size, &plist)) 2040 goto err_pages; 2041 2042 i = 0; 2043 TAILQ_FOREACH(page, &plist, pageq) { 2044 st[i] = page; 2045 i++; 2046 } 2047 obj->pages = st; 2048 #endif 2049 2050 if (i915_gem_object_needs_bit17_swizzle(obj)) 2051 i915_gem_object_do_bit_17_swizzle(obj); 2052 2053 return 0; 2054 2055 #ifdef __linux__ 2056 err_pages: 2057 for_each_sg(st->sgl, sg, i, page_count) 2058 page_cache_release(sg_page(sg)); 2059 sg_free_table(st); 2060 kfree(st); 2061 return PTR_ERR(page); 2062 #else 2063 err_pages: 2064 free(st, M_DRM); 2065 return -ENOMEM; 2066 #endif 2067 } 2068 2069 /* Ensure that the associated pages are gathered from the backing storage 2070 * and pinned into our object. i915_gem_object_get_pages() may be called 2071 * multiple times before they are released by a single call to 2072 * i915_gem_object_put_pages() - once the pages are no longer referenced 2073 * either as a result of memory pressure (reaping pages under the shrinker) 2074 * or as the object is itself released. 2075 */ 2076 int 2077 i915_gem_object_get_pages(struct drm_i915_gem_object *obj) 2078 { 2079 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2080 const struct drm_i915_gem_object_ops *ops = obj->ops; 2081 int ret; 2082 2083 if (obj->pages) 2084 return 0; 2085 2086 BUG_ON(obj->pages_pin_count); 2087 2088 ret = ops->get_pages(obj); 2089 if (ret) 2090 return ret; 2091 2092 list_add_tail(&obj->gtt_list, &dev_priv->mm.unbound_list); 2093 return 0; 2094 } 2095 2096 void 2097 i915_gem_object_move_to_active(struct drm_i915_gem_object *obj, 2098 struct intel_ring_buffer *ring) 2099 { 2100 struct drm_device *dev = obj->base.dev; 2101 struct drm_i915_private *dev_priv = dev->dev_private; 2102 u32 seqno = intel_ring_get_seqno(ring); 2103 2104 BUG_ON(ring == NULL); 2105 if (obj->ring != ring && obj->last_write_seqno) { 2106 /* Keep the seqno relative to the current ring */ 2107 obj->last_write_seqno = seqno; 2108 } 2109 obj->ring = ring; 2110 2111 /* Add a reference if we're newly entering the active list. */ 2112 if (!obj->active) { 2113 drm_gem_object_reference(&obj->base); 2114 obj->active = 1; 2115 } 2116 2117 /* Move from whatever list we were on to the tail of execution. 
*/
2118 list_move_tail(&obj->mm_list, &dev_priv->mm.active_list);
2119 list_move_tail(&obj->ring_list, &ring->active_list);
2120
2121 obj->last_read_seqno = seqno;
2122
2123 if (obj->fenced_gpu_access) {
2124 obj->last_fenced_seqno = seqno;
2125
2126 /* Bump MRU to take account of the delayed flush */
2127 if (obj->fence_reg != I915_FENCE_REG_NONE) {
2128 struct drm_i915_fence_reg *reg;
2129
2130 reg = &dev_priv->fence_regs[obj->fence_reg];
2131 list_move_tail(&reg->lru_list,
2132 &dev_priv->mm.fence_list);
2133 }
2134 }
2135 }
2136
2137 static void
2138 i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
2139 {
2140 struct drm_device *dev = obj->base.dev;
2141 struct drm_i915_private *dev_priv = dev->dev_private;
2142
2143 BUG_ON(obj->base.write_domain & ~I915_GEM_GPU_DOMAINS);
2144 BUG_ON(!obj->active);
2145
2146 list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
2147
2148 list_del_init(&obj->ring_list);
2149 obj->ring = NULL;
2150
2151 obj->last_read_seqno = 0;
2152 obj->last_write_seqno = 0;
2153 obj->base.write_domain = 0;
2154
2155 obj->last_fenced_seqno = 0;
2156 obj->fenced_gpu_access = false;
2157
2158 obj->active = 0;
2159 drm_gem_object_unreference(&obj->base);
2160
2161 WARN_ON(i915_verify_lists(dev));
2162 }
2163
2164 static int
2165 i915_gem_handle_seqno_wrap(struct drm_device *dev)
2166 {
2167 struct drm_i915_private *dev_priv = dev->dev_private;
2168 struct intel_ring_buffer *ring;
2169 int ret, i, j;
2170
2171 /* The hardware uses various monotonic 32-bit counters; if we
2172 * detect that they will wrap around, we need to idle the GPU
2173 * and reset those counters.
2174 */
2175 ret = 0;
2176 for_each_ring(ring, dev_priv, i) {
2177 for (j = 0; j < ARRAY_SIZE(ring->sync_seqno); j++)
2178 ret |= ring->sync_seqno[j] != 0;
2179 }
2180 if (ret == 0)
2181 return ret;
2182
2183 ret = i915_gpu_idle(dev);
2184 if (ret)
2185 return ret;
2186
2187 i915_gem_retire_requests(dev);
2188 for_each_ring(ring, dev_priv, i) {
2189 for (j = 0; j < ARRAY_SIZE(ring->sync_seqno); j++)
2190 ring->sync_seqno[j] = 0;
2191 }
2192
2193 return 0;
2194 }
2195
2196 int
2197 i915_gem_get_seqno(struct drm_device *dev, u32 *seqno)
2198 {
2199 struct drm_i915_private *dev_priv = dev->dev_private;
2200
2201 /* reserve 0 for non-seqno */
2202 if (dev_priv->next_seqno == 0) {
2203 int ret = i915_gem_handle_seqno_wrap(dev);
2204 if (ret)
2205 return ret;
2206
2207 dev_priv->next_seqno = 1;
2208 }
2209
2210 *seqno = dev_priv->next_seqno++;
2211 return 0;
2212 }
2213
2214 int
2215 i915_add_request(struct intel_ring_buffer *ring,
2216 struct drm_file *file,
2217 u32 *out_seqno)
2218 {
2219 drm_i915_private_t *dev_priv = ring->dev->dev_private;
2220 struct drm_i915_gem_request *request;
2221 u32 request_ring_position;
2222 int was_empty;
2223 int ret;
2224
2225 /*
2226 * Emit any outstanding flushes - execbuf can fail to emit the flush
2227 * after having emitted the batchbuffer command. Hence we need to fix
2228 * things up similar to emitting the lazy request. The difference here
2229 * is that the flush _must_ happen before the next request, no matter
2230 * what.
2231 */
2232 ret = intel_ring_flush_all_caches(ring);
2233 if (ret)
2234 return ret;
2235
2236 request = kmalloc(sizeof(*request), GFP_KERNEL);
2237 if (request == NULL)
2238 return -ENOMEM;
2239
2240
2241 /* Record the position of the start of the request so that
2242 * should we detect the updated seqno part-way through the
2243 * GPU processing the request, we never over-estimate the
2244 * position of the head.
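* (For reference: when the request completes, i915_gem_retire_requests_ring()
* below copies request->tail into ring->last_retired_head as the last known
* position of the GPU head.)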
2245 */
2246 request_ring_position = intel_ring_get_tail(ring);
2247
2248 ret = ring->add_request(ring);
2249 if (ret) {
2250 kfree(request);
2251 return ret;
2252 }
2253
2254 request->seqno = intel_ring_get_seqno(ring);
2255 request->ring = ring;
2256 request->tail = request_ring_position;
2257 request->emitted_ticks = ticks;
2258 was_empty = list_empty(&ring->request_list);
2259 list_add_tail(&request->list, &ring->request_list);
2260 request->file_priv = NULL;
2261
2262 if (file) {
2263 struct drm_i915_file_private *file_priv = file->driver_priv;
2264
2265 mtx_enter(&file_priv->mm.lock);
2266 request->file_priv = file_priv;
2267 list_add_tail(&request->client_list,
2268 &file_priv->mm.request_list);
2269 mtx_leave(&file_priv->mm.lock);
2270 }
2271
2272 trace_i915_gem_request_add(ring, request->seqno);
2273 ring->outstanding_lazy_request = 0;
2274
2275 if (!dev_priv->mm.suspended) {
2276 if (i915_enable_hangcheck) {
2277 timeout_add_msec(&dev_priv->hangcheck_timer,
2278 DRM_I915_HANGCHECK_PERIOD);
2279 }
2280 if (was_empty) {
2281 timeout_add_sec(&dev_priv->mm.retire_timer, 1);
2282 intel_mark_busy(ring->dev);
2283 }
2284 }
2285
2286 if (out_seqno)
2287 *out_seqno = request->seqno;
2288 return 0;
2289 }
2290
2291 static inline void
2292 i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
2293 {
2294 struct drm_i915_file_private *file_priv = request->file_priv;
2295
2296 if (!file_priv)
2297 return;
2298
2299 mtx_enter(&file_priv->mm.lock);
2300 if (request->file_priv) {
2301 list_del(&request->client_list);
2302 request->file_priv = NULL;
2303 }
2304 mtx_leave(&file_priv->mm.lock);
2305 }
2306
2307 static void i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv,
2308 struct intel_ring_buffer *ring)
2309 {
2310 while (!list_empty(&ring->request_list)) {
2311 struct drm_i915_gem_request *request;
2312
2313 request = list_first_entry(&ring->request_list,
2314 struct drm_i915_gem_request,
2315 list);
2316
2317 list_del(&request->list);
2318 i915_gem_request_remove_from_client(request);
2319 kfree(request);
2320 }
2321
2322 while (!list_empty(&ring->active_list)) {
2323 struct drm_i915_gem_object *obj;
2324
2325 obj = list_first_entry(&ring->active_list,
2326 struct drm_i915_gem_object,
2327 ring_list);
2328
2329 i915_gem_object_move_to_inactive(obj);
2330 }
2331 }
2332
2333 static void i915_gem_reset_fences(struct drm_device *dev)
2334 {
2335 struct drm_i915_private *dev_priv = dev->dev_private;
2336 int i;
2337
2338 for (i = 0; i < dev_priv->num_fence_regs; i++) {
2339 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
2340
2341 i915_gem_write_fence(dev, i, NULL);
2342
2343 if (reg->obj)
2344 i915_gem_object_fence_lost(reg->obj);
2345
2346 reg->pin_count = 0;
2347 reg->obj = NULL;
2348 INIT_LIST_HEAD(&reg->lru_list);
2349 }
2350
2351 INIT_LIST_HEAD(&dev_priv->mm.fence_list);
2352 }
2353
2354 void i915_gem_reset(struct drm_device *dev)
2355 {
2356 struct drm_i915_private *dev_priv = dev->dev_private;
2357 struct drm_i915_gem_object *obj;
2358 struct intel_ring_buffer *ring;
2359 int i;
2360
2361 for_each_ring(ring, dev_priv, i)
2362 i915_gem_reset_ring_lists(dev_priv, ring);
2363
2364 /* Move everything out of the GPU domains to ensure we do any
2365 * necessary invalidation upon reuse.
2366 */ 2367 list_for_each_entry(obj, 2368 &dev_priv->mm.inactive_list, 2369 mm_list) 2370 { 2371 obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS; 2372 } 2373 2374 /* The fence registers are invalidated so clear them out */ 2375 i915_gem_reset_fences(dev); 2376 } 2377 2378 /** 2379 * This function clears the request list as sequence numbers are passed. 2380 */ 2381 void 2382 i915_gem_retire_requests_ring(struct intel_ring_buffer *ring) 2383 { 2384 uint32_t seqno; 2385 2386 if (list_empty(&ring->request_list)) 2387 return; 2388 2389 WARN_ON(i915_verify_lists(ring->dev)); 2390 2391 seqno = ring->get_seqno(ring, true); 2392 2393 while (!list_empty(&ring->request_list)) { 2394 struct drm_i915_gem_request *request; 2395 2396 request = list_first_entry(&ring->request_list, 2397 struct drm_i915_gem_request, 2398 list); 2399 2400 if (!i915_seqno_passed(seqno, request->seqno)) 2401 break; 2402 2403 trace_i915_gem_request_retire(ring, request->seqno); 2404 /* We know the GPU must have read the request to have 2405 * sent us the seqno + interrupt, so use the position 2406 * of tail of the request to update the last known position 2407 * of the GPU head. 2408 */ 2409 ring->last_retired_head = request->tail; 2410 2411 list_del(&request->list); 2412 i915_gem_request_remove_from_client(request); 2413 kfree(request); 2414 } 2415 2416 /* Move any buffers on the active list that are no longer referenced 2417 * by the ringbuffer to the flushing/inactive lists as appropriate. 2418 */ 2419 while (!list_empty(&ring->active_list)) { 2420 struct drm_i915_gem_object *obj; 2421 2422 obj = list_first_entry(&ring->active_list, 2423 struct drm_i915_gem_object, 2424 ring_list); 2425 2426 if (!i915_seqno_passed(seqno, obj->last_read_seqno)) 2427 break; 2428 2429 i915_gem_object_move_to_inactive(obj); 2430 } 2431 2432 if (unlikely(ring->trace_irq_seqno && 2433 i915_seqno_passed(seqno, ring->trace_irq_seqno))) { 2434 ring->irq_put(ring); 2435 ring->trace_irq_seqno = 0; 2436 } 2437 2438 WARN_ON(i915_verify_lists(ring->dev)); 2439 } 2440 2441 void 2442 i915_gem_retire_requests(struct drm_device *dev) 2443 { 2444 drm_i915_private_t *dev_priv = dev->dev_private; 2445 struct intel_ring_buffer *ring; 2446 int i; 2447 2448 for_each_ring(ring, dev_priv, i) 2449 i915_gem_retire_requests_ring(ring); 2450 } 2451 2452 void 2453 i915_gem_retire_work_handler(void *arg1, void *unused) 2454 { 2455 drm_i915_private_t *dev_priv = arg1; 2456 struct drm_device *dev; 2457 struct intel_ring_buffer *ring; 2458 bool idle; 2459 int i; 2460 2461 dev = (struct drm_device *)dev_priv->drmdev; 2462 2463 /* Come back later if the device is busy... */ 2464 if (rw_enter(&dev->dev_lock, RW_NOSLEEP | RW_WRITE)) { 2465 timeout_add_sec(&dev_priv->mm.retire_timer, 1); 2466 return; 2467 } 2468 2469 i915_gem_retire_requests(dev); 2470 2471 /* Send a periodic flush down the ring so we don't hold onto GEM 2472 * objects indefinitely. 2473 */ 2474 idle = true; 2475 for_each_ring(ring, dev_priv, i) { 2476 if (ring->gpu_caches_dirty) 2477 i915_add_request(ring, NULL, NULL); 2478 2479 idle &= list_empty(&ring->request_list); 2480 } 2481 2482 if (!dev_priv->mm.suspended && !idle) 2483 timeout_add_sec(&dev_priv->mm.retire_timer, 1); 2484 if (idle) 2485 intel_mark_idle(dev); 2486 2487 DRM_UNLOCK(); 2488 } 2489 2490 /** 2491 * Ensures that an object will eventually get non-busy by flushing any required 2492 * write domains, emitting any outstanding lazy request and retiring and 2493 * completed requests. 
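* (Both the wait ioctl and the busy ioctl below call this helper so that
* polling with a zero timeout still makes forward progress.)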
2494 */
2495 static int
2496 i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
2497 {
2498 int ret;
2499
2500 if (obj->active) {
2501 ret = i915_gem_check_olr(obj->ring, obj->last_read_seqno);
2502 if (ret)
2503 return ret;
2504
2505 i915_gem_retire_requests_ring(obj->ring);
2506 }
2507
2508 return 0;
2509 }
2510
2511 /**
2512 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
2513 * @DRM_IOCTL_ARGS: standard ioctl arguments
2514 *
2515 * Returns 0 if successful, else an error is returned with the remaining time in
2516 * the timeout parameter.
2517 * -ETIME: object is still busy after timeout
2518 * -ERESTARTSYS: signal interrupted the wait
2519 * -ENOENT: object doesn't exist
2520 * Also possible, but rare:
2521 * -EAGAIN: GPU wedged
2522 * -ENOMEM: damn
2523 * -ENODEV: Internal IRQ fail
2524 * -E?: The add request failed
2525 *
2526 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
2527 * non-zero timeout parameter the wait ioctl will wait for the given number of
2528 * nanoseconds on an object becoming unbusy. Since the wait itself does so
2529 * without holding struct_mutex the object may become re-busied before this
2530 * function completes. A similar but shorter race condition exists in the busy
2531 * ioctl.
2532 */
2533 int
2534 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
2535 {
2536 struct drm_i915_gem_wait *args = data;
2537 struct drm_i915_gem_object *obj;
2538 struct intel_ring_buffer *ring = NULL;
2539 struct timespec timeout_stack, *timeout = NULL;
2540 u32 seqno = 0;
2541 int ret = 0;
2542
2543 if (args->timeout_ns >= 0) {
2544 timeout_stack = ns_to_timespec(args->timeout_ns);
2545 timeout = &timeout_stack;
2546 }
2547
2548 ret = i915_mutex_lock_interruptible(dev);
2549 if (ret)
2550 return ret;
2551
2552 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->bo_handle));
2553 if (&obj->base == NULL) {
2554 DRM_UNLOCK();
2555 return -ENOENT;
2556 }
2557
2558 /* Need to make sure the object gets inactive eventually. */
2559 ret = i915_gem_object_flush_active(obj);
2560 if (ret)
2561 goto out;
2562
2563 if (obj->active) {
2564 seqno = obj->last_read_seqno;
2565 ring = obj->ring;
2566 }
2567
2568 if (seqno == 0)
2569 goto out;
2570
2571 /* Do this after OLR check to make sure we make forward progress polling
2572 * on this IOCTL with a 0 timeout (like busy ioctl)
2573 */
2574 if (!args->timeout_ns) {
2575 ret = -ETIMEDOUT;
2576 goto out;
2577 }
2578
2579 drm_gem_object_unreference(&obj->base);
2580 DRM_UNLOCK();
2581
2582 ret = __wait_seqno(ring, seqno, true, timeout);
2583 if (timeout) {
2584 WARN_ON(!timespec_valid(timeout));
2585 args->timeout_ns = timespec_to_ns(timeout);
2586 }
2587 return ret;
2588
2589 out:
2590 drm_gem_object_unreference(&obj->base);
2591 DRM_UNLOCK();
2592 return ret;
2593 }
2594
2595 /**
2596 * i915_gem_object_sync - sync an object to a ring.
2597 *
2598 * @obj: object which may be in use on another ring.
2599 * @to: ring we wish to use the object on. May be NULL.
2600 *
2601 * This code is meant to abstract object synchronization with the GPU.
2602 * Calling with NULL implies synchronizing the object with the CPU
2603 * rather than a particular GPU ring.
2604 *
2605 * Returns 0 if successful, else propagates up the lower layer error.
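*
* A minimal usage sketch (i915_gem_object_pin_to_display_plane() below is a
* real caller):
*
*	ret = i915_gem_object_sync(obj, to);
*	if (ret == 0)
*		; /* obj is now safe to use on 'to' */
*
* With semaphores enabled the stall is queued on the destination ring via
* sync_to(); otherwise (or when 'to' is NULL) the CPU waits for rendering
* to complete.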
2606 */ 2607 int 2608 i915_gem_object_sync(struct drm_i915_gem_object *obj, 2609 struct intel_ring_buffer *to) 2610 { 2611 struct intel_ring_buffer *from = obj->ring; 2612 u32 seqno; 2613 int ret, idx; 2614 2615 if (from == NULL || to == from) 2616 return 0; 2617 2618 if (to == NULL || !i915_semaphore_is_enabled(obj->base.dev)) 2619 return i915_gem_object_wait_rendering(obj, false); 2620 2621 idx = intel_ring_sync_index(from, to); 2622 2623 seqno = obj->last_read_seqno; 2624 if (seqno <= from->sync_seqno[idx]) 2625 return 0; 2626 2627 ret = i915_gem_check_olr(obj->ring, seqno); 2628 if (ret) 2629 return ret; 2630 2631 ret = to->sync_to(to, from, seqno); 2632 if (!ret) 2633 /* We use last_read_seqno because sync_to() 2634 * might have just caused seqno wrap under 2635 * the radar. 2636 */ 2637 from->sync_seqno[idx] = obj->last_read_seqno; 2638 2639 return ret; 2640 } 2641 2642 static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj) 2643 { 2644 u32 old_write_domain, old_read_domains; 2645 2646 /* Act a barrier for all accesses through the GTT */ 2647 DRM_MEMORYBARRIER(); 2648 2649 /* Force a pagefault for domain tracking on next user access */ 2650 i915_gem_release_mmap(obj); 2651 2652 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) 2653 return; 2654 2655 old_read_domains = obj->base.read_domains; 2656 old_write_domain = obj->base.write_domain; 2657 2658 obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT; 2659 obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT; 2660 2661 trace_i915_gem_object_change_domain(obj, 2662 old_read_domains, 2663 old_write_domain); 2664 } 2665 2666 /** 2667 * Unbinds an object from the GTT aperture. 2668 */ 2669 int 2670 i915_gem_object_unbind(struct drm_i915_gem_object *obj) 2671 { 2672 drm_i915_private_t *dev_priv = obj->base.dev->dev_private; 2673 int ret = 0; 2674 2675 if (obj->gtt_space == NULL) 2676 return 0; 2677 2678 if (obj->pin_count) 2679 return -EBUSY; 2680 2681 BUG_ON(obj->pages == NULL); 2682 2683 ret = i915_gem_object_finish_gpu(obj); 2684 if (ret) 2685 return ret; 2686 /* Continue on if we fail due to EIO, the GPU is hung so we 2687 * should be safe and we need to cleanup or else we might 2688 * cause memory corruption through use-after-free. 2689 */ 2690 2691 i915_gem_object_finish_gtt(obj); 2692 2693 /* release the fence reg _after_ flushing */ 2694 ret = i915_gem_object_put_fence(obj); 2695 if (ret) 2696 return ret; 2697 2698 trace_i915_gem_object_unbind(obj); 2699 2700 if (obj->has_global_gtt_mapping) 2701 i915_gem_gtt_unbind_object(obj); 2702 #ifdef notyet 2703 if (obj->has_aliasing_ppgtt_mapping) { 2704 i915_ppgtt_unbind_object(dev_priv->mm.aliasing_ppgtt, obj); 2705 obj->has_aliasing_ppgtt_mapping = 0; 2706 } 2707 #endif 2708 i915_gem_gtt_finish_object(obj); 2709 2710 list_del(&obj->mm_list); 2711 list_move_tail(&obj->gtt_list, &dev_priv->mm.unbound_list); 2712 /* Avoid an unnecessary call to unbind on rebind. */ 2713 obj->map_and_fenceable = true; 2714 2715 drm_mm_put_block(obj->gtt_space); 2716 obj->gtt_space = NULL; 2717 obj->gtt_offset = 0; 2718 2719 /* XXX Until we've hooked up the shrinking functions. */ 2720 i915_gem_object_put_pages(obj); 2721 2722 return 0; 2723 } 2724 2725 int i915_gpu_idle(struct drm_device *dev) 2726 { 2727 drm_i915_private_t *dev_priv = dev->dev_private; 2728 struct intel_ring_buffer *ring; 2729 int ret, i; 2730 2731 /* Flush everything onto the inactive list. 
*/ 2732 for_each_ring(ring, dev_priv, i) { 2733 ret = i915_switch_context(ring, NULL, DEFAULT_CONTEXT_ID); 2734 if (ret) 2735 return ret; 2736 2737 ret = intel_ring_idle(ring); 2738 if (ret) 2739 return ret; 2740 } 2741 2742 return 0; 2743 } 2744 2745 static void i965_write_fence_reg(struct drm_device *dev, int reg, 2746 struct drm_i915_gem_object *obj) 2747 { 2748 drm_i915_private_t *dev_priv = dev->dev_private; 2749 int fence_reg; 2750 int fence_pitch_shift; 2751 2752 if (INTEL_INFO(dev)->gen >= 6) { 2753 fence_reg = FENCE_REG_SANDYBRIDGE_0; 2754 fence_pitch_shift = SANDYBRIDGE_FENCE_PITCH_SHIFT; 2755 } else { 2756 fence_reg = FENCE_REG_965_0; 2757 fence_pitch_shift = I965_FENCE_PITCH_SHIFT; 2758 } 2759 2760 fence_reg += reg * 8; 2761 2762 /* To w/a incoherency with non-atomic 64-bit register updates, 2763 * we split the 64-bit update into two 32-bit writes. In order 2764 * for a partial fence not to be evaluated between writes, we 2765 * precede the update with write to turn off the fence register, 2766 * and only enable the fence as the last step. 2767 * 2768 * For extra levels of paranoia, we make sure each step lands 2769 * before applying the next step. 2770 */ 2771 I915_WRITE(fence_reg, 0); 2772 POSTING_READ(fence_reg); 2773 2774 if (obj) { 2775 u32 size = obj->gtt_space->size; 2776 uint64_t val; 2777 2778 val = (uint64_t)((obj->gtt_offset + size - 4096) & 2779 0xfffff000) << 32; 2780 val |= obj->gtt_offset & 0xfffff000; 2781 val |= (uint64_t)((obj->stride / 128) - 1) << fence_pitch_shift; 2782 if (obj->tiling_mode == I915_TILING_Y) 2783 val |= 1 << I965_FENCE_TILING_Y_SHIFT; 2784 val |= I965_FENCE_REG_VALID; 2785 2786 I915_WRITE(fence_reg + 4, val >> 32); 2787 POSTING_READ(fence_reg + 4); 2788 2789 I915_WRITE(fence_reg + 0, val); 2790 POSTING_READ(fence_reg); 2791 } else { 2792 I915_WRITE(fence_reg + 4, 0); 2793 POSTING_READ(fence_reg + 4); 2794 } 2795 } 2796 2797 static void i915_write_fence_reg(struct drm_device *dev, int reg, 2798 struct drm_i915_gem_object *obj) 2799 { 2800 drm_i915_private_t *dev_priv = dev->dev_private; 2801 u32 val; 2802 2803 if (obj) { 2804 u32 size = obj->gtt_space->size; 2805 int pitch_val; 2806 int tile_width; 2807 2808 WARN((obj->gtt_offset & ~I915_FENCE_START_MASK) || 2809 (size & -size) != size || 2810 (obj->gtt_offset & (size - 1)), 2811 "object 0x%08x [fenceable? 
%d] not 1M or pot-size (0x%08x) aligned\n", 2812 obj->gtt_offset, obj->map_and_fenceable, size); 2813 2814 if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev)) 2815 tile_width = 128; 2816 else 2817 tile_width = 512; 2818 2819 /* Note: pitch better be a power of two tile widths */ 2820 pitch_val = obj->stride / tile_width; 2821 pitch_val = ffs(pitch_val) - 1; 2822 2823 val = obj->gtt_offset; 2824 if (obj->tiling_mode == I915_TILING_Y) 2825 val |= 1 << I830_FENCE_TILING_Y_SHIFT; 2826 val |= I915_FENCE_SIZE_BITS(size); 2827 val |= pitch_val << I830_FENCE_PITCH_SHIFT; 2828 val |= I830_FENCE_REG_VALID; 2829 } else 2830 val = 0; 2831 2832 if (reg < 8) 2833 reg = FENCE_REG_830_0 + reg * 4; 2834 else 2835 reg = FENCE_REG_945_8 + (reg - 8) * 4; 2836 2837 I915_WRITE(reg, val); 2838 POSTING_READ(reg); 2839 } 2840 2841 static void i830_write_fence_reg(struct drm_device *dev, int reg, 2842 struct drm_i915_gem_object *obj) 2843 { 2844 drm_i915_private_t *dev_priv = dev->dev_private; 2845 uint32_t val; 2846 2847 if (obj) { 2848 u32 size = obj->gtt_space->size; 2849 uint32_t pitch_val; 2850 2851 WARN((obj->gtt_offset & ~I830_FENCE_START_MASK) || 2852 (size & -size) != size || 2853 (obj->gtt_offset & (size - 1)), 2854 "object 0x%08x not 512K or pot-size 0x%08x aligned\n", 2855 obj->gtt_offset, size); 2856 2857 pitch_val = obj->stride / 128; 2858 pitch_val = ffs(pitch_val) - 1; 2859 2860 val = obj->gtt_offset; 2861 if (obj->tiling_mode == I915_TILING_Y) 2862 val |= 1 << I830_FENCE_TILING_Y_SHIFT; 2863 val |= I830_FENCE_SIZE_BITS(size); 2864 val |= pitch_val << I830_FENCE_PITCH_SHIFT; 2865 val |= I830_FENCE_REG_VALID; 2866 } else 2867 val = 0; 2868 2869 I915_WRITE(FENCE_REG_830_0 + reg * 4, val); 2870 POSTING_READ(FENCE_REG_830_0 + reg * 4); 2871 } 2872 2873 static void i915_gem_write_fence(struct drm_device *dev, int reg, 2874 struct drm_i915_gem_object *obj) 2875 { 2876 switch (INTEL_INFO(dev)->gen) { 2877 case 7: 2878 case 6: 2879 case 5: 2880 case 4: i965_write_fence_reg(dev, reg, obj); break; 2881 case 3: i915_write_fence_reg(dev, reg, obj); break; 2882 case 2: i830_write_fence_reg(dev, reg, obj); break; 2883 default: break; 2884 } 2885 } 2886 2887 static inline int fence_number(struct drm_i915_private *dev_priv, 2888 struct drm_i915_fence_reg *fence) 2889 { 2890 return fence - dev_priv->fence_regs; 2891 } 2892 2893 static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj, 2894 struct drm_i915_fence_reg *fence, 2895 bool enable) 2896 { 2897 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2898 int reg = fence_number(dev_priv, fence); 2899 2900 i915_gem_write_fence(obj->base.dev, reg, enable ? obj : NULL); 2901 2902 if (enable) { 2903 obj->fence_reg = reg; 2904 fence->obj = obj; 2905 list_move_tail(&fence->lru_list, &dev_priv->mm.fence_list); 2906 } else { 2907 obj->fence_reg = I915_FENCE_REG_NONE; 2908 fence->obj = NULL; 2909 list_del_init(&fence->lru_list); 2910 } 2911 } 2912 2913 static int 2914 i915_gem_object_flush_fence(struct drm_i915_gem_object *obj) 2915 { 2916 if (obj->last_fenced_seqno) { 2917 int ret = i915_wait_seqno(obj->ring, obj->last_fenced_seqno); 2918 if (ret) 2919 return ret; 2920 2921 obj->last_fenced_seqno = 0; 2922 } 2923 2924 /* Ensure that all CPU reads are completed before installing a fence 2925 * and all writes before removing the fence. 
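* (The write barrier below is only issued while the object is still visible
* through the GTT read domain; otherwise there is nothing to order.)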
2926 */ 2927 if (obj->base.read_domains & I915_GEM_DOMAIN_GTT) 2928 DRM_WRITEMEMORYBARRIER(); 2929 2930 obj->fenced_gpu_access = false; 2931 return 0; 2932 } 2933 2934 int 2935 i915_gem_object_put_fence(struct drm_i915_gem_object *obj) 2936 { 2937 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2938 int ret; 2939 2940 ret = i915_gem_object_flush_fence(obj); 2941 if (ret) 2942 return ret; 2943 2944 if (obj->fence_reg == I915_FENCE_REG_NONE) 2945 return 0; 2946 2947 i915_gem_object_update_fence(obj, 2948 &dev_priv->fence_regs[obj->fence_reg], 2949 false); 2950 i915_gem_object_fence_lost(obj); 2951 2952 return 0; 2953 } 2954 2955 static struct drm_i915_fence_reg * 2956 i915_find_fence_reg(struct drm_device *dev) 2957 { 2958 struct drm_i915_private *dev_priv = dev->dev_private; 2959 struct drm_i915_fence_reg *reg, *avail; 2960 int i; 2961 2962 /* First try to find a free reg */ 2963 avail = NULL; 2964 for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) { 2965 reg = &dev_priv->fence_regs[i]; 2966 if (!reg->obj) 2967 return reg; 2968 2969 if (!reg->pin_count) 2970 avail = reg; 2971 } 2972 2973 if (avail == NULL) 2974 return NULL; 2975 2976 /* None available, try to steal one or wait for a user to finish */ 2977 list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) { 2978 if (reg->pin_count) 2979 continue; 2980 2981 return reg; 2982 } 2983 2984 return NULL; 2985 } 2986 2987 /** 2988 * i915_gem_object_get_fence - set up fencing for an object 2989 * @obj: object to map through a fence reg 2990 * 2991 * When mapping objects through the GTT, userspace wants to be able to write 2992 * to them without having to worry about swizzling if the object is tiled. 2993 * This function walks the fence regs looking for a free one for @obj, 2994 * stealing one if it can't find any. 2995 * 2996 * It then sets up the reg based on the object's properties: address, pitch 2997 * and tiling format. 2998 * 2999 * For an untiled surface, this removes any existing fence. 3000 */ 3001 int 3002 i915_gem_object_get_fence(struct drm_i915_gem_object *obj) 3003 { 3004 struct drm_device *dev = obj->base.dev; 3005 struct drm_i915_private *dev_priv = dev->dev_private; 3006 bool enable = obj->tiling_mode != I915_TILING_NONE; 3007 struct drm_i915_fence_reg *reg; 3008 int ret; 3009 3010 /* Have we updated the tiling parameters upon the object and so 3011 * will need to serialise the write to the associated fence register? 3012 */ 3013 if (obj->fence_dirty) { 3014 ret = i915_gem_object_flush_fence(obj); 3015 if (ret) 3016 return ret; 3017 } 3018 3019 /* Just update our place in the LRU if our fence is getting reused. 
*/
3020 if (obj->fence_reg != I915_FENCE_REG_NONE) {
3021 reg = &dev_priv->fence_regs[obj->fence_reg];
3022 if (!obj->fence_dirty) {
3023 list_move_tail(&reg->lru_list,
3024 &dev_priv->mm.fence_list);
3025 return 0;
3026 }
3027 } else if (enable) {
3028 reg = i915_find_fence_reg(dev);
3029 if (reg == NULL)
3030 return -EDEADLK;
3031
3032 if (reg->obj) {
3033 struct drm_i915_gem_object *old = reg->obj;
3034
3035 ret = i915_gem_object_flush_fence(old);
3036 if (ret)
3037 return ret;
3038
3039 i915_gem_object_fence_lost(old);
3040 }
3041 } else
3042 return 0;
3043
3044 i915_gem_object_update_fence(obj, reg, enable);
3045 obj->fence_dirty = false;
3046
3047 return 0;
3048 }
3049
3050 static bool i915_gem_valid_gtt_space(struct drm_device *dev,
3051 struct drm_mm_node *gtt_space,
3052 unsigned long cache_level)
3053 {
3054 struct drm_mm_node *other;
3055
3056 /* On non-LLC machines we have to be careful when putting differing
3057 * types of snoopable memory together to avoid the prefetcher
3058 * crossing memory domains and dying.
3059 */
3060 if (HAS_LLC(dev))
3061 return true;
3062
3063 if (gtt_space == NULL)
3064 return true;
3065
3066 if (list_empty(&gtt_space->node_list))
3067 return true;
3068
3069 other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list);
3070 if (other->allocated && !other->hole_follows && other->color != cache_level)
3071 return false;
3072
3073 other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list);
3074 if (other->allocated && !gtt_space->hole_follows && other->color != cache_level)
3075 return false;
3076
3077 return true;
3078 }
3079
3080 static void i915_gem_verify_gtt(struct drm_device *dev)
3081 {
3082 }
3083
3084 /**
3085 * Finds free space in the GTT aperture and binds the object there.
3086 */
3087 static int
3088 i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
3089 unsigned alignment,
3090 bool map_and_fenceable,
3091 bool nonblocking)
3092 {
3093 struct drm_device *dev = obj->base.dev;
3094 drm_i915_private_t *dev_priv = dev->dev_private;
3095 struct drm_mm_node *node;
3096 u32 size, fence_size, fence_alignment, unfenced_alignment;
3097 bool mappable, fenceable;
3098 int ret;
3099
3100 if (obj->madv != I915_MADV_WILLNEED) {
3101 DRM_ERROR("Attempting to bind a purgeable object\n");
3102 return -EINVAL;
3103 }
3104
3105 fence_size = i915_gem_get_gtt_size(dev,
3106 obj->base.size,
3107 obj->tiling_mode);
3108 fence_alignment = i915_gem_get_gtt_alignment(dev,
3109 obj->base.size,
3110 obj->tiling_mode);
3111 unfenced_alignment =
3112 i915_gem_get_unfenced_gtt_alignment(dev,
3113 obj->base.size,
3114 obj->tiling_mode);
3115
3116 if (alignment == 0)
3117 alignment = map_and_fenceable ? fence_alignment :
3118 unfenced_alignment;
3119 if (map_and_fenceable && alignment & (fence_alignment - 1)) {
3120 DRM_ERROR("Invalid object alignment requested %u\n", alignment);
3121 return -EINVAL;
3122 }
3123
3124 size = map_and_fenceable ? fence_size : obj->base.size;
3125
3126 /* If the object is bigger than the entire aperture, reject it early
3127 * before evicting everything in a vain attempt to find space.
3128 */
3129 if (obj->base.size >
3130 (map_and_fenceable ?
dev_priv->mm.gtt_mappable_end : dev_priv->mm.gtt_total)) { 3131 DRM_ERROR("Attempting to bind an object larger than the aperture\n"); 3132 return -E2BIG; 3133 } 3134 3135 ret = i915_gem_object_get_pages(obj); 3136 if (ret) 3137 return ret; 3138 3139 i915_gem_object_pin_pages(obj); 3140 3141 node = kzalloc(sizeof(*node), GFP_KERNEL); 3142 if (node == NULL) { 3143 i915_gem_object_unpin_pages(obj); 3144 /* XXX Until we've hooked up the shrinking functions. */ 3145 i915_gem_object_put_pages(obj); 3146 return -ENOMEM; 3147 } 3148 3149 search_free: 3150 if (map_and_fenceable) 3151 ret = drm_mm_insert_node_in_range_generic(&dev_priv->mm.gtt_space, node, 3152 size, alignment, obj->cache_level, 3153 0, dev_priv->mm.gtt_mappable_end); 3154 else 3155 ret = drm_mm_insert_node_generic(&dev_priv->mm.gtt_space, node, 3156 size, alignment, obj->cache_level); 3157 if (ret) { 3158 ret = i915_gem_evict_something(dev, size, alignment, 3159 obj->cache_level, 3160 map_and_fenceable, 3161 nonblocking); 3162 if (ret == 0) 3163 goto search_free; 3164 3165 i915_gem_object_unpin_pages(obj); 3166 /* XXX Until we've hooked up the shrinking functions. */ 3167 i915_gem_object_put_pages(obj); 3168 kfree(node); 3169 return ret; 3170 } 3171 if (WARN_ON(!i915_gem_valid_gtt_space(dev, node, obj->cache_level))) { 3172 i915_gem_object_unpin_pages(obj); 3173 /* XXX Until we've hooked up the shrinking functions. */ 3174 i915_gem_object_put_pages(obj); 3175 drm_mm_put_block(node); 3176 return -EINVAL; 3177 } 3178 3179 ret = i915_gem_gtt_prepare_object(obj); 3180 if (ret) { 3181 i915_gem_object_unpin_pages(obj); 3182 /* XXX Until we've hooked up the shrinking functions. */ 3183 i915_gem_object_put_pages(obj); 3184 drm_mm_put_block(node); 3185 return ret; 3186 } 3187 3188 list_move_tail(&obj->gtt_list, &dev_priv->mm.bound_list); 3189 list_add_tail(&obj->mm_list, &dev_priv->mm.inactive_list); 3190 3191 obj->gtt_space = node; 3192 obj->gtt_offset = node->start; 3193 3194 fenceable = 3195 node->size == fence_size && 3196 (node->start & (fence_alignment - 1)) == 0; 3197 3198 mappable = 3199 obj->gtt_offset + obj->base.size <= dev_priv->mm.gtt_mappable_end; 3200 3201 obj->map_and_fenceable = mappable && fenceable; 3202 3203 i915_gem_object_unpin_pages(obj); 3204 trace_i915_gem_object_bind(obj, map_and_fenceable); 3205 i915_gem_verify_gtt(dev); 3206 return 0; 3207 } 3208 3209 void 3210 i915_gem_clflush_object(struct drm_i915_gem_object *obj) 3211 { 3212 /* If we don't have a page list set up, then we're not pinned 3213 * to GPU, and we can ignore the cache flush because it'll happen 3214 * again at bind time. 3215 */ 3216 if (obj->pages == NULL) 3217 return; 3218 3219 /* If the GPU is snooping the contents of the CPU cache, 3220 * we do not need to manually clear the CPU cache lines. However, 3221 * the caches are only snooped when the render cache is 3222 * flushed/invalidated. As we always have to emit invalidations 3223 * and flushes when moving into and out of the RENDER domain, correct 3224 * snooping behaviour occurs naturally as the result of our domain 3225 * tracking. 3226 */ 3227 if (obj->cache_level != I915_CACHE_NONE) 3228 return; 3229 3230 #if 0 3231 trace_i915_gem_object_clflush(obj); 3232 3233 drm_clflush_sg(obj->pages); 3234 #else 3235 { 3236 int page_count = obj->base.size >> PAGE_SHIFT; 3237 int i; 3238 3239 for (i = 0; i < page_count; i++) 3240 pmap_flush_page(VM_PAGE_TO_PHYS(obj->pages[i])); 3241 } 3242 #endif 3243 } 3244 3245 /** Flushes the GTT write domain for the object if it's dirty. 
*/ 3246 static void 3247 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) 3248 { 3249 uint32_t old_write_domain; 3250 3251 if (obj->base.write_domain != I915_GEM_DOMAIN_GTT) 3252 return; 3253 3254 /* No actual flushing is required for the GTT write domain. Writes 3255 * to it immediately go to main memory as far as we know, so there's 3256 * no chipset flush. It also doesn't land in render cache. 3257 * 3258 * However, we do have to enforce the order so that all writes through 3259 * the GTT land before any writes to the device, such as updates to 3260 * the GATT itself. 3261 */ 3262 DRM_WRITEMEMORYBARRIER(); 3263 3264 old_write_domain = obj->base.write_domain; 3265 obj->base.write_domain = 0; 3266 3267 trace_i915_gem_object_change_domain(obj, 3268 obj->base.read_domains, 3269 old_write_domain); 3270 } 3271 3272 /** Flushes the CPU write domain for the object if it's dirty. */ 3273 static void 3274 i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) 3275 { 3276 uint32_t old_write_domain; 3277 3278 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) 3279 return; 3280 3281 i915_gem_clflush_object(obj); 3282 i915_gem_chipset_flush(obj->base.dev); 3283 old_write_domain = obj->base.write_domain; 3284 obj->base.write_domain = 0; 3285 3286 trace_i915_gem_object_change_domain(obj, 3287 obj->base.read_domains, 3288 old_write_domain); 3289 } 3290 3291 /** 3292 * Moves a single object to the GTT read, and possibly write domain. 3293 * 3294 * This function returns when the move is complete, including waiting on 3295 * flushes to occur. 3296 */ 3297 int 3298 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) 3299 { 3300 drm_i915_private_t *dev_priv = obj->base.dev->dev_private; 3301 uint32_t old_write_domain, old_read_domains; 3302 int ret; 3303 3304 /* Not valid to be called on unbound objects. */ 3305 if (obj->gtt_space == NULL) 3306 return -EINVAL; 3307 3308 if (obj->base.write_domain == I915_GEM_DOMAIN_GTT) 3309 return 0; 3310 3311 ret = i915_gem_object_wait_rendering(obj, !write); 3312 if (ret) 3313 return ret; 3314 3315 i915_gem_object_flush_cpu_write_domain(obj); 3316 3317 old_write_domain = obj->base.write_domain; 3318 old_read_domains = obj->base.read_domains; 3319 3320 /* It should now be out of any other write domains, and we can update 3321 * the domain values for our changes. 
3322 */ 3323 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0); 3324 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 3325 if (write) { 3326 obj->base.read_domains = I915_GEM_DOMAIN_GTT; 3327 obj->base.write_domain = I915_GEM_DOMAIN_GTT; 3328 obj->dirty = 1; 3329 } 3330 3331 trace_i915_gem_object_change_domain(obj, 3332 old_read_domains, 3333 old_write_domain); 3334 3335 /* And bump the LRU for this access */ 3336 if (i915_gem_object_is_inactive(obj)) 3337 list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list); 3338 3339 return 0; 3340 } 3341 3342 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, 3343 enum i915_cache_level cache_level) 3344 { 3345 struct drm_device *dev = obj->base.dev; 3346 #ifdef notyet 3347 drm_i915_private_t *dev_priv = dev->dev_private; 3348 #endif 3349 int ret; 3350 3351 if (obj->cache_level == cache_level) 3352 return 0; 3353 3354 if (obj->pin_count) { 3355 DRM_DEBUG("can not change the cache level of pinned objects\n"); 3356 return -EBUSY; 3357 } 3358 3359 if (!i915_gem_valid_gtt_space(dev, obj->gtt_space, cache_level)) { 3360 ret = i915_gem_object_unbind(obj); 3361 if (ret) 3362 return ret; 3363 } 3364 3365 if (obj->gtt_space) { 3366 ret = i915_gem_object_finish_gpu(obj); 3367 if (ret) 3368 return ret; 3369 3370 i915_gem_object_finish_gtt(obj); 3371 3372 /* Before SandyBridge, you could not use tiling or fence 3373 * registers with snooped memory, so relinquish any fences 3374 * currently pointing to our region in the aperture. 3375 */ 3376 if (INTEL_INFO(dev)->gen < 6) { 3377 ret = i915_gem_object_put_fence(obj); 3378 if (ret) 3379 return ret; 3380 } 3381 3382 if (obj->has_global_gtt_mapping) 3383 i915_gem_gtt_bind_object(obj, cache_level); 3384 #ifdef notyet 3385 if (obj->has_aliasing_ppgtt_mapping) 3386 i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt, 3387 obj, cache_level); 3388 #endif 3389 3390 obj->gtt_space->color = cache_level; 3391 } 3392 3393 if (cache_level == I915_CACHE_NONE) { 3394 u32 old_read_domains, old_write_domain; 3395 3396 /* If we're coming from LLC cached, then we haven't 3397 * actually been tracking whether the data is in the 3398 * CPU cache or not, since we only allow one bit set 3399 * in obj->write_domain and have been skipping the clflushes. 3400 * Just set it to the CPU cache for now. 
3401 */ 3402 WARN_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU); 3403 WARN_ON(obj->base.read_domains & ~I915_GEM_DOMAIN_CPU); 3404 3405 old_read_domains = obj->base.read_domains; 3406 old_write_domain = obj->base.write_domain; 3407 3408 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 3409 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 3410 3411 trace_i915_gem_object_change_domain(obj, 3412 old_read_domains, 3413 old_write_domain); 3414 } 3415 3416 obj->cache_level = cache_level; 3417 i915_gem_verify_gtt(dev); 3418 return 0; 3419 } 3420 3421 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, 3422 struct drm_file *file) 3423 { 3424 struct drm_i915_gem_caching *args = data; 3425 struct drm_i915_gem_object *obj; 3426 int ret; 3427 3428 ret = i915_mutex_lock_interruptible(dev); 3429 if (ret) 3430 return ret; 3431 3432 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 3433 if (&obj->base == NULL) { 3434 ret = -ENOENT; 3435 goto unlock; 3436 } 3437 3438 args->caching = obj->cache_level != I915_CACHE_NONE; 3439 3440 drm_gem_object_unreference(&obj->base); 3441 unlock: 3442 DRM_UNLOCK(); 3443 return ret; 3444 } 3445 3446 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, 3447 struct drm_file *file) 3448 { 3449 struct drm_i915_gem_caching *args = data; 3450 struct drm_i915_gem_object *obj; 3451 enum i915_cache_level level; 3452 int ret; 3453 3454 switch (args->caching) { 3455 case I915_CACHING_NONE: 3456 level = I915_CACHE_NONE; 3457 break; 3458 case I915_CACHING_CACHED: 3459 level = I915_CACHE_LLC; 3460 break; 3461 default: 3462 return -EINVAL; 3463 } 3464 3465 ret = i915_mutex_lock_interruptible(dev); 3466 if (ret) 3467 return ret; 3468 3469 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 3470 if (&obj->base == NULL) { 3471 ret = -ENOENT; 3472 goto unlock; 3473 } 3474 3475 ret = i915_gem_object_set_cache_level(obj, level); 3476 3477 drm_gem_object_unreference(&obj->base); 3478 unlock: 3479 DRM_UNLOCK(); 3480 return ret; 3481 } 3482 3483 /* 3484 * Prepare buffer for display plane (scanout, cursors, etc). 3485 * Can be called from an uninterruptible phase (modesetting) and allows 3486 * any flushes to be pipelined (for pageflips). 3487 */ 3488 int 3489 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, 3490 u32 alignment, 3491 struct intel_ring_buffer *pipelined) 3492 { 3493 u32 old_read_domains, old_write_domain; 3494 int ret; 3495 3496 if (pipelined != obj->ring) { 3497 ret = i915_gem_object_sync(obj, pipelined); 3498 if (ret) 3499 return ret; 3500 } 3501 3502 /* The display engine is not coherent with the LLC cache on gen6. As 3503 * a result, we make sure that the pinning that is about to occur is 3504 * done with uncached PTEs. This is lowest common denominator for all 3505 * chipsets. 3506 * 3507 * However for gen6+, we could do better by using the GFDT bit instead 3508 * of uncaching, which would allow us to flush all the LLC-cached data 3509 * with that bit in the PTE to main memory with just one PIPE_CONTROL. 3510 */ 3511 ret = i915_gem_object_set_cache_level(obj, I915_CACHE_NONE); 3512 if (ret) 3513 return ret; 3514 3515 /* As the user may map the buffer once pinned in the display plane 3516 * (e.g. libkms for the bootup splash), we have to ensure that we 3517 * always use map_and_fenceable for all scanout buffers. 
3518 */ 3519 ret = i915_gem_object_pin(obj, alignment, true, false); 3520 if (ret) 3521 return ret; 3522 3523 i915_gem_object_flush_cpu_write_domain(obj); 3524 3525 old_write_domain = obj->base.write_domain; 3526 old_read_domains = obj->base.read_domains; 3527 3528 /* It should now be out of any other write domains, and we can update 3529 * the domain values for our changes. 3530 */ 3531 obj->base.write_domain = 0; 3532 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 3533 3534 trace_i915_gem_object_change_domain(obj, 3535 old_read_domains, 3536 old_write_domain); 3537 3538 return 0; 3539 } 3540 3541 int 3542 i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj) 3543 { 3544 int ret; 3545 3546 if ((obj->base.read_domains & I915_GEM_GPU_DOMAINS) == 0) 3547 return 0; 3548 3549 ret = i915_gem_object_wait_rendering(obj, false); 3550 if (ret) 3551 return ret; 3552 3553 /* Ensure that we invalidate the GPU's caches and TLBs. */ 3554 obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS; 3555 return 0; 3556 } 3557 3558 /** 3559 * Moves a single object to the CPU read, and possibly write domain. 3560 * 3561 * This function returns when the move is complete, including waiting on 3562 * flushes to occur. 3563 */ 3564 int 3565 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) 3566 { 3567 uint32_t old_write_domain, old_read_domains; 3568 int ret; 3569 3570 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) 3571 return 0; 3572 3573 ret = i915_gem_object_wait_rendering(obj, !write); 3574 if (ret) 3575 return ret; 3576 3577 i915_gem_object_flush_gtt_write_domain(obj); 3578 3579 old_write_domain = obj->base.write_domain; 3580 old_read_domains = obj->base.read_domains; 3581 3582 /* Flush the CPU cache if it's still invalid. */ 3583 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) { 3584 i915_gem_clflush_object(obj); 3585 3586 obj->base.read_domains |= I915_GEM_DOMAIN_CPU; 3587 } 3588 3589 /* It should now be out of any other write domains, and we can update 3590 * the domain values for our changes. 3591 */ 3592 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0); 3593 3594 /* If we're writing through the CPU, then the GPU read domains will 3595 * need to be invalidated at next use. 3596 */ 3597 if (write) { 3598 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 3599 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 3600 } 3601 3602 trace_i915_gem_object_change_domain(obj, 3603 old_read_domains, 3604 old_write_domain); 3605 3606 return 0; 3607 } 3608 3609 /* Throttle our rendering by waiting until the ring has completed our requests 3610 * emitted over 20 msec ago. 3611 * 3612 * Note that if we were to use the current jiffies each time around the loop, 3613 * we wouldn't escape the function with any frames outstanding if the time to 3614 * render a frame was over 20ms. 3615 * 3616 * This should get us reasonable parallelism between CPU and GPU but also 3617 * relatively low latency when blocking on a particular request to finish. 
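* For example, assuming the common hz=100 tick rate (an assumption, not
* something this file configures), msecs_to_jiffies(20) is 2 ticks, so only
* requests emitted more than two ticks before this call are waited upon.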
3618 */ 3619 int 3620 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) 3621 { 3622 struct drm_i915_private *dev_priv = dev->dev_private; 3623 struct drm_i915_file_private *file_priv = file->driver_priv; 3624 unsigned long recent_enough = ticks - msecs_to_jiffies(20); 3625 struct drm_i915_gem_request *request; 3626 struct intel_ring_buffer *ring = NULL; 3627 u32 seqno = 0; 3628 int ret; 3629 3630 if (atomic_read(&dev_priv->mm.wedged)) 3631 return -EIO; 3632 3633 mtx_enter(&file_priv->mm.lock); 3634 list_for_each_entry(request, &file_priv->mm.request_list, client_list) { 3635 if (time_after_eq(request->emitted_ticks, recent_enough)) 3636 break; 3637 3638 ring = request->ring; 3639 seqno = request->seqno; 3640 } 3641 mtx_leave(&file_priv->mm.lock); 3642 3643 if (seqno == 0) 3644 return 0; 3645 3646 ret = __wait_seqno(ring, seqno, true, NULL); 3647 if (ret == 0) 3648 timeout_add_sec(&dev_priv->mm.retire_timer, 0); 3649 3650 return ret; 3651 } 3652 3653 int 3654 i915_gem_object_pin(struct drm_i915_gem_object *obj, 3655 uint32_t alignment, 3656 bool map_and_fenceable, 3657 bool nonblocking) 3658 { 3659 int ret; 3660 3661 if (WARN_ON(obj->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT)) 3662 return -EBUSY; 3663 3664 if (obj->gtt_space != NULL) { 3665 if ((alignment && obj->gtt_offset & (alignment - 1)) || 3666 (map_and_fenceable && !obj->map_and_fenceable)) { 3667 WARN(obj->pin_count, 3668 "bo is already pinned with incorrect alignment:" 3669 " offset=%x, req.alignment=%x, req.map_and_fenceable=%d," 3670 " obj->map_and_fenceable=%d\n", 3671 obj->gtt_offset, alignment, 3672 map_and_fenceable, 3673 obj->map_and_fenceable); 3674 ret = i915_gem_object_unbind(obj); 3675 if (ret) 3676 return ret; 3677 } 3678 } 3679 3680 if (obj->gtt_space == NULL) { 3681 #ifdef notyet 3682 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 3683 #endif 3684 3685 ret = i915_gem_object_bind_to_gtt(obj, alignment, 3686 map_and_fenceable, 3687 nonblocking); 3688 if (ret) 3689 return ret; 3690 3691 #ifdef notyet 3692 if (!dev_priv->mm.aliasing_ppgtt) 3693 #endif 3694 i915_gem_gtt_bind_object(obj, obj->cache_level); 3695 } 3696 3697 if (!obj->has_global_gtt_mapping && map_and_fenceable) 3698 i915_gem_gtt_bind_object(obj, obj->cache_level); 3699 3700 obj->pin_count++; 3701 obj->pin_mappable |= map_and_fenceable; 3702 3703 return 0; 3704 } 3705 3706 void 3707 i915_gem_object_unpin(struct drm_i915_gem_object *obj) 3708 { 3709 BUG_ON(obj->pin_count == 0); 3710 BUG_ON(obj->gtt_space == NULL); 3711 3712 if (--obj->pin_count == 0) 3713 obj->pin_mappable = false; 3714 } 3715 3716 int 3717 i915_gem_pin_ioctl(struct drm_device *dev, void *data, 3718 struct drm_file *file) 3719 { 3720 struct drm_i915_gem_pin *args = data; 3721 struct drm_i915_gem_object *obj; 3722 int ret; 3723 3724 ret = i915_mutex_lock_interruptible(dev); 3725 if (ret) 3726 return ret; 3727 3728 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 3729 if (&obj->base == NULL) { 3730 ret = -ENOENT; 3731 goto unlock; 3732 } 3733 3734 if (obj->madv != I915_MADV_WILLNEED) { 3735 DRM_ERROR("Attempting to pin a purgeable buffer\n"); 3736 ret = -EINVAL; 3737 goto out; 3738 } 3739 3740 if (obj->pin_filp != NULL && obj->pin_filp != file) { 3741 DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n", 3742 args->handle); 3743 ret = -EINVAL; 3744 goto out; 3745 } 3746 3747 if (obj->user_pin_count == 0) { 3748 ret = i915_gem_object_pin(obj, args->alignment, true, false); 3749 if (ret) 3750 goto out; 3751 } 3752 3753 
obj->user_pin_count++; 3754 obj->pin_filp = file; 3755 3756 /* XXX - flush the CPU caches for pinned objects 3757 * as the X server doesn't manage domains yet 3758 */ 3759 i915_gem_object_flush_cpu_write_domain(obj); 3760 args->offset = obj->gtt_offset; 3761 out: 3762 drm_gem_object_unreference(&obj->base); 3763 unlock: 3764 DRM_UNLOCK(); 3765 return ret; 3766 } 3767 3768 int 3769 i915_gem_unpin_ioctl(struct drm_device *dev, void *data, 3770 struct drm_file *file) 3771 { 3772 struct drm_i915_gem_pin *args = data; 3773 struct drm_i915_gem_object *obj; 3774 int ret; 3775 3776 ret = i915_mutex_lock_interruptible(dev); 3777 if (ret) 3778 return ret; 3779 3780 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 3781 if (&obj->base == NULL) { 3782 ret = -ENOENT; 3783 goto unlock; 3784 } 3785 3786 if (obj->pin_filp != file) { 3787 DRM_ERROR("Not pinned by caller in i915_gem_pin_ioctl(): %d\n", 3788 args->handle); 3789 ret = -EINVAL; 3790 goto out; 3791 } 3792 obj->user_pin_count--; 3793 if (obj->user_pin_count == 0) { 3794 obj->pin_filp = NULL; 3795 i915_gem_object_unpin(obj); 3796 } 3797 3798 out: 3799 drm_gem_object_unreference(&obj->base); 3800 unlock: 3801 DRM_UNLOCK(); 3802 return ret; 3803 } 3804 3805 int 3806 i915_gem_busy_ioctl(struct drm_device *dev, void *data, 3807 struct drm_file *file) 3808 { 3809 struct drm_i915_gem_busy *args = data; 3810 struct drm_i915_gem_object *obj; 3811 int ret; 3812 3813 ret = i915_mutex_lock_interruptible(dev); 3814 if (ret) 3815 return ret; 3816 3817 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 3818 if (&obj->base == NULL) { 3819 ret = -ENOENT; 3820 goto unlock; 3821 } 3822 3823 /* Count all active objects as busy, even if they are currently not used 3824 * by the gpu. Users of this interface expect objects to eventually 3825 * become non-busy without any further actions, therefore emit any 3826 * necessary flushes here. 
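* (In the reply, args->busy bit 0 reports whether the object is still
* active; when it is, intel_ring_flag(obj->ring) << 16 additionally encodes
* the ring that last used it, as done below.)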
3827 */ 3828 ret = i915_gem_object_flush_active(obj); 3829 3830 args->busy = obj->active; 3831 if (obj->ring) { 3832 // BUILD_BUG_ON(I915_NUM_RINGS > 16); 3833 args->busy |= intel_ring_flag(obj->ring) << 16; 3834 } 3835 3836 drm_gem_object_unreference(&obj->base); 3837 unlock: 3838 DRM_UNLOCK(); 3839 return ret; 3840 } 3841 3842 #ifdef notyet 3843 int 3844 i915_gem_throttle_ioctl(struct drm_device *dev, void *data, 3845 struct drm_file *file_priv) 3846 { 3847 return i915_gem_ring_throttle(dev, file_priv); 3848 } 3849 #endif 3850 3851 int 3852 i915_gem_madvise_ioctl(struct drm_device *dev, void *data, 3853 struct drm_file *file_priv) 3854 { 3855 struct drm_i915_gem_madvise *args = data; 3856 struct drm_i915_gem_object *obj; 3857 int ret; 3858 3859 switch (args->madv) { 3860 case I915_MADV_DONTNEED: 3861 case I915_MADV_WILLNEED: 3862 break; 3863 default: 3864 return -EINVAL; 3865 } 3866 3867 ret = i915_mutex_lock_interruptible(dev); 3868 if (ret) 3869 return ret; 3870 3871 obj = to_intel_bo(drm_gem_object_lookup(dev, file_priv, args->handle)); 3872 if (&obj->base == NULL) { 3873 ret = -ENOENT; 3874 goto unlock; 3875 } 3876 3877 if (obj->pin_count) { 3878 ret = -EINVAL; 3879 goto out; 3880 } 3881 3882 if (obj->madv != __I915_MADV_PURGED) 3883 obj->madv = args->madv; 3884 3885 /* if the object is no longer attached, discard its backing storage */ 3886 if (i915_gem_object_is_purgeable(obj) && obj->pages == NULL) 3887 i915_gem_object_truncate(obj); 3888 3889 args->retained = obj->madv != __I915_MADV_PURGED; 3890 3891 out: 3892 drm_gem_object_unreference(&obj->base); 3893 unlock: 3894 DRM_UNLOCK(); 3895 return ret; 3896 } 3897 3898 void i915_gem_object_init(struct drm_i915_gem_object *obj, 3899 const struct drm_i915_gem_object_ops *ops) 3900 { 3901 INIT_LIST_HEAD(&obj->mm_list); 3902 INIT_LIST_HEAD(&obj->gtt_list); 3903 INIT_LIST_HEAD(&obj->ring_list); 3904 INIT_LIST_HEAD(&obj->exec_list); 3905 3906 obj->ops = ops; 3907 3908 obj->fence_reg = I915_FENCE_REG_NONE; 3909 obj->madv = I915_MADV_WILLNEED; 3910 /* Avoid an unnecessary call to unbind on the first bind. */ 3911 obj->map_and_fenceable = true; 3912 3913 i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size); 3914 } 3915 3916 static const struct drm_i915_gem_object_ops i915_gem_object_ops = { 3917 .get_pages = i915_gem_object_get_pages_gtt, 3918 .put_pages = i915_gem_object_put_pages_gtt, 3919 }; 3920 3921 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev, 3922 size_t size) 3923 { 3924 struct drm_i915_gem_object *obj; 3925 3926 obj = pool_get(&dev->objpl, PR_WAITOK | PR_ZERO); 3927 if (obj == NULL) 3928 return NULL; 3929 3930 if (drm_gem_object_init(dev, &obj->base, size) != 0) { 3931 pool_put(&dev->objpl, obj); 3932 return NULL; 3933 } 3934 3935 i915_gem_object_init(obj, &i915_gem_object_ops); 3936 3937 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 3938 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 3939 3940 if (HAS_LLC(dev)) { 3941 /* On some devices, we can have the GPU use the LLC (the CPU 3942 * cache) for about a 10% performance improvement 3943 * compared to uncached. Graphics requests other than 3944 * display scanout are coherent with the CPU in 3945 * accessing this cache. This means in this mode we 3946 * don't need to clflush on the CPU side, and on the 3947 * GPU side we only need to flush internal caches to 3948 * get data visible to the CPU. 3949 * 3950 * However, we maintain the display planes as UC, and so 3951 * need to rebind when first used as such. 
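* (i915_gem_object_pin_to_display_plane() above drops scanout buffers back
* to I915_CACHE_NONE for exactly this reason.)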
3952 */ 3953 obj->cache_level = I915_CACHE_LLC; 3954 } else 3955 obj->cache_level = I915_CACHE_NONE; 3956 3957 return obj; 3958 } 3959 3960 int i915_gem_init_object(struct drm_gem_object *obj) 3961 { 3962 BUG(); 3963 3964 return 0; 3965 } 3966 3967 void i915_gem_free_object(struct drm_gem_object *gem_obj) 3968 { 3969 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); 3970 struct drm_device *dev = obj->base.dev; 3971 drm_i915_private_t *dev_priv = dev->dev_private; 3972 3973 if (obj->phys_obj) 3974 i915_gem_detach_phys_object(dev, obj); 3975 3976 obj->pin_count = 0; 3977 i915_gem_object_unbind(obj); 3978 3979 obj->pages_pin_count = 0; 3980 i915_gem_object_put_pages(obj); 3981 i915_gem_object_free_mmap_offset(obj); 3982 3983 BUG_ON(obj->pages); 3984 3985 drm_gem_object_release(&obj->base); 3986 i915_gem_info_remove_obj(dev_priv, obj->base.size); 3987 3988 kfree(obj->bit_17); 3989 pool_put(&dev->objpl, obj); 3990 } 3991 3992 int 3993 i915_gem_idle(struct drm_device *dev) 3994 { 3995 drm_i915_private_t *dev_priv = dev->dev_private; 3996 int ret; 3997 3998 DRM_LOCK(); 3999 4000 if (dev_priv->mm.suspended) { 4001 DRM_UNLOCK(); 4002 return 0; 4003 } 4004 4005 ret = i915_gpu_idle(dev); 4006 if (ret) { 4007 DRM_UNLOCK(); 4008 return ret; 4009 } 4010 i915_gem_retire_requests(dev); 4011 4012 /* Under UMS, be paranoid and evict. */ 4013 if (!drm_core_check_feature(dev, DRIVER_MODESET)) 4014 i915_gem_evict_everything(dev); 4015 4016 i915_gem_reset_fences(dev); 4017 4018 /* Hack! Don't let anybody do execbuf while we don't control the chip. 4019 * We need to replace this with a semaphore, or something. 4020 * And not confound mm.suspended! 4021 */ 4022 dev_priv->mm.suspended = 1; 4023 timeout_del(&dev_priv->hangcheck_timer); 4024 4025 i915_kernel_lost_context(dev); 4026 i915_gem_cleanup_ringbuffer(dev); 4027 4028 DRM_UNLOCK(); 4029 4030 /* Cancel the retire work handler, which should be idle now. 
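* (It should be idle because mm.suspended was set above and
* i915_gem_retire_work_handler() only re-arms the timeout while the device
* is not suspended.)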
#ifdef notyet
void i915_gem_l3_remap(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	u32 misccpctl;
	int i;

	if (!HAS_L3_GPU_CACHE(dev))
		return;

	if (!dev_priv->l3_parity.remap_info)
		return;

	misccpctl = I915_READ(GEN7_MISCCPCTL);
	I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE);
	POSTING_READ(GEN7_MISCCPCTL);

	for (i = 0; i < GEN7_L3LOG_SIZE; i += 4) {
		u32 remap = I915_READ(GEN7_L3LOG_BASE + i);
		if (remap && remap != dev_priv->l3_parity.remap_info[i/4])
			DRM_DEBUG("0x%x was already programmed to %x\n",
				  GEN7_L3LOG_BASE + i, remap);
		if (remap && !dev_priv->l3_parity.remap_info[i/4])
			DRM_DEBUG_DRIVER("Clearing remapped register\n");
		I915_WRITE(GEN7_L3LOG_BASE + i, dev_priv->l3_parity.remap_info[i/4]);
	}

	/* Make sure all the writes land before re-enabling dop clock gating */
	POSTING_READ(GEN7_L3LOG_BASE);

	I915_WRITE(GEN7_MISCCPCTL, misccpctl);
}
#endif /* notyet */

void i915_gem_init_swizzling(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;

	if (INTEL_INFO(dev)->gen < 5 ||
	    dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
		return;

	I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
				 DISP_TILE_SURFACE_SWIZZLING);

	if (IS_GEN5(dev))
		return;

	I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
	if (IS_GEN6(dev))
		I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB));
	else
		I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB));
}

static bool
intel_enable_blt(struct drm_device *dev)
{
	if (!HAS_BLT(dev))
		return false;

#ifdef notyet
	/* The blitter was dysfunctional on early prototypes */
	if (IS_GEN6(dev) && dev->pdev->revision < 8) {
		DRM_INFO("BLT not supported on this pre-production hardware;"
			 " graphics performance will be degraded.\n");
		return false;
	}
#endif

	return true;
}

int
i915_gem_init_hw(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	int ret;

#ifdef notyet
	if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt())
		return -EIO;
#endif

	if (IS_HASWELL(dev) && (I915_READ(0x120010) == 1))
		I915_WRITE(0x9008, I915_READ(0x9008) | 0xf0000);

#ifdef notyet
	i915_gem_l3_remap(dev);
#endif

	i915_gem_init_swizzling(dev);

	ret = intel_init_render_ring_buffer(dev);
	if (ret)
		return ret;

	if (HAS_BSD(dev)) {
		ret = intel_init_bsd_ring_buffer(dev);
		if (ret)
			goto cleanup_render_ring;
	}

	if (intel_enable_blt(dev)) {
		ret = intel_init_blt_ring_buffer(dev);
		if (ret)
			goto cleanup_bsd_ring;
	}

	dev_priv->next_seqno = 1;

	/*
	 * XXX: There was some w/a described somewhere suggesting loading
	 * contexts before PPGTT.
	 */
	i915_gem_context_init(dev);
#ifdef notyet
	i915_gem_init_ppgtt(dev);
#endif

	return 0;

cleanup_bsd_ring:
	intel_cleanup_ring_buffer(&dev_priv->ring[VCS]);
cleanup_render_ring:
	intel_cleanup_ring_buffer(&dev_priv->ring[RCS]);
	return ret;
}
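/*
 * PPGTT enablement policy (also still behind "notyet" here): the
 * i915_enable_ppgtt parameter wins when set, and on Linux builds aliasing
 * PPGTT is additionally disabled on gen6 when VT-d is mapping graphics, a
 * combination upstream avoids.
 */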
#ifdef notyet
static bool
intel_enable_ppgtt(struct drm_device *dev)
{
	if (i915_enable_ppgtt >= 0)
		return i915_enable_ppgtt;

#ifdef CONFIG_INTEL_IOMMU
	/* Disable ppgtt on SNB if VT-d is on. */
	if (INTEL_INFO(dev)->gen == 6 && intel_iommu_gfx_mapped)
		return false;
#endif

	return true;
}
#endif /* notyet */

int i915_gem_init(struct drm_device *dev)
{
	unsigned long gtt_size, mappable_size;
	int ret;

	gtt_size = dev->agp->info.ai_aperture_size;
	mappable_size = dev->agp->info.ai_aperture_size;

	DRM_LOCK();
#ifdef notyet
	if (intel_enable_ppgtt(dev) && HAS_ALIASING_PPGTT(dev)) {
		/* PPGTT pdes are stolen from global gtt ptes, so shrink the
		 * aperture accordingly when using aliasing ppgtt. */
		gtt_size -= I915_PPGTT_PD_ENTRIES*PAGE_SIZE;

		i915_gem_init_global_gtt(dev, 0, mappable_size, gtt_size);

		ret = i915_gem_init_aliasing_ppgtt(dev);
		if (ret) {
			mutex_unlock(&dev->struct_mutex);
			return ret;
		}
	} else {
#endif
		/* Let GEM Manage all of the aperture.
		 *
		 * However, leave one page at the end still bound to the scratch
		 * page. There are a number of places where the hardware
		 * apparently prefetches past the end of the object, and we've
		 * seen multiple hangs with the GPU head pointer stuck in a
		 * batchbuffer bound at the last page of the aperture. One page
		 * should be enough to keep any prefetching inside of the
		 * aperture.
		 */
		i915_gem_init_global_gtt(dev, 0, mappable_size,
					 gtt_size);
#ifdef notyet
	}
#endif

	ret = i915_gem_init_hw(dev);
	DRM_UNLOCK();
	if (ret) {
		i915_gem_cleanup_aliasing_ppgtt(dev);
		return ret;
	}

#ifdef __linux__
	/* Allow hardware batchbuffers unless told otherwise, but not for KMS. */
	if (!drm_core_check_feature(dev, DRIVER_MODESET))
		dev_priv->dri1.allow_batchbuffer = 1;
#endif
	return 0;
}
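/*
 * Tear down the rings brought up by i915_gem_init_hw(): render, plus the
 * BSD and blitter rings where present.
 */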
void
i915_gem_cleanup_ringbuffer(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct intel_ring_buffer *ring;
	int i;

	for_each_ring(ring, dev_priv, i)
		intel_cleanup_ring_buffer(ring);
}

int
i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
		       struct drm_file *file_priv)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	int ret;

	if (drm_core_check_feature(dev, DRIVER_MODESET))
		return 0;

	if (atomic_read(&dev_priv->mm.wedged)) {
		DRM_ERROR("Reenabling wedged hardware, good luck\n");
		atomic_set(&dev_priv->mm.wedged, 0);
	}

	DRM_LOCK();
	dev_priv->mm.suspended = 0;

	ret = i915_gem_init_hw(dev);
	if (ret != 0) {
		DRM_UNLOCK();
		return ret;
	}

	BUG_ON(!list_empty(&dev_priv->mm.active_list));
	DRM_UNLOCK();

	ret = drm_irq_install(dev);
	if (ret)
		goto cleanup_ringbuffer;

	return 0;

cleanup_ringbuffer:
	DRM_LOCK();
	i915_gem_cleanup_ringbuffer(dev);
	dev_priv->mm.suspended = 1;
	DRM_UNLOCK();

	return ret;
}

int
i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
		       struct drm_file *file_priv)
{
	if (drm_core_check_feature(dev, DRIVER_MODESET))
		return 0;

	drm_irq_uninstall(dev);
	return i915_gem_idle(dev);
}

#ifdef notyet
void
i915_gem_lastclose(struct drm_device *dev)
{
	int ret;

	if (drm_core_check_feature(dev, DRIVER_MODESET))
		return;

	ret = i915_gem_idle(dev);
	if (ret)
		DRM_ERROR("failed to idle hardware: %d\n", ret);
}
#endif /* notyet */

static void
init_ring_lists(struct intel_ring_buffer *ring)
{
	INIT_LIST_HEAD(&ring->active_list);
	INIT_LIST_HEAD(&ring->request_list);
}

void
i915_gem_load(struct drm_device *dev)
{
	int i;
	drm_i915_private_t *dev_priv = dev->dev_private;

	INIT_LIST_HEAD(&dev_priv->mm.active_list);
	INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
	INIT_LIST_HEAD(&dev_priv->mm.unbound_list);
	INIT_LIST_HEAD(&dev_priv->mm.bound_list);
	INIT_LIST_HEAD(&dev_priv->mm.fence_list);
	for (i = 0; i < I915_NUM_RINGS; i++)
		init_ring_lists(&dev_priv->ring[i]);
	for (i = 0; i < I915_MAX_NUM_FENCES; i++)
		INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
	task_set(&dev_priv->mm.retire_task, i915_gem_retire_work_handler,
	    dev_priv, NULL);
	timeout_set(&dev_priv->mm.retire_timer, inteldrm_timeout, dev_priv);
#if 0
	init_completion(&dev_priv->error_completion);
#else
	dev_priv->error_completion = 0;
#endif

	/* On GEN3 we really need to make sure the ARB C3 LP bit is set */
	if (IS_GEN3(dev)) {
		I915_WRITE(MI_ARB_STATE,
			   _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE));
	}

	dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL;

	/* Old X drivers will take 0-2 for front, back, depth buffers */
	if (!drm_core_check_feature(dev, DRIVER_MODESET))
		dev_priv->fence_reg_start = 3;

	if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
		dev_priv->num_fence_regs = 16;
	else
		dev_priv->num_fence_regs = 8;

	/* Initialize fence registers to zero */
	i915_gem_reset_fences(dev);

	i915_gem_detect_bit_6_swizzle(dev);
#if 0
	init_waitqueue_head(&dev_priv->pending_flip_queue);
#endif

	dev_priv->mm.interruptible = true;

#if 0
	dev_priv->mm.inactive_shrinker.shrink = i915_gem_inactive_shrink;
	dev_priv->mm.inactive_shrinker.seeks = DEFAULT_SEEKS;
	register_shrinker(&dev_priv->mm.inactive_shrinker);
#endif
}
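/*
 * Phys object support. A small table in dev_priv->mm.phys_objs, indexed by
 * id - 1, tracks the physically contiguous buffers the display hardware
 * reads by physical address (legacy cursors, overlay registers). On
 * OpenBSD the backing store comes from drm_dmamem_alloc() with
 * BUS_DMA_NOCACHE, so the kernel mapping (handle->kva) is uncached.
 */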
/*
 * Create a physically contiguous memory object for this object
 * e.g. for cursor + overlay regs
 */
static int i915_gem_init_phys_object(struct drm_device *dev,
				     int id, int size, int align)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_gem_phys_object *phys_obj;
	int ret;

	if (dev_priv->mm.phys_objs[id - 1] || !size)
		return 0;

	phys_obj = kzalloc(sizeof(struct drm_i915_gem_phys_object), GFP_KERNEL);
	if (!phys_obj)
		return -ENOMEM;

	phys_obj->id = id;

	phys_obj->handle = drm_dmamem_alloc(dev->dmat, size, align, 1, size, BUS_DMA_NOCACHE, 0);
	if (!phys_obj->handle) {
		ret = -ENOMEM;
		goto kfree_obj;
	}

	dev_priv->mm.phys_objs[id - 1] = phys_obj;

	return 0;
kfree_obj:
	kfree(phys_obj);
	return ret;
}

#ifdef notyet
static void i915_gem_free_phys_object(struct drm_device *dev, int id)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_gem_phys_object *phys_obj;

	if (!dev_priv->mm.phys_objs[id - 1])
		return;

	phys_obj = dev_priv->mm.phys_objs[id - 1];
	if (phys_obj->cur_obj) {
		i915_gem_detach_phys_object(dev, phys_obj->cur_obj);
	}

#ifdef CONFIG_X86
	set_memory_wb((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE);
#endif
	drm_pci_free(dev, phys_obj->handle);
	kfree(phys_obj);
	dev_priv->mm.phys_objs[id - 1] = NULL;
}

void i915_gem_free_all_phys_object(struct drm_device *dev)
{
	int i;

	for (i = I915_GEM_PHYS_CURSOR_0; i <= I915_MAX_PHYS_OBJECT; i++)
		i915_gem_free_phys_object(dev, i);
}
#endif /* notyet */

void i915_gem_detach_phys_object(struct drm_device *dev,
				 struct drm_i915_gem_object *obj)
{
	char *vaddr;
	int i;
	int page_count;

	if (!obj->phys_obj)
		return;
	vaddr = obj->phys_obj->handle->kva;

	page_count = obj->base.size / PAGE_SIZE;
	for (i = 0; i < page_count; i++) {
#ifdef notyet
		struct page *page = shmem_read_mapping_page(mapping, i);
		if (!IS_ERR(page)) {
			char *dst = kmap_atomic(page);
			memcpy(dst, vaddr + i*PAGE_SIZE, PAGE_SIZE);
			kunmap_atomic(dst);

			drm_clflush_pages(&page, 1);

			set_page_dirty(page);
			mark_page_accessed(page);
			page_cache_release(page);
		}
#endif
	}
	i915_gem_chipset_flush(dev);

	obj->phys_obj->cur_obj = NULL;
	obj->phys_obj = NULL;
}

int
i915_gem_attach_phys_object(struct drm_device *dev,
			    struct drm_i915_gem_object *obj,
			    int id,
			    int align)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	int ret = 0;
	int page_count;
	int i;

	if (id > I915_MAX_PHYS_OBJECT)
		return -EINVAL;

	if (obj->phys_obj) {
		if (obj->phys_obj->id == id)
			return 0;
		i915_gem_detach_phys_object(dev, obj);
	}

	/* create a new object */
	if (!dev_priv->mm.phys_objs[id - 1]) {
		ret = i915_gem_init_phys_object(dev, id,
						obj->base.size, align);
		if (ret) {
			DRM_ERROR("failed to init phys object %d size: %zu\n",
				  id, obj->base.size);
			return ret;
		}
	}

	/* bind to the object */
	obj->phys_obj = dev_priv->mm.phys_objs[id - 1];
	obj->phys_obj->cur_obj = obj;

	page_count = obj->base.size / PAGE_SIZE;

	for (i = 0; i < page_count; i++) {
#ifdef notyet
		struct page *page;
		char *dst, *src;

		page = shmem_read_mapping_page(mapping, i);
		if (IS_ERR(page))
			return PTR_ERR(page);

		src = kmap_atomic(page);
		dst = obj->phys_obj->handle->kva + (i * PAGE_SIZE);
		memcpy(dst, src, PAGE_SIZE);
		kunmap_atomic(src);

		mark_page_accessed(page);
		page_cache_release(page);
#endif
	}

	return 0;
}
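/*
 * pwrite fast path for phys objects: the ioctl data is copied straight
 * from userland into the contiguous buffer's kernel mapping and the
 * chipset is flushed, since there is no shmem/GTT backing to go through.
 * copyin() returns a positive errno, hence the negation to match the
 * driver's negative-errno convention.
 */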
static int
i915_gem_phys_pwrite(struct drm_device *dev,
		     struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pwrite *args,
		     struct drm_file *file_priv)
{
	void *vaddr = obj->phys_obj->handle->kva + args->offset;
	int ret;

	ret = -copyin((char *)(uintptr_t)args->data_ptr,
	    vaddr, args->size);

	i915_gem_chipset_flush(dev);

	return ret;
}

void i915_gem_release(struct drm_device *dev, struct drm_file *file)
{
	struct drm_i915_file_private *file_priv = file->driver_priv;

	/* Clean up our request list when the client is going away, so that
	 * later retire_requests won't dereference our soon-to-be-gone
	 * file_priv.
	 */
	mtx_enter(&file_priv->mm.lock);
	while (!list_empty(&file_priv->mm.request_list)) {
		struct drm_i915_gem_request *request;

		request = list_first_entry(&file_priv->mm.request_list,
					   struct drm_i915_gem_request,
					   client_list);
		list_del(&request->client_list);
		request->file_priv = NULL;
	}
	mtx_leave(&file_priv->mm.lock);
}

#ifdef notyet
static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task)
{
	if (!mutex_is_locked(mutex))
		return false;

#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_MUTEXES)
	return mutex->owner == task;
#else
	/* Since UP may be pre-empted, we cannot assume that we own the lock */
	return false;
#endif
}

static int
i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc)
{
	struct drm_i915_private *dev_priv =
		container_of(shrinker,
			     struct drm_i915_private,
			     mm.inactive_shrinker);
	struct drm_device *dev = dev_priv->dev;
	struct drm_i915_gem_object *obj;
	int nr_to_scan = sc->nr_to_scan;
	bool unlock = true;
	int cnt;

	if (!mutex_trylock(&dev->struct_mutex)) {
		if (!mutex_is_locked_by(&dev->struct_mutex, current))
			return 0;

		if (dev_priv->mm.shrinker_no_lock_stealing)
			return 0;

		unlock = false;
	}

	if (nr_to_scan) {
		nr_to_scan -= i915_gem_purge(dev_priv, nr_to_scan);
		if (nr_to_scan > 0)
			nr_to_scan -= __i915_gem_shrink(dev_priv, nr_to_scan,
							false);
		if (nr_to_scan > 0)
			i915_gem_shrink_all(dev_priv);
	}

	cnt = 0;
	list_for_each_entry(obj, &dev_priv->mm.unbound_list, gtt_list)
		if (obj->pages_pin_count == 0)
			cnt += obj->base.size >> PAGE_SHIFT;
	list_for_each_entry(obj, &dev_priv->mm.inactive_list, mm_list)
		if (obj->pin_count == 0 && obj->pages_pin_count == 0)
			cnt += obj->base.size >> PAGE_SHIFT;

	if (unlock)
		mutex_unlock(&dev->struct_mutex);
	return cnt;
}
#endif /* notyet */
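/*
 * Linux-style time conversion helpers used by the GEM wait/timeout code.
 * OpenBSD's `tick' is the length of a clock tick in microseconds, so with
 * the default hz = 100 (tick = 10000) a timespec of 1.5s converts to
 * 100 * 1 + 500000000 / (10000 * 1000) = 150 ticks, clamped to INT_MAX.
 * ns_to_timespec() normalises a negative remainder, e.g. -1ns becomes
 * { tv_sec = -1, tv_nsec = 999999999 }.
 */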
#define NSEC_PER_SEC	1000000000L

static inline int64_t
timespec_to_ns(const struct timespec *ts)
{
	return ((ts->tv_sec * NSEC_PER_SEC) + ts->tv_nsec);
}

static inline int
timespec_to_jiffies(const struct timespec *ts)
{
	long long to_ticks;

	to_ticks = (long long)hz * ts->tv_sec + ts->tv_nsec / (tick * 1000);
	if (to_ticks > INT_MAX)
		to_ticks = INT_MAX;

	return ((int)to_ticks);
}

static struct timespec
ns_to_timespec(const int64_t nsec)
{
	struct timespec ts;
	int32_t rem;

	if (nsec == 0) {
		ts.tv_sec = 0;
		ts.tv_nsec = 0;
		return (ts);
	}

	ts.tv_sec = nsec / NSEC_PER_SEC;
	rem = nsec % NSEC_PER_SEC;
	if (rem < 0) {
		ts.tv_sec--;
		rem += NSEC_PER_SEC;
	}
	ts.tv_nsec = rem;
	return (ts);
}

static inline int
timespec_valid(const struct timespec *ts)
{
	if (ts->tv_sec < 0 || ts->tv_sec > 100000000 ||
	    ts->tv_nsec < 0 || ts->tv_nsec >= 1000000000)
		return (0);
	return (1);
}