1 /* 2 * Copyright © 2008-2015 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 * Authors: 24 * Eric Anholt <eric@anholt.net> 25 * 26 */ 27 28 #include <dev/pci/drm/drmP.h> 29 #include <dev/pci/drm/drm_vma_manager.h> 30 #include <dev/pci/drm/i915_drm.h> 31 #include "i915_drv.h" 32 #include "i915_vgpu.h" 33 #include "i915_trace.h" 34 #include "intel_drv.h" 35 #ifdef __linux__ 36 #include <linux/shmem_fs.h> 37 #include <linux/slab.h> 38 #include <linux/swap.h> 39 #include <linux/pci.h> 40 #include <linux/dma-buf.h> 41 #endif 42 43 #define RQ_BUG_ON(expr) 44 45 static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj); 46 static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj); 47 static void 48 i915_gem_object_retire__write(struct drm_i915_gem_object *obj); 49 static void 50 i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring); 51 52 static bool cpu_cache_is_coherent(struct drm_device *dev, 53 enum i915_cache_level level) 54 { 55 return HAS_LLC(dev) || level != I915_CACHE_NONE; 56 } 57 58 static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) 59 { 60 if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) 61 return true; 62 63 return obj->pin_display; 64 } 65 66 /* some bookkeeping */ 67 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv, 68 size_t size) 69 { 70 spin_lock(&dev_priv->mm.object_stat_lock); 71 dev_priv->mm.object_count++; 72 dev_priv->mm.object_memory += size; 73 spin_unlock(&dev_priv->mm.object_stat_lock); 74 } 75 76 static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv, 77 size_t size) 78 { 79 spin_lock(&dev_priv->mm.object_stat_lock); 80 dev_priv->mm.object_count--; 81 dev_priv->mm.object_memory -= size; 82 spin_unlock(&dev_priv->mm.object_stat_lock); 83 } 84 85 static int 86 i915_gem_wait_for_error(struct i915_gpu_error *error) 87 { 88 int ret; 89 90 #define EXIT_COND (!i915_reset_in_progress(error) || \ 91 i915_terminally_wedged(error)) 92 if (EXIT_COND) 93 return 0; 94 95 /* 96 * Only wait 10 seconds for the gpu reset to complete to avoid hanging 97 * userspace. If it takes that long something really bad is going on and 98 * we should simply try to bail out and fail as gracefully as possible. 
99 */ 100 ret = wait_event_interruptible_timeout(error->reset_queue, 101 EXIT_COND, 102 10*HZ); 103 if (ret == 0) { 104 DRM_ERROR("Timed out waiting for the gpu reset to complete\n"); 105 return -EIO; 106 } else if (ret < 0) { 107 return ret; 108 } 109 #undef EXIT_COND 110 111 return 0; 112 } 113 114 int i915_mutex_lock_interruptible(struct drm_device *dev) 115 { 116 struct drm_i915_private *dev_priv = dev->dev_private; 117 int ret; 118 119 ret = i915_gem_wait_for_error(&dev_priv->gpu_error); 120 if (ret) 121 return ret; 122 123 ret = mutex_lock_interruptible(&dev->struct_mutex); 124 if (ret) 125 return ret; 126 127 WARN_ON(i915_verify_lists(dev)); 128 return 0; 129 } 130 131 int 132 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, 133 struct drm_file *file) 134 { 135 struct drm_i915_private *dev_priv = dev->dev_private; 136 struct drm_i915_gem_get_aperture *args = data; 137 struct i915_gtt *ggtt = &dev_priv->gtt; 138 struct i915_vma *vma; 139 size_t pinned; 140 141 pinned = 0; 142 mutex_lock(&dev->struct_mutex); 143 list_for_each_entry(vma, &ggtt->base.active_list, mm_list) 144 if (vma->pin_count) 145 pinned += vma->node.size; 146 list_for_each_entry(vma, &ggtt->base.inactive_list, mm_list) 147 if (vma->pin_count) 148 pinned += vma->node.size; 149 mutex_unlock(&dev->struct_mutex); 150 151 args->aper_size = dev_priv->gtt.base.total; 152 args->aper_available_size = args->aper_size - pinned; 153 154 return 0; 155 } 156 157 static int 158 i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) 159 { 160 #ifdef __linux__ 161 struct address_space *mapping = file_inode(obj->base.filp)->i_mapping; 162 char *vaddr = obj->phys_handle->vaddr; 163 #else 164 char *vaddr = obj->phys_handle->kva; 165 #endif 166 struct sg_table *st; 167 struct scatterlist *sg; 168 int i; 169 170 if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj))) 171 return -EINVAL; 172 173 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) { 174 struct vm_page *page; 175 char *src; 176 177 #ifdef __linux__ 178 page = shmem_read_mapping_page(mapping, i); 179 if (IS_ERR(page)) 180 return PTR_ERR(page); 181 #else 182 struct pglist plist; 183 TAILQ_INIT(&plist); 184 if (uvm_objwire(obj->base.uao, i * PAGE_SIZE, (i + 1) * PAGE_SIZE, &plist)) 185 return -ENOMEM; 186 page = TAILQ_FIRST(&plist); 187 #endif 188 189 src = kmap_atomic(page); 190 memcpy(vaddr, src, PAGE_SIZE); 191 drm_clflush_virt_range(vaddr, PAGE_SIZE); 192 kunmap_atomic(src); 193 194 #ifdef __linux__ 195 page_cache_release(page); 196 #else 197 uvm_objunwire(obj->base.uao, i * PAGE_SIZE, (i + 1) * PAGE_SIZE); 198 #endif 199 vaddr += PAGE_SIZE; 200 } 201 202 i915_gem_chipset_flush(obj->base.dev); 203 204 st = kmalloc(sizeof(*st), GFP_KERNEL); 205 if (st == NULL) 206 return -ENOMEM; 207 208 if (sg_alloc_table(st, 1, GFP_KERNEL)) { 209 kfree(st); 210 return -ENOMEM; 211 } 212 213 sg = st->sgl; 214 sg->offset = 0; 215 sg->length = obj->base.size; 216 217 #ifdef __linux__ 218 sg_dma_address(sg) = obj->phys_handle->busaddr; 219 #else 220 sg_dma_address(sg) = obj->phys_handle->segs[0].ds_addr; 221 #endif 222 sg_dma_len(sg) = obj->base.size; 223 224 obj->pages = st; 225 return 0; 226 } 227 228 static void 229 i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj) 230 { 231 int ret; 232 233 BUG_ON(obj->madv == __I915_MADV_PURGED); 234 235 ret = i915_gem_object_set_to_cpu_domain(obj, true); 236 if (ret) { 237 /* In the event of a disaster, abandon all caches and 238 * hope for the best. 
239 */ 240 WARN_ON(ret != -EIO); 241 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU; 242 } 243 244 if (obj->madv == I915_MADV_DONTNEED) 245 obj->dirty = 0; 246 247 if (obj->dirty) { 248 #ifdef __linux__ 249 struct address_space *mapping = file_inode(obj->base.filp)->i_mapping; 250 char *vaddr = obj->phys_handle->vaddr; 251 #else 252 char *vaddr = obj->phys_handle->kva; 253 #endif 254 int i; 255 256 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) { 257 struct vm_page *page; 258 char *dst; 259 260 #ifdef __linux__ 261 page = shmem_read_mapping_page(mapping, i); 262 if (IS_ERR(page)) 263 continue; 264 #else 265 struct pglist plist; 266 TAILQ_INIT(&plist); 267 if (uvm_objwire(obj->base.uao, i * PAGE_SIZE, (i + 1) * PAGE_SIZE, &plist)) 268 continue; 269 page = TAILQ_FIRST(&plist); 270 #endif 271 272 dst = kmap_atomic(page); 273 drm_clflush_virt_range(vaddr, PAGE_SIZE); 274 memcpy(dst, vaddr, PAGE_SIZE); 275 kunmap_atomic(dst); 276 277 set_page_dirty(page); 278 #ifdef __linux__ 279 if (obj->madv == I915_MADV_WILLNEED) 280 mark_page_accessed(page); 281 page_cache_release(page); 282 #else 283 uvm_objunwire(obj->base.uao, i * PAGE_SIZE, (i + 1) * PAGE_SIZE); 284 #endif 285 vaddr += PAGE_SIZE; 286 } 287 obj->dirty = 0; 288 } 289 290 sg_free_table(obj->pages); 291 kfree(obj->pages); 292 } 293 294 static void 295 i915_gem_object_release_phys(struct drm_i915_gem_object *obj) 296 { 297 #ifdef __linux__ 298 drm_pci_free(obj->base.dev, obj->phys_handle); 299 #else 300 drm_dmamem_free(obj->base.dev->dmat, obj->phys_handle); 301 #endif 302 } 303 304 static const struct drm_i915_gem_object_ops i915_gem_phys_ops = { 305 .get_pages = i915_gem_object_get_pages_phys, 306 .put_pages = i915_gem_object_put_pages_phys, 307 .release = i915_gem_object_release_phys, 308 }; 309 310 static int 311 drop_pages(struct drm_i915_gem_object *obj) 312 { 313 struct i915_vma *vma, *next; 314 int ret; 315 316 drm_gem_object_reference(&obj->base); 317 list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) 318 if (i915_vma_unbind(vma)) 319 break; 320 321 ret = i915_gem_object_put_pages(obj); 322 drm_gem_object_unreference(&obj->base); 323 324 return ret; 325 } 326 327 int 328 i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, 329 int align) 330 { 331 drm_dma_handle_t *phys; 332 int ret; 333 334 if (obj->phys_handle) { 335 if ((unsigned long)obj->phys_handle->kva & (align -1)) 336 return -EBUSY; 337 338 return 0; 339 } 340 341 if (obj->madv != I915_MADV_WILLNEED) 342 return -EFAULT; 343 344 if (obj->base.filp == NULL) 345 return -EINVAL; 346 347 ret = drop_pages(obj); 348 if (ret) 349 return ret; 350 351 /* create a new object */ 352 #ifdef __linux__ 353 phys = drm_pci_alloc(obj->base.dev, obj->base.size, align); 354 #else 355 phys = drm_dmamem_alloc(obj->base.dev->dmat, obj->base.size, align, 356 1, obj->base.size, BUS_DMA_NOCACHE, 0); 357 #endif 358 if (!phys) 359 return -ENOMEM; 360 361 obj->phys_handle = phys; 362 obj->ops = &i915_gem_phys_ops; 363 364 return i915_gem_object_get_pages(obj); 365 } 366 367 static int 368 i915_gem_phys_pwrite(struct drm_i915_gem_object *obj, 369 struct drm_i915_gem_pwrite *args, 370 struct drm_file *file_priv) 371 { 372 struct drm_device *dev = obj->base.dev; 373 void *vaddr = obj->phys_handle->kva + args->offset; 374 char __user *user_data = to_user_ptr(args->data_ptr); 375 int ret = 0; 376 377 /* We manually control the domain here and pretend that it 378 * remains coherent i.e. in the GTT domain, like shmem_pwrite. 
379 */ 380 ret = i915_gem_object_wait_rendering(obj, false); 381 if (ret) 382 return ret; 383 384 intel_fb_obj_invalidate(obj, ORIGIN_CPU); 385 if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) { 386 unsigned long unwritten; 387 388 /* The physical object once assigned is fixed for the lifetime 389 * of the obj, so we can safely drop the lock and continue 390 * to access vaddr. 391 */ 392 mutex_unlock(&dev->struct_mutex); 393 unwritten = copy_from_user(vaddr, user_data, args->size); 394 mutex_lock(&dev->struct_mutex); 395 if (unwritten) { 396 ret = -EFAULT; 397 goto out; 398 } 399 } 400 401 drm_clflush_virt_range(vaddr, args->size); 402 i915_gem_chipset_flush(dev); 403 404 out: 405 intel_fb_obj_flush(obj, false, ORIGIN_CPU); 406 return ret; 407 } 408 409 void *i915_gem_object_alloc(struct drm_device *dev) 410 { 411 struct drm_i915_private *dev_priv = dev->dev_private; 412 #ifdef __linux__ 413 return kmem_cache_zalloc(dev_priv->objects, GFP_KERNEL); 414 #else 415 return pool_get(&dev_priv->objects, PR_WAITOK | PR_ZERO); 416 #endif 417 } 418 419 void i915_gem_object_free(struct drm_i915_gem_object *obj) 420 { 421 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 422 #ifdef __linux__ 423 kmem_cache_free(dev_priv->objects, obj); 424 #else 425 pool_put(&dev_priv->objects, obj); 426 #endif 427 } 428 429 static int 430 i915_gem_create(struct drm_file *file, 431 struct drm_device *dev, 432 uint64_t size, 433 uint32_t *handle_p) 434 { 435 struct drm_i915_gem_object *obj; 436 int ret; 437 u32 handle; 438 439 size = roundup(size, PAGE_SIZE); 440 if (size == 0) 441 return -EINVAL; 442 443 /* Allocate the new object */ 444 obj = i915_gem_alloc_object(dev, size); 445 if (obj == NULL) 446 return -ENOMEM; 447 448 ret = drm_gem_handle_create(file, &obj->base, &handle); 449 /* drop reference from allocate - handle holds it now */ 450 drm_gem_object_unreference_unlocked(&obj->base); 451 if (ret) 452 return ret; 453 454 *handle_p = handle; 455 return 0; 456 } 457 458 int 459 i915_gem_dumb_create(struct drm_file *file, 460 struct drm_device *dev, 461 struct drm_mode_create_dumb *args) 462 { 463 /* have to work out size/pitch and return them */ 464 args->pitch = roundup2(args->width * DIV_ROUND_UP(args->bpp, 8), 64); 465 args->size = args->pitch * args->height; 466 return i915_gem_create(file, dev, 467 args->size, &args->handle); 468 } 469 470 /** 471 * Creates a new mm object and returns a handle to it. 
472 */ 473 int 474 i915_gem_create_ioctl(struct drm_device *dev, void *data, 475 struct drm_file *file) 476 { 477 struct drm_i915_gem_create *args = data; 478 479 return i915_gem_create(file, dev, 480 args->size, &args->handle); 481 } 482 483 static inline int 484 __copy_to_user_swizzled(char __user *cpu_vaddr, 485 const char *gpu_vaddr, int gpu_offset, 486 int length) 487 { 488 int ret, cpu_offset = 0; 489 490 while (length > 0) { 491 int cacheline_end = roundup2(gpu_offset + 1, 64); 492 int this_length = min(cacheline_end - gpu_offset, length); 493 int swizzled_gpu_offset = gpu_offset ^ 64; 494 495 ret = __copy_to_user(cpu_vaddr + cpu_offset, 496 gpu_vaddr + swizzled_gpu_offset, 497 this_length); 498 if (ret) 499 return ret + length; 500 501 cpu_offset += this_length; 502 gpu_offset += this_length; 503 length -= this_length; 504 } 505 506 return 0; 507 } 508 509 static inline int 510 __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset, 511 const char __user *cpu_vaddr, 512 int length) 513 { 514 int ret, cpu_offset = 0; 515 516 while (length > 0) { 517 int cacheline_end = roundup2(gpu_offset + 1, 64); 518 int this_length = min(cacheline_end - gpu_offset, length); 519 int swizzled_gpu_offset = gpu_offset ^ 64; 520 521 ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset, 522 cpu_vaddr + cpu_offset, 523 this_length); 524 if (ret) 525 return ret + length; 526 527 cpu_offset += this_length; 528 gpu_offset += this_length; 529 length -= this_length; 530 } 531 532 return 0; 533 } 534 535 /* 536 * Pins the specified object's pages and synchronizes the object with 537 * GPU accesses. Sets needs_clflush to non-zero if the caller should 538 * flush the object from the CPU cache. 539 */ 540 int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, 541 int *needs_clflush) 542 { 543 int ret; 544 545 *needs_clflush = 0; 546 547 if (!obj->base.filp) 548 return -EINVAL; 549 550 if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) { 551 /* If we're not in the cpu read domain, set ourself into the gtt 552 * read domain and manually flush cachelines (if required). This 553 * optimizes for the case when the gpu will dirty the data 554 * anyway again before the next pread happens. */ 555 *needs_clflush = !cpu_cache_is_coherent(obj->base.dev, 556 obj->cache_level); 557 ret = i915_gem_object_wait_rendering(obj, true); 558 if (ret) 559 return ret; 560 } 561 562 ret = i915_gem_object_get_pages(obj); 563 if (ret) 564 return ret; 565 566 i915_gem_object_pin_pages(obj); 567 568 return ret; 569 } 570 571 /* Per-page copy function for the shmem pread fastpath. 572 * Flushes invalid cachelines before reading the target if 573 * needs_clflush is set. */ 574 static int 575 shmem_pread_fast(struct vm_page *page, int shmem_page_offset, int page_length, 576 char __user *user_data, 577 bool page_do_bit17_swizzling, bool needs_clflush) 578 { 579 char *vaddr; 580 int ret; 581 582 if (unlikely(page_do_bit17_swizzling)) 583 return -EINVAL; 584 585 vaddr = kmap_atomic(page); 586 if (needs_clflush) 587 drm_clflush_virt_range(vaddr + shmem_page_offset, 588 page_length); 589 ret = __copy_to_user_inatomic(user_data, 590 vaddr + shmem_page_offset, 591 page_length); 592 kunmap_atomic(vaddr); 593 594 return ret ? 
-EFAULT : 0; 595 } 596 597 static void 598 shmem_clflush_swizzled_range(char *addr, unsigned long length, 599 bool swizzled) 600 { 601 if (unlikely(swizzled)) { 602 unsigned long start = (unsigned long) addr; 603 unsigned long end = (unsigned long) addr + length; 604 605 /* For swizzling simply ensure that we always flush both 606 * channels. Lame, but simple and it works. Swizzled 607 * pwrite/pread is far from a hotpath - current userspace 608 * doesn't use it at all. */ 609 start = round_down(start, 128); 610 end = round_up(end, 128); 611 612 drm_clflush_virt_range((void *)start, end - start); 613 } else { 614 drm_clflush_virt_range(addr, length); 615 } 616 617 } 618 619 /* Only difference to the fast-path function is that this can handle bit17 620 * and uses non-atomic copy and kmap functions. */ 621 static int 622 shmem_pread_slow(struct vm_page *page, int shmem_page_offset, int page_length, 623 char __user *user_data, 624 bool page_do_bit17_swizzling, bool needs_clflush) 625 { 626 char *vaddr; 627 int ret; 628 629 vaddr = kmap(page); 630 if (needs_clflush) 631 shmem_clflush_swizzled_range(vaddr + shmem_page_offset, 632 page_length, 633 page_do_bit17_swizzling); 634 635 if (page_do_bit17_swizzling) 636 ret = __copy_to_user_swizzled(user_data, 637 vaddr, shmem_page_offset, 638 page_length); 639 else 640 ret = __copy_to_user(user_data, 641 vaddr + shmem_page_offset, 642 page_length); 643 kunmap(page); 644 645 return ret ? - EFAULT : 0; 646 } 647 648 static int 649 i915_gem_shmem_pread(struct drm_device *dev, 650 struct drm_i915_gem_object *obj, 651 struct drm_i915_gem_pread *args, 652 struct drm_file *file) 653 { 654 char __user *user_data; 655 ssize_t remain; 656 loff_t offset; 657 int shmem_page_offset, page_length, ret = 0; 658 int obj_do_bit17_swizzling, page_do_bit17_swizzling; 659 #ifdef __linux__ 660 int prefaulted = 0; 661 #endif 662 int needs_clflush = 0; 663 struct sg_page_iter sg_iter; 664 665 user_data = to_user_ptr(args->data_ptr); 666 remain = args->size; 667 668 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); 669 670 ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush); 671 if (ret) 672 return ret; 673 674 offset = args->offset; 675 676 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 677 offset >> PAGE_SHIFT) { 678 struct vm_page *page = sg_page_iter_page(&sg_iter); 679 680 if (remain <= 0) 681 break; 682 683 /* Operation in this page 684 * 685 * shmem_page_offset = offset within page in shmem file 686 * page_length = bytes to copy for this page 687 */ 688 shmem_page_offset = offset_in_page(offset); 689 page_length = remain; 690 if ((shmem_page_offset + page_length) > PAGE_SIZE) 691 page_length = PAGE_SIZE - shmem_page_offset; 692 693 page_do_bit17_swizzling = obj_do_bit17_swizzling && 694 (page_to_phys(page) & (1 << 17)) != 0; 695 696 ret = shmem_pread_fast(page, shmem_page_offset, page_length, 697 user_data, page_do_bit17_swizzling, 698 needs_clflush); 699 if (ret == 0) 700 goto next_page; 701 702 mutex_unlock(&dev->struct_mutex); 703 704 #ifdef __linux__ 705 if (likely(!i915.prefault_disable) && !prefaulted) { 706 ret = fault_in_multipages_writeable(user_data, remain); 707 /* Userspace is tricking us, but we've already clobbered 708 * its pages with the prefault and promised to write the 709 * data up to the first fault. Hence ignore any errors 710 * and just continue. 
*/ 711 (void)ret; 712 prefaulted = 1; 713 } 714 #endif 715 716 ret = shmem_pread_slow(page, shmem_page_offset, page_length, 717 user_data, page_do_bit17_swizzling, 718 needs_clflush); 719 720 mutex_lock(&dev->struct_mutex); 721 722 if (ret) 723 goto out; 724 725 next_page: 726 remain -= page_length; 727 user_data += page_length; 728 offset += page_length; 729 } 730 731 out: 732 i915_gem_object_unpin_pages(obj); 733 734 return ret; 735 } 736 737 /** 738 * Reads data from the object referenced by handle. 739 * 740 * On error, the contents of *data are undefined. 741 */ 742 int 743 i915_gem_pread_ioctl(struct drm_device *dev, void *data, 744 struct drm_file *file) 745 { 746 struct drm_i915_gem_pread *args = data; 747 struct drm_i915_gem_object *obj; 748 int ret = 0; 749 750 if (args->size == 0) 751 return 0; 752 753 if (!access_ok(VERIFY_WRITE, 754 to_user_ptr(args->data_ptr), 755 args->size)) 756 return -EFAULT; 757 758 ret = i915_mutex_lock_interruptible(dev); 759 if (ret) 760 return ret; 761 762 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 763 if (&obj->base == NULL) { 764 ret = -ENOENT; 765 goto unlock; 766 } 767 768 /* Bounds check source. */ 769 if (args->offset > obj->base.size || 770 args->size > obj->base.size - args->offset) { 771 ret = -EINVAL; 772 goto out; 773 } 774 775 /* prime objects have no backing filp to GEM pread/pwrite 776 * pages from. 777 */ 778 if (!obj->base.filp) { 779 ret = -EINVAL; 780 goto out; 781 } 782 783 trace_i915_gem_object_pread(obj, args->offset, args->size); 784 785 ret = i915_gem_shmem_pread(dev, obj, args, file); 786 787 out: 788 drm_gem_object_unreference(&obj->base); 789 unlock: 790 mutex_unlock(&dev->struct_mutex); 791 return ret; 792 } 793 794 /* This is the fast write path which cannot handle 795 * page faults in the source data 796 */ 797 798 #ifdef __linux__ 799 static inline int 800 fast_user_write(struct io_mapping *mapping, 801 loff_t page_base, int page_offset, 802 char __user *user_data, 803 int length) 804 { 805 void __iomem *vaddr_atomic; 806 void *vaddr; 807 unsigned long unwritten; 808 809 vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base); 810 /* We can use the cpu mem copy function because this is X86. */ 811 vaddr = (void __force*)vaddr_atomic + page_offset; 812 unwritten = __copy_from_user_inatomic_nocache(vaddr, 813 user_data, length); 814 io_mapping_unmap_atomic(vaddr_atomic); 815 return unwritten; 816 } 817 #else 818 static inline int 819 fast_user_write(struct drm_i915_private *dev_priv, 820 bus_size_t page_base, int page_offset, 821 char __user *user_data, 822 int length) 823 { 824 bus_space_handle_t bsh; 825 void __iomem *vaddr_atomic; 826 void *vaddr; 827 unsigned long unwritten; 828 829 agp_map_atomic(dev_priv->agph, page_base, &bsh); 830 vaddr_atomic = bus_space_vaddr(dev_priv->bst, bsh); 831 /* We can use the cpu mem copy function because this is X86. */ 832 vaddr = (void __force*)vaddr_atomic + page_offset; 833 unwritten = __copy_from_user_inatomic_nocache(vaddr, 834 user_data, length); 835 agp_unmap_atomic(dev_priv->agph, bsh); 836 return unwritten; 837 } 838 #endif 839 840 /** 841 * This is the fast pwrite path, where we copy the data directly from the 842 * user into the GTT, uncached. 
843 */ 844 static int 845 i915_gem_gtt_pwrite_fast(struct drm_device *dev, 846 struct drm_i915_gem_object *obj, 847 struct drm_i915_gem_pwrite *args, 848 struct drm_file *file) 849 { 850 struct drm_i915_private *dev_priv = dev->dev_private; 851 ssize_t remain; 852 loff_t offset, page_base; 853 char __user *user_data; 854 int page_offset, page_length, ret; 855 856 ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK); 857 if (ret) 858 goto out; 859 860 ret = i915_gem_object_set_to_gtt_domain(obj, true); 861 if (ret) 862 goto out_unpin; 863 864 ret = i915_gem_object_put_fence(obj); 865 if (ret) 866 goto out_unpin; 867 868 user_data = to_user_ptr(args->data_ptr); 869 remain = args->size; 870 871 offset = i915_gem_obj_ggtt_offset(obj) + args->offset; 872 873 intel_fb_obj_invalidate(obj, ORIGIN_GTT); 874 875 while (remain > 0) { 876 /* Operation in this page 877 * 878 * page_base = page offset within aperture 879 * page_offset = offset within page 880 * page_length = bytes to copy for this page 881 */ 882 page_base = trunc_page(offset); 883 page_offset = offset_in_page(offset); 884 page_length = remain; 885 if ((page_offset + remain) > PAGE_SIZE) 886 page_length = PAGE_SIZE - page_offset; 887 888 /* If we get a fault while copying data, then (presumably) our 889 * source page isn't available. Return the error and we'll 890 * retry in the slow path. 891 */ 892 if (fast_user_write(dev_priv, page_base, 893 page_offset, user_data, page_length)) { 894 ret = -EFAULT; 895 goto out_flush; 896 } 897 898 remain -= page_length; 899 user_data += page_length; 900 offset += page_length; 901 } 902 903 out_flush: 904 intel_fb_obj_flush(obj, false, ORIGIN_GTT); 905 out_unpin: 906 i915_gem_object_ggtt_unpin(obj); 907 out: 908 return ret; 909 } 910 911 /* Per-page copy function for the shmem pwrite fastpath. 912 * Flushes invalid cachelines before writing to the target if 913 * needs_clflush_before is set and flushes out any written cachelines after 914 * writing if needs_clflush is set. */ 915 static int 916 shmem_pwrite_fast(struct vm_page *page, int shmem_page_offset, int page_length, 917 char __user *user_data, 918 bool page_do_bit17_swizzling, 919 bool needs_clflush_before, 920 bool needs_clflush_after) 921 { 922 char *vaddr; 923 int ret; 924 925 if (unlikely(page_do_bit17_swizzling)) 926 return -EINVAL; 927 928 vaddr = kmap_atomic(page); 929 if (needs_clflush_before) 930 drm_clflush_virt_range(vaddr + shmem_page_offset, 931 page_length); 932 ret = __copy_from_user_inatomic(vaddr + shmem_page_offset, 933 user_data, page_length); 934 if (needs_clflush_after) 935 drm_clflush_virt_range(vaddr + shmem_page_offset, 936 page_length); 937 kunmap_atomic(vaddr); 938 939 return ret ? -EFAULT : 0; 940 } 941 942 /* Only difference to the fast-path function is that this can handle bit17 943 * and uses non-atomic copy and kmap functions. 
*/ 944 static int 945 shmem_pwrite_slow(struct vm_page *page, int shmem_page_offset, int page_length, 946 char __user *user_data, 947 bool page_do_bit17_swizzling, 948 bool needs_clflush_before, 949 bool needs_clflush_after) 950 { 951 char *vaddr; 952 int ret; 953 954 vaddr = kmap(page); 955 if (unlikely(needs_clflush_before || page_do_bit17_swizzling)) 956 shmem_clflush_swizzled_range(vaddr + shmem_page_offset, 957 page_length, 958 page_do_bit17_swizzling); 959 if (page_do_bit17_swizzling) 960 ret = __copy_from_user_swizzled(vaddr, shmem_page_offset, 961 user_data, 962 page_length); 963 else 964 ret = __copy_from_user(vaddr + shmem_page_offset, 965 user_data, 966 page_length); 967 if (needs_clflush_after) 968 shmem_clflush_swizzled_range(vaddr + shmem_page_offset, 969 page_length, 970 page_do_bit17_swizzling); 971 kunmap(page); 972 973 return ret ? -EFAULT : 0; 974 } 975 976 static int 977 i915_gem_shmem_pwrite(struct drm_device *dev, 978 struct drm_i915_gem_object *obj, 979 struct drm_i915_gem_pwrite *args, 980 struct drm_file *file) 981 { 982 ssize_t remain; 983 loff_t offset; 984 char __user *user_data; 985 int shmem_page_offset, page_length, ret = 0; 986 int obj_do_bit17_swizzling, page_do_bit17_swizzling; 987 int hit_slowpath = 0; 988 int needs_clflush_after = 0; 989 int needs_clflush_before = 0; 990 struct sg_page_iter sg_iter; 991 992 user_data = to_user_ptr(args->data_ptr); 993 remain = args->size; 994 995 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); 996 997 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) { 998 /* If we're not in the cpu write domain, set ourself into the gtt 999 * write domain and manually flush cachelines (if required). This 1000 * optimizes for the case when the gpu will use the data 1001 * right away and we therefore have to clflush anyway. */ 1002 needs_clflush_after = cpu_write_needs_clflush(obj); 1003 ret = i915_gem_object_wait_rendering(obj, false); 1004 if (ret) 1005 return ret; 1006 } 1007 /* Same trick applies to invalidate partially written cachelines read 1008 * before writing. */ 1009 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) 1010 needs_clflush_before = 1011 !cpu_cache_is_coherent(dev, obj->cache_level); 1012 1013 ret = i915_gem_object_get_pages(obj); 1014 if (ret) 1015 return ret; 1016 1017 intel_fb_obj_invalidate(obj, ORIGIN_CPU); 1018 1019 i915_gem_object_pin_pages(obj); 1020 1021 offset = args->offset; 1022 obj->dirty = 1; 1023 1024 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 1025 offset >> PAGE_SHIFT) { 1026 struct vm_page *page = sg_page_iter_page(&sg_iter); 1027 int partial_cacheline_write; 1028 1029 if (remain <= 0) 1030 break; 1031 1032 /* Operation in this page 1033 * 1034 * shmem_page_offset = offset within page in shmem file 1035 * page_length = bytes to copy for this page 1036 */ 1037 shmem_page_offset = offset_in_page(offset); 1038 1039 page_length = remain; 1040 if ((shmem_page_offset + page_length) > PAGE_SIZE) 1041 page_length = PAGE_SIZE - shmem_page_offset; 1042 1043 /* If we don't overwrite a cacheline completely we need to be 1044 * careful to have up-to-date data by first clflushing. Don't 1045 * overcomplicate things and flush the entire patch. 
*/ 1046 partial_cacheline_write = needs_clflush_before && 1047 ((shmem_page_offset | page_length) 1048 & (curcpu()->ci_cflushsz - 1)); 1049 1050 page_do_bit17_swizzling = obj_do_bit17_swizzling && 1051 (page_to_phys(page) & (1 << 17)) != 0; 1052 1053 ret = shmem_pwrite_fast(page, shmem_page_offset, page_length, 1054 user_data, page_do_bit17_swizzling, 1055 partial_cacheline_write, 1056 needs_clflush_after); 1057 if (ret == 0) 1058 goto next_page; 1059 1060 hit_slowpath = 1; 1061 mutex_unlock(&dev->struct_mutex); 1062 ret = shmem_pwrite_slow(page, shmem_page_offset, page_length, 1063 user_data, page_do_bit17_swizzling, 1064 partial_cacheline_write, 1065 needs_clflush_after); 1066 1067 mutex_lock(&dev->struct_mutex); 1068 1069 if (ret) 1070 goto out; 1071 1072 next_page: 1073 remain -= page_length; 1074 user_data += page_length; 1075 offset += page_length; 1076 } 1077 1078 out: 1079 i915_gem_object_unpin_pages(obj); 1080 1081 if (hit_slowpath) { 1082 /* 1083 * Fixup: Flush cpu caches in case we didn't flush the dirty 1084 * cachelines in-line while writing and the object moved 1085 * out of the cpu write domain while we've dropped the lock. 1086 */ 1087 if (!needs_clflush_after && 1088 obj->base.write_domain != I915_GEM_DOMAIN_CPU) { 1089 if (i915_gem_clflush_object(obj, obj->pin_display)) 1090 needs_clflush_after = true; 1091 } 1092 } 1093 1094 if (needs_clflush_after) 1095 i915_gem_chipset_flush(dev); 1096 else 1097 obj->cache_dirty = true; 1098 1099 intel_fb_obj_flush(obj, false, ORIGIN_CPU); 1100 return ret; 1101 } 1102 1103 /** 1104 * Writes data to the object referenced by handle. 1105 * 1106 * On error, the contents of the buffer that were to be modified are undefined. 1107 */ 1108 int 1109 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, 1110 struct drm_file *file) 1111 { 1112 struct drm_i915_private *dev_priv = dev->dev_private; 1113 struct drm_i915_gem_pwrite *args = data; 1114 struct drm_i915_gem_object *obj; 1115 int ret; 1116 1117 if (args->size == 0) 1118 return 0; 1119 1120 if (!access_ok(VERIFY_READ, 1121 to_user_ptr(args->data_ptr), 1122 args->size)) 1123 return -EFAULT; 1124 1125 #ifdef __linux__ 1126 if (likely(!i915.prefault_disable)) { 1127 ret = fault_in_multipages_readable(to_user_ptr(args->data_ptr), 1128 args->size); 1129 if (ret) 1130 return -EFAULT; 1131 } 1132 #endif 1133 1134 intel_runtime_pm_get(dev_priv); 1135 1136 ret = i915_mutex_lock_interruptible(dev); 1137 if (ret) 1138 goto put_rpm; 1139 1140 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 1141 if (&obj->base == NULL) { 1142 ret = -ENOENT; 1143 goto unlock; 1144 } 1145 1146 /* Bounds check destination. */ 1147 if (args->offset > obj->base.size || 1148 args->size > obj->base.size - args->offset) { 1149 ret = -EINVAL; 1150 goto out; 1151 } 1152 1153 /* prime objects have no backing filp to GEM pread/pwrite 1154 * pages from. 1155 */ 1156 if (!obj->base.filp) { 1157 ret = -EINVAL; 1158 goto out; 1159 } 1160 1161 trace_i915_gem_object_pwrite(obj, args->offset, args->size); 1162 1163 ret = -EFAULT; 1164 /* We can only do the GTT pwrite on untiled buffers, as otherwise 1165 * it would end up going through the fenced access, and we'll get 1166 * different detiling behavior between reading and writing. 1167 * pread/pwrite currently are reading and writing from the CPU 1168 * perspective, requiring manual detiling by the client. 
1169 */ 1170 if (obj->tiling_mode == I915_TILING_NONE && 1171 obj->base.write_domain != I915_GEM_DOMAIN_CPU && 1172 cpu_write_needs_clflush(obj)) { 1173 ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file); 1174 /* Note that the gtt paths might fail with non-page-backed user 1175 * pointers (e.g. gtt mappings when moving data between 1176 * textures). Fallback to the shmem path in that case. */ 1177 } 1178 1179 if (ret == -EFAULT || ret == -ENOSPC) { 1180 if (obj->phys_handle) 1181 ret = i915_gem_phys_pwrite(obj, args, file); 1182 else 1183 ret = i915_gem_shmem_pwrite(dev, obj, args, file); 1184 } 1185 1186 out: 1187 drm_gem_object_unreference(&obj->base); 1188 unlock: 1189 mutex_unlock(&dev->struct_mutex); 1190 put_rpm: 1191 intel_runtime_pm_put(dev_priv); 1192 1193 return ret; 1194 } 1195 1196 int 1197 i915_gem_check_wedge(struct i915_gpu_error *error, 1198 bool interruptible) 1199 { 1200 if (i915_reset_in_progress(error)) { 1201 /* Non-interruptible callers can't handle -EAGAIN, hence return 1202 * -EIO unconditionally for these. */ 1203 if (!interruptible) 1204 return -EIO; 1205 1206 /* Recovery complete, but the reset failed ... */ 1207 if (i915_terminally_wedged(error)) 1208 return -EIO; 1209 1210 /* 1211 * Check if GPU Reset is in progress - we need intel_ring_begin 1212 * to work properly to reinit the hw state while the gpu is 1213 * still marked as reset-in-progress. Handle this with a flag. 1214 */ 1215 if (!error->reload_in_reset) 1216 return -EAGAIN; 1217 } 1218 1219 return 0; 1220 } 1221 1222 #ifdef __linux__ 1223 static void fake_irq(unsigned long data) 1224 { 1225 wake_up_process((struct task_struct *)data); 1226 } 1227 #endif 1228 1229 static bool missed_irq(struct drm_i915_private *dev_priv, 1230 struct intel_engine_cs *ring) 1231 { 1232 return test_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings); 1233 } 1234 1235 #ifdef __linux__ 1236 static unsigned long local_clock_us(unsigned *cpu) 1237 { 1238 unsigned long t; 1239 1240 /* Cheaply and approximately convert from nanoseconds to microseconds. 1241 * The result and subsequent calculations are also defined in the same 1242 * approximate microseconds units. The principal source of timing 1243 * error here is from the simple truncation. 1244 * 1245 * Note that local_clock() is only defined wrt to the current CPU; 1246 * the comparisons are no longer valid if we switch CPUs. Instead of 1247 * blocking preemption for the entire busywait, we can detect the CPU 1248 * switch and use that as indicator of system load and a reason to 1249 * stop busywaiting, see busywait_stop(). 1250 */ 1251 *cpu = get_cpu(); 1252 t = local_clock() >> 10; 1253 put_cpu(); 1254 1255 return t; 1256 } 1257 #else 1258 static unsigned long local_clock_us(unsigned *cpu) 1259 { 1260 *cpu = cpu_number(); 1261 return ticks * tick; 1262 } 1263 #endif 1264 1265 static bool busywait_stop(unsigned long timeout, unsigned cpu) 1266 { 1267 unsigned this_cpu; 1268 1269 if (time_after(local_clock_us(&this_cpu), timeout)) 1270 return true; 1271 1272 return this_cpu != cpu; 1273 } 1274 1275 static int __i915_spin_request(struct drm_i915_gem_request *req, int state) 1276 { 1277 unsigned long timeout; 1278 unsigned cpu; 1279 1280 /* When waiting for high frequency requests, e.g. during synchronous 1281 * rendering split between the CPU and GPU, the finite amount of time 1282 * required to set up the irq and wait upon it limits the response 1283 * rate. 
By busywaiting on the request completion for a short while we 1284 * can service the high frequency waits as quick as possible. However, 1285 * if it is a slow request, we want to sleep as quickly as possible. 1286 * The tradeoff between waiting and sleeping is roughly the time it 1287 * takes to sleep on a request, on the order of a microsecond. 1288 */ 1289 1290 if (req->ring->irq_refcount) 1291 return -EBUSY; 1292 1293 /* Only spin if we know the GPU is processing this request */ 1294 if (!i915_gem_request_started(req, true)) 1295 return -EAGAIN; 1296 1297 timeout = local_clock_us(&cpu) + 5; 1298 while (!drm_need_resched()) { 1299 if (i915_gem_request_completed(req, true)) 1300 return 0; 1301 1302 if (signal_pending_state(state, current)) 1303 break; 1304 1305 if (busywait_stop(timeout, cpu)) 1306 break; 1307 1308 cpu_relax_lowlatency(); 1309 } 1310 1311 if (i915_gem_request_completed(req, false)) 1312 return 0; 1313 1314 return -EAGAIN; 1315 } 1316 1317 #ifdef __linux__ 1318 /** 1319 * __i915_wait_request - wait until execution of request has finished 1320 * @req: duh! 1321 * @reset_counter: reset sequence associated with the given request 1322 * @interruptible: do an interruptible wait (normally yes) 1323 * @timeout: in - how long to wait (NULL forever); out - how much time remaining 1324 * 1325 * Note: It is of utmost importance that the passed in seqno and reset_counter 1326 * values have been read by the caller in an smp safe manner. Where read-side 1327 * locks are involved, it is sufficient to read the reset_counter before 1328 * unlocking the lock that protects the seqno. For lockless tricks, the 1329 * reset_counter _must_ be read before, and an appropriate smp_rmb must be 1330 * inserted. 1331 * 1332 * Returns 0 if the request was found within the alloted time. Else returns the 1333 * errno with remaining time filled in timeout argument. 1334 */ 1335 int __i915_wait_request(struct drm_i915_gem_request *req, 1336 unsigned reset_counter, 1337 bool interruptible, 1338 s64 *timeout, 1339 struct intel_rps_client *rps) 1340 { 1341 struct intel_engine_cs *ring = i915_gem_request_get_ring(req); 1342 struct drm_device *dev = ring->dev; 1343 struct drm_i915_private *dev_priv = dev->dev_private; 1344 const bool irq_test_in_progress = 1345 ACCESS_ONCE(dev_priv->gpu_error.test_irq_rings) & intel_ring_flag(ring); 1346 int state = interruptible ? 
TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; 1347 DEFINE_WAIT(wait); 1348 unsigned long timeout_expire; 1349 s64 before, now; 1350 int ret; 1351 1352 WARN(!intel_irqs_enabled(dev_priv), "IRQs disabled"); 1353 1354 if (list_empty(&req->list)) 1355 return 0; 1356 1357 if (i915_gem_request_completed(req, true)) 1358 return 0; 1359 1360 timeout_expire = 0; 1361 if (timeout) { 1362 if (WARN_ON(*timeout < 0)) 1363 return -EINVAL; 1364 1365 if (*timeout == 0) 1366 return -ETIME; 1367 1368 timeout_expire = jiffies + nsecs_to_jiffies_timeout(*timeout); 1369 } 1370 1371 if (INTEL_INFO(dev_priv)->gen >= 6) 1372 gen6_rps_boost(dev_priv, rps, req->emitted_jiffies); 1373 1374 /* Record current time in case interrupted by signal, or wedged */ 1375 trace_i915_gem_request_wait_begin(req); 1376 before = ktime_get_raw_ns(); 1377 1378 /* Optimistic spin for the next jiffie before touching IRQs */ 1379 ret = __i915_spin_request(req, state); 1380 if (ret == 0) 1381 goto out; 1382 1383 if (!irq_test_in_progress && WARN_ON(!ring->irq_get(ring))) { 1384 ret = -ENODEV; 1385 goto out; 1386 } 1387 1388 for (;;) { 1389 struct timer_list timer; 1390 1391 prepare_to_wait(&ring->irq_queue, &wait, state); 1392 1393 /* We need to check whether any gpu reset happened in between 1394 * the caller grabbing the seqno and now ... */ 1395 if (reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter)) { 1396 /* ... but upgrade the -EAGAIN to an -EIO if the gpu 1397 * is truely gone. */ 1398 ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible); 1399 if (ret == 0) 1400 ret = -EAGAIN; 1401 break; 1402 } 1403 1404 if (i915_gem_request_completed(req, false)) { 1405 ret = 0; 1406 break; 1407 } 1408 1409 if (signal_pending_state(state, current)) { 1410 ret = -ERESTARTSYS; 1411 break; 1412 } 1413 1414 if (timeout && time_after_eq(jiffies, timeout_expire)) { 1415 ret = -ETIME; 1416 break; 1417 } 1418 1419 timer.function = NULL; 1420 if (timeout || missed_irq(dev_priv, ring)) { 1421 unsigned long expire; 1422 1423 setup_timer_on_stack(&timer, fake_irq, (unsigned long)current); 1424 expire = missed_irq(dev_priv, ring) ? jiffies + 1 : timeout_expire; 1425 mod_timer(&timer, expire); 1426 } 1427 1428 io_schedule(); 1429 1430 if (timer.function) { 1431 del_singleshot_timer_sync(&timer); 1432 destroy_timer_on_stack(&timer); 1433 } 1434 } 1435 if (!irq_test_in_progress) 1436 ring->irq_put(ring); 1437 1438 finish_wait(&ring->irq_queue, &wait); 1439 1440 out: 1441 now = ktime_get_raw_ns(); 1442 trace_i915_gem_request_wait_end(req); 1443 1444 if (timeout) { 1445 s64 tres = *timeout - (now - before); 1446 1447 *timeout = tres < 0 ? 0 : tres; 1448 1449 /* 1450 * Apparently ktime isn't accurate enough and occasionally has a 1451 * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch 1452 * things up to make the test happy. We allow up to 1 jiffy. 1453 * 1454 * This is a regrssion from the timespec->ktime conversion. 
1455 */ 1456 if (ret == -ETIME && *timeout < jiffies_to_usecs(1)*1000) 1457 *timeout = 0; 1458 } 1459 1460 return ret; 1461 } 1462 #else 1463 int __i915_wait_request(struct drm_i915_gem_request *req, 1464 unsigned reset_counter, 1465 bool interruptible, 1466 s64 *timeout, 1467 struct intel_rps_client *rps) 1468 { 1469 struct intel_engine_cs *ring = i915_gem_request_get_ring(req); 1470 struct drm_device *dev = ring->dev; 1471 struct drm_i915_private *dev_priv = dev->dev_private; 1472 const bool irq_test_in_progress = 1473 ACCESS_ONCE(dev_priv->gpu_error.test_irq_rings) & intel_ring_flag(ring); 1474 int state = interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; 1475 struct sleep_state sls; 1476 unsigned long timeout_expire; 1477 s64 before, now; 1478 int ret; 1479 1480 WARN(!intel_irqs_enabled(dev_priv), "IRQs disabled"); 1481 1482 if (list_empty(&req->list)) 1483 return 0; 1484 1485 if (i915_gem_request_completed(req, true)) 1486 return 0; 1487 1488 timeout_expire = 0; 1489 if (timeout) { 1490 if (WARN_ON(*timeout < 0)) 1491 return -EINVAL; 1492 1493 if (*timeout == 0) 1494 return -ETIME; 1495 1496 timeout_expire = jiffies + nsecs_to_jiffies_timeout(*timeout); 1497 } 1498 1499 if (INTEL_INFO(dev_priv)->gen >= 6) 1500 gen6_rps_boost(dev_priv, rps, req->emitted_jiffies); 1501 1502 /* Record current time in case interrupted by signal, or wedged */ 1503 trace_i915_gem_request_wait_begin(req); 1504 before = ktime_get_raw_ns(); 1505 1506 /* Optimistic spin for the next jiffie before touching IRQs */ 1507 ret = __i915_spin_request(req, state); 1508 if (ret == 0) 1509 goto out; 1510 1511 if (!irq_test_in_progress && WARN_ON(!ring->irq_get(ring))) { 1512 ret = -ENODEV; 1513 goto out; 1514 } 1515 1516 KASSERT(!cold); 1517 for (;;) { 1518 sleep_setup(&sls, &ring->irq_queue, state, "wseq"); 1519 1520 /* We need to check whether any gpu reset happened in between 1521 * the caller grabbing the seqno and now ... */ 1522 if (reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter)) { 1523 /* ... but upgrade the -EAGAIN to an -EIO if the gpu 1524 * is truely gone. */ 1525 ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible); 1526 if (ret == 0) 1527 ret = -EAGAIN; 1528 break; 1529 } 1530 1531 if (i915_gem_request_completed(req, false)) { 1532 ret = 0; 1533 break; 1534 } 1535 1536 if (interruptible && ret) { 1537 ret = -ERESTARTSYS; 1538 break; 1539 } 1540 1541 if (timeout && time_after_eq(jiffies, timeout_expire)) { 1542 ret = -ETIME; 1543 break; 1544 } 1545 1546 if (timeout || missed_irq(dev_priv, ring)) { 1547 unsigned long expire; 1548 int timo; 1549 1550 expire = missed_irq(dev_priv, ring) ? jiffies + 1 : timeout_expire; 1551 timo = expire - jiffies; 1552 if (timo < 1) 1553 timo = 1; 1554 sleep_setup_timeout(&sls, timo); 1555 } 1556 1557 sleep_setup_signal(&sls, state); 1558 1559 sleep_finish(&sls, 1); 1560 sleep_finish_timeout(&sls); 1561 ret = sleep_finish_signal(&sls); 1562 } 1563 if (!irq_test_in_progress) 1564 ring->irq_put(ring); 1565 1566 sleep_finish(&sls, 0); 1567 sleep_finish_timeout(&sls); 1568 sleep_finish_signal(&sls); 1569 1570 out: 1571 now = ktime_get_raw_ns(); 1572 trace_i915_gem_request_wait_end(req); 1573 1574 if (timeout) { 1575 s64 tres = *timeout - (now - before); 1576 1577 *timeout = tres < 0 ? 0 : tres; 1578 1579 /* 1580 * Apparently ktime isn't accurate enough and occasionally has a 1581 * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch 1582 * things up to make the test happy. We allow up to 1 jiffy. 
1583 * 1584 * This is a regrssion from the timespec->ktime conversion. 1585 */ 1586 if (ret == -ETIME && *timeout < jiffies_to_usecs(1)*1000) 1587 *timeout = 0; 1588 } 1589 1590 return ret; 1591 } 1592 #endif 1593 1594 int i915_gem_request_add_to_client(struct drm_i915_gem_request *req, 1595 struct drm_file *file) 1596 { 1597 struct drm_i915_private *dev_private; 1598 struct drm_i915_file_private *file_priv; 1599 1600 WARN_ON(!req || !file || req->file_priv); 1601 1602 if (!req || !file) 1603 return -EINVAL; 1604 1605 if (req->file_priv) 1606 return -EINVAL; 1607 1608 dev_private = req->ring->dev->dev_private; 1609 file_priv = file->driver_priv; 1610 1611 spin_lock(&file_priv->mm.lock); 1612 req->file_priv = file_priv; 1613 list_add_tail(&req->client_list, &file_priv->mm.request_list); 1614 spin_unlock(&file_priv->mm.lock); 1615 1616 #ifdef __linux__ 1617 req->pid = get_pid(task_pid(current)); 1618 #endif 1619 1620 return 0; 1621 } 1622 1623 static inline void 1624 i915_gem_request_remove_from_client(struct drm_i915_gem_request *request) 1625 { 1626 struct drm_i915_file_private *file_priv = request->file_priv; 1627 1628 if (!file_priv) 1629 return; 1630 1631 spin_lock(&file_priv->mm.lock); 1632 list_del(&request->client_list); 1633 request->file_priv = NULL; 1634 spin_unlock(&file_priv->mm.lock); 1635 1636 #ifdef __linux__ 1637 put_pid(request->pid); 1638 request->pid = NULL; 1639 #endif 1640 } 1641 1642 static void i915_gem_request_retire(struct drm_i915_gem_request *request) 1643 { 1644 trace_i915_gem_request_retire(request); 1645 1646 /* We know the GPU must have read the request to have 1647 * sent us the seqno + interrupt, so use the position 1648 * of tail of the request to update the last known position 1649 * of the GPU head. 1650 * 1651 * Note this requires that we are always called in request 1652 * completion order. 1653 */ 1654 request->ringbuf->last_retired_head = request->postfix; 1655 1656 list_del_init(&request->list); 1657 i915_gem_request_remove_from_client(request); 1658 1659 i915_gem_request_unreference(request); 1660 } 1661 1662 static void 1663 __i915_gem_request_retire__upto(struct drm_i915_gem_request *req) 1664 { 1665 struct intel_engine_cs *engine = req->ring; 1666 struct drm_i915_gem_request *tmp; 1667 1668 lockdep_assert_held(&engine->dev->struct_mutex); 1669 1670 if (list_empty(&req->list)) 1671 return; 1672 1673 do { 1674 tmp = list_first_entry(&engine->request_list, 1675 typeof(*tmp), list); 1676 1677 i915_gem_request_retire(tmp); 1678 } while (tmp != req); 1679 1680 WARN_ON(i915_verify_lists(engine->dev)); 1681 } 1682 1683 /** 1684 * Waits for a request to be signaled, and cleans up the 1685 * request and object lists appropriately for that event. 
1686 */ 1687 int 1688 i915_wait_request(struct drm_i915_gem_request *req) 1689 { 1690 struct drm_device *dev; 1691 struct drm_i915_private *dev_priv; 1692 bool interruptible; 1693 int ret; 1694 1695 BUG_ON(req == NULL); 1696 1697 dev = req->ring->dev; 1698 dev_priv = dev->dev_private; 1699 interruptible = dev_priv->mm.interruptible; 1700 1701 BUG_ON(!mutex_is_locked(&dev->struct_mutex)); 1702 1703 ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible); 1704 if (ret) 1705 return ret; 1706 1707 ret = __i915_wait_request(req, 1708 atomic_read(&dev_priv->gpu_error.reset_counter), 1709 interruptible, NULL, NULL); 1710 if (ret) 1711 return ret; 1712 1713 __i915_gem_request_retire__upto(req); 1714 return 0; 1715 } 1716 1717 /** 1718 * Ensures that all rendering to the object has completed and the object is 1719 * safe to unbind from the GTT or access from the CPU. 1720 */ 1721 int 1722 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, 1723 bool readonly) 1724 { 1725 int ret, i; 1726 1727 if (!obj->active) 1728 return 0; 1729 1730 if (readonly) { 1731 if (obj->last_write_req != NULL) { 1732 ret = i915_wait_request(obj->last_write_req); 1733 if (ret) 1734 return ret; 1735 1736 i = obj->last_write_req->ring->id; 1737 if (obj->last_read_req[i] == obj->last_write_req) 1738 i915_gem_object_retire__read(obj, i); 1739 else 1740 i915_gem_object_retire__write(obj); 1741 } 1742 } else { 1743 for (i = 0; i < I915_NUM_RINGS; i++) { 1744 if (obj->last_read_req[i] == NULL) 1745 continue; 1746 1747 ret = i915_wait_request(obj->last_read_req[i]); 1748 if (ret) 1749 return ret; 1750 1751 i915_gem_object_retire__read(obj, i); 1752 } 1753 RQ_BUG_ON(obj->active); 1754 } 1755 1756 return 0; 1757 } 1758 1759 static void 1760 i915_gem_object_retire_request(struct drm_i915_gem_object *obj, 1761 struct drm_i915_gem_request *req) 1762 { 1763 int ring = req->ring->id; 1764 1765 if (obj->last_read_req[ring] == req) 1766 i915_gem_object_retire__read(obj, ring); 1767 else if (obj->last_write_req == req) 1768 i915_gem_object_retire__write(obj); 1769 1770 __i915_gem_request_retire__upto(req); 1771 } 1772 1773 /* A nonblocking variant of the above wait. This is a highly dangerous routine 1774 * as the object state may change during this call. 
1775 */ 1776 static __must_check int 1777 i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj, 1778 struct intel_rps_client *rps, 1779 bool readonly) 1780 { 1781 struct drm_device *dev = obj->base.dev; 1782 struct drm_i915_private *dev_priv = dev->dev_private; 1783 struct drm_i915_gem_request *requests[I915_NUM_RINGS]; 1784 unsigned reset_counter; 1785 int ret, i, n = 0; 1786 1787 BUG_ON(!mutex_is_locked(&dev->struct_mutex)); 1788 BUG_ON(!dev_priv->mm.interruptible); 1789 1790 if (!obj->active) 1791 return 0; 1792 1793 ret = i915_gem_check_wedge(&dev_priv->gpu_error, true); 1794 if (ret) 1795 return ret; 1796 1797 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter); 1798 1799 if (readonly) { 1800 struct drm_i915_gem_request *req; 1801 1802 req = obj->last_write_req; 1803 if (req == NULL) 1804 return 0; 1805 1806 requests[n++] = i915_gem_request_reference(req); 1807 } else { 1808 for (i = 0; i < I915_NUM_RINGS; i++) { 1809 struct drm_i915_gem_request *req; 1810 1811 req = obj->last_read_req[i]; 1812 if (req == NULL) 1813 continue; 1814 1815 requests[n++] = i915_gem_request_reference(req); 1816 } 1817 } 1818 1819 mutex_unlock(&dev->struct_mutex); 1820 for (i = 0; ret == 0 && i < n; i++) 1821 ret = __i915_wait_request(requests[i], reset_counter, true, 1822 NULL, rps); 1823 mutex_lock(&dev->struct_mutex); 1824 1825 for (i = 0; i < n; i++) { 1826 if (ret == 0) 1827 i915_gem_object_retire_request(obj, requests[i]); 1828 i915_gem_request_unreference(requests[i]); 1829 } 1830 1831 return ret; 1832 } 1833 1834 static struct intel_rps_client *to_rps_client(struct drm_file *file) 1835 { 1836 struct drm_i915_file_private *fpriv = file->driver_priv; 1837 return &fpriv->rps; 1838 } 1839 1840 /** 1841 * Called when user space prepares to use an object with the CPU, either 1842 * through the mmap ioctl's mapping or a GTT mapping. 1843 */ 1844 int 1845 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, 1846 struct drm_file *file) 1847 { 1848 struct drm_i915_gem_set_domain *args = data; 1849 struct drm_i915_gem_object *obj; 1850 uint32_t read_domains = args->read_domains; 1851 uint32_t write_domain = args->write_domain; 1852 int ret; 1853 1854 /* Only handle setting domains to types used by the CPU. */ 1855 if (write_domain & I915_GEM_GPU_DOMAINS) 1856 return -EINVAL; 1857 1858 if (read_domains & I915_GEM_GPU_DOMAINS) 1859 return -EINVAL; 1860 1861 /* Having something in the write domain implies it's in the read 1862 * domain, and only that read domain. Enforce that in the request. 1863 */ 1864 if (write_domain != 0 && read_domains != write_domain) 1865 return -EINVAL; 1866 1867 ret = i915_mutex_lock_interruptible(dev); 1868 if (ret) 1869 return ret; 1870 1871 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 1872 if (&obj->base == NULL) { 1873 ret = -ENOENT; 1874 goto unlock; 1875 } 1876 1877 /* Try to flush the object off the GPU without holding the lock. 1878 * We will repeat the flush holding the lock in the normal manner 1879 * to catch cases where we are gazumped. 1880 */ 1881 ret = i915_gem_object_wait_rendering__nonblocking(obj, 1882 to_rps_client(file), 1883 !write_domain); 1884 if (ret) 1885 goto unref; 1886 1887 if (read_domains & I915_GEM_DOMAIN_GTT) 1888 ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0); 1889 else 1890 ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0); 1891 1892 if (write_domain != 0) 1893 intel_fb_obj_invalidate(obj, 1894 write_domain == I915_GEM_DOMAIN_GTT ? 
1895 ORIGIN_GTT : ORIGIN_CPU); 1896 1897 unref: 1898 drm_gem_object_unreference(&obj->base); 1899 unlock: 1900 mutex_unlock(&dev->struct_mutex); 1901 return ret; 1902 } 1903 1904 /** 1905 * Called when user space has done writes to this buffer 1906 */ 1907 int 1908 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, 1909 struct drm_file *file) 1910 { 1911 struct drm_i915_gem_sw_finish *args = data; 1912 struct drm_i915_gem_object *obj; 1913 int ret = 0; 1914 1915 ret = i915_mutex_lock_interruptible(dev); 1916 if (ret) 1917 return ret; 1918 1919 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 1920 if (&obj->base == NULL) { 1921 ret = -ENOENT; 1922 goto unlock; 1923 } 1924 1925 /* Pinned buffers may be scanout, so flush the cache */ 1926 if (obj->pin_display) 1927 i915_gem_object_flush_cpu_write_domain(obj); 1928 1929 drm_gem_object_unreference(&obj->base); 1930 unlock: 1931 mutex_unlock(&dev->struct_mutex); 1932 return ret; 1933 } 1934 1935 /** 1936 * Maps the contents of an object, returning the address it is mapped 1937 * into. 1938 * 1939 * While the mapping holds a reference on the contents of the object, it doesn't 1940 * imply a ref on the object itself. 1941 * 1942 * IMPORTANT: 1943 * 1944 * DRM driver writers who look a this function as an example for how to do GEM 1945 * mmap support, please don't implement mmap support like here. The modern way 1946 * to implement DRM mmap support is with an mmap offset ioctl (like 1947 * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly. 1948 * That way debug tooling like valgrind will understand what's going on, hiding 1949 * the mmap call in a driver private ioctl will break that. The i915 driver only 1950 * does cpu mmaps this way because we didn't know better. 1951 */ 1952 int 1953 i915_gem_mmap_ioctl(struct drm_device *dev, void *data, 1954 struct drm_file *file) 1955 { 1956 struct drm_i915_gem_mmap *args = data; 1957 struct drm_gem_object *obj; 1958 vaddr_t addr; 1959 vsize_t size; 1960 int ret; 1961 1962 #ifdef __OpenBSD__ 1963 if (args->size == 0 || args->offset & PAGE_MASK) 1964 return -EINVAL; 1965 size = round_page(args->size); 1966 if (args->offset + size < args->offset) 1967 return -EINVAL; 1968 #endif 1969 1970 if (args->flags & ~(I915_MMAP_WC)) 1971 return -EINVAL; 1972 1973 if (args->flags & I915_MMAP_WC && !cpu_has_pat) 1974 return -ENODEV; 1975 1976 obj = drm_gem_object_lookup(dev, file, args->handle); 1977 if (obj == NULL) 1978 return -ENOENT; 1979 1980 /* prime objects have no backing filp to GEM mmap 1981 * pages from. 
1982 */ 1983 if (!obj->filp) { 1984 drm_gem_object_unreference_unlocked(obj); 1985 return -EINVAL; 1986 } 1987 1988 #ifdef __linux__ 1989 addr = vm_mmap(obj->filp, 0, args->size, 1990 PROT_READ | PROT_WRITE, MAP_SHARED, 1991 args->offset); 1992 if (args->flags & I915_MMAP_WC) { 1993 struct mm_struct *mm = current->mm; 1994 struct vm_area_struct *vma; 1995 1996 down_write(&mm->mmap_sem); 1997 vma = find_vma(mm, addr); 1998 if (vma) 1999 vma->vm_page_prot = 2000 pgprot_writecombine(vm_get_page_prot(vma->vm_flags)); 2001 else 2002 addr = -ENOMEM; 2003 up_write(&mm->mmap_sem); 2004 } 2005 drm_gem_object_unreference_unlocked(obj); 2006 if (IS_ERR((void *)addr)) 2007 return addr; 2008 #else 2009 addr = 0; 2010 ret = -uvm_map(&curproc->p_vmspace->vm_map, &addr, size, 2011 obj->uao, args->offset, 0, UVM_MAPFLAG(PROT_READ | PROT_WRITE, 2012 PROT_READ | PROT_WRITE, MAP_INHERIT_SHARE, MADV_RANDOM, 0)); 2013 if (ret == 0) 2014 uao_reference(obj->uao); 2015 drm_gem_object_unreference_unlocked(obj); 2016 if (ret) 2017 return ret; 2018 #endif 2019 2020 args->addr_ptr = (uint64_t) addr; 2021 2022 return 0; 2023 } 2024 2025 #ifdef __linux__ 2026 2027 /** 2028 * i915_gem_fault - fault a page into the GTT 2029 * @vma: VMA in question 2030 * @vmf: fault info 2031 * 2032 * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped 2033 * from userspace. The fault handler takes care of binding the object to 2034 * the GTT (if needed), allocating and programming a fence register (again, 2035 * only if needed based on whether the old reg is still valid or the object 2036 * is tiled) and inserting a new PTE into the faulting process. 2037 * 2038 * Note that the faulting process may involve evicting existing objects 2039 * from the GTT and/or fence registers to make room. So performance may 2040 * suffer if the GTT working set is large or there are few fence registers 2041 * left. 2042 */ 2043 int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) 2044 { 2045 struct drm_i915_gem_object *obj = to_intel_bo(vma->vm_private_data); 2046 struct drm_device *dev = obj->base.dev; 2047 struct drm_i915_private *dev_priv = dev->dev_private; 2048 struct i915_ggtt_view view = i915_ggtt_view_normal; 2049 pgoff_t page_offset; 2050 unsigned long pfn; 2051 int ret = 0; 2052 bool write = !!(vmf->flags & FAULT_FLAG_WRITE); 2053 2054 intel_runtime_pm_get(dev_priv); 2055 2056 /* We don't use vmf->pgoff since that has the fake offset */ 2057 page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >> 2058 PAGE_SHIFT; 2059 2060 ret = i915_mutex_lock_interruptible(dev); 2061 if (ret) 2062 goto out; 2063 2064 trace_i915_gem_object_fault(obj, page_offset, true, write); 2065 2066 /* Try to flush the object off the GPU first without holding the lock. 2067 * Upon reacquiring the lock, we will perform our sanity checks and then 2068 * repeat the flush holding the lock in the normal manner to catch cases 2069 * where we are gazumped. 2070 */ 2071 ret = i915_gem_object_wait_rendering__nonblocking(obj, NULL, !write); 2072 if (ret) 2073 goto unlock; 2074 2075 /* Access to snoopable pages through the GTT is incoherent. */ 2076 if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) { 2077 ret = -EFAULT; 2078 goto unlock; 2079 } 2080 2081 /* Use a partial view if the object is bigger than the aperture. 
*/ 2082 if (obj->base.size >= dev_priv->gtt.mappable_end && 2083 obj->tiling_mode == I915_TILING_NONE) { 2084 static const unsigned int chunk_size = 256; // 1 MiB 2085 2086 memset(&view, 0, sizeof(view)); 2087 view.type = I915_GGTT_VIEW_PARTIAL; 2088 view.params.partial.offset = rounddown(page_offset, chunk_size); 2089 view.params.partial.size = 2090 min_t(unsigned int, 2091 chunk_size, 2092 (vma->vm_end - vma->vm_start)/PAGE_SIZE - 2093 view.params.partial.offset); 2094 } 2095 2096 /* Now pin it into the GTT if needed */ 2097 ret = i915_gem_object_ggtt_pin(obj, &view, 0, PIN_MAPPABLE); 2098 if (ret) 2099 goto unlock; 2100 2101 ret = i915_gem_object_set_to_gtt_domain(obj, write); 2102 if (ret) 2103 goto unpin; 2104 2105 ret = i915_gem_object_get_fence(obj); 2106 if (ret) 2107 goto unpin; 2108 2109 /* Finally, remap it using the new GTT offset */ 2110 pfn = dev_priv->gtt.mappable_base + 2111 i915_gem_obj_ggtt_offset_view(obj, &view); 2112 pfn >>= PAGE_SHIFT; 2113 2114 if (unlikely(view.type == I915_GGTT_VIEW_PARTIAL)) { 2115 /* Overriding existing pages in partial view does not cause 2116 * us any trouble as TLBs are still valid because the fault 2117 * is due to userspace losing part of the mapping or never 2118 * having accessed it before (at this partials' range). 2119 */ 2120 unsigned long base = vma->vm_start + 2121 (view.params.partial.offset << PAGE_SHIFT); 2122 unsigned int i; 2123 2124 for (i = 0; i < view.params.partial.size; i++) { 2125 ret = vm_insert_pfn(vma, base + i * PAGE_SIZE, pfn + i); 2126 if (ret) 2127 break; 2128 } 2129 2130 obj->fault_mappable = true; 2131 } else { 2132 if (!obj->fault_mappable) { 2133 unsigned long size = min_t(unsigned long, 2134 vma->vm_end - vma->vm_start, 2135 obj->base.size); 2136 int i; 2137 2138 for (i = 0; i < size >> PAGE_SHIFT; i++) { 2139 ret = vm_insert_pfn(vma, 2140 (unsigned long)vma->vm_start + i * PAGE_SIZE, 2141 pfn + i); 2142 if (ret) 2143 break; 2144 } 2145 2146 obj->fault_mappable = true; 2147 } else 2148 ret = vm_insert_pfn(vma, 2149 (unsigned long)vmf->virtual_address, 2150 pfn + page_offset); 2151 } 2152 unpin: 2153 i915_gem_object_ggtt_unpin_view(obj, &view); 2154 unlock: 2155 mutex_unlock(&dev->struct_mutex); 2156 out: 2157 switch (ret) { 2158 case -EIO: 2159 /* 2160 * We eat errors when the gpu is terminally wedged to avoid 2161 * userspace unduly crashing (gl has no provisions for mmaps to 2162 * fail). But any other -EIO isn't ours (e.g. swap in failure) 2163 * and so needs to be reported. 2164 */ 2165 if (!i915_terminally_wedged(&dev_priv->gpu_error)) { 2166 ret = VM_FAULT_SIGBUS; 2167 break; 2168 } 2169 case -EAGAIN: 2170 /* 2171 * EAGAIN means the gpu is hung and we'll wait for the error 2172 * handler to reset everything when re-faulting in 2173 * i915_mutex_lock_interruptible. 2174 */ 2175 case 0: 2176 case -ERESTARTSYS: 2177 case -EINTR: 2178 case -EBUSY: 2179 /* 2180 * EBUSY is ok: this just means that another thread 2181 * already did the job. 
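		 *
		 * Note that the -EIO (terminally wedged) and -EAGAIN cases
		 * above fall through to here on purpose: the faulting client
		 * simply retries once the reset handler has done its work.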
2182 */ 2183 ret = VM_FAULT_NOPAGE; 2184 break; 2185 case -ENOMEM: 2186 ret = VM_FAULT_OOM; 2187 break; 2188 case -ENOSPC: 2189 case -EFAULT: 2190 ret = VM_FAULT_SIGBUS; 2191 break; 2192 default: 2193 WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret); 2194 ret = VM_FAULT_SIGBUS; 2195 break; 2196 } 2197 2198 intel_runtime_pm_put(dev_priv); 2199 return ret; 2200 } 2201 2202 #else 2203 2204 int 2205 i915_gem_fault(struct drm_gem_object *gem_obj, struct uvm_faultinfo *ufi, 2206 off_t offset, vaddr_t vaddr, vm_page_t *pps, int npages, int centeridx, 2207 vm_prot_t access_type, int flags) 2208 { 2209 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); 2210 struct drm_device *dev = obj->base.dev; 2211 struct drm_i915_private *dev_priv = dev->dev_private; 2212 struct i915_ggtt_view view = i915_ggtt_view_normal; 2213 paddr_t paddr; 2214 int lcv, ret = 0; 2215 int write = !!(access_type & PROT_WRITE); 2216 vm_prot_t mapprot; 2217 boolean_t locked = TRUE; 2218 2219 intel_runtime_pm_get(dev_priv); 2220 2221 /* 2222 * If we already own the lock, we must be doing a copyin or 2223 * copyout in one of the fast paths. Return failure such that 2224 * we fall back on the slow path. 2225 */ 2226 if (!drm_vma_node_has_offset(&obj->base.vma_node) || 2227 RWLOCK_OWNER(&dev->struct_mutex) == curproc) { 2228 uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, 2229 &obj->base.uobj, NULL); 2230 ret = VM_PAGER_BAD; 2231 goto out; 2232 } 2233 2234 offset -= drm_vma_node_offset_addr(&obj->base.vma_node); 2235 2236 if (rw_enter(&dev->struct_mutex, RW_NOSLEEP | RW_WRITE) != 0) { 2237 uvmfault_unlockall(ufi, NULL, &obj->base.uobj, NULL); 2238 mutex_lock(&dev->struct_mutex); 2239 locked = uvmfault_relock(ufi); 2240 } 2241 if (!locked) { 2242 mutex_unlock(&dev->struct_mutex); 2243 ret = VM_PAGER_REFAULT; 2244 goto out; 2245 } 2246 2247 /* Try to flush the object off the GPU first without holding the lock. 2248 * Upon reacquiring the lock, we will perform our sanity checks and then 2249 * repeat the flush holding the lock in the normal manner to catch cases 2250 * where we are gazumped. 2251 */ 2252 ret = i915_gem_object_wait_rendering__nonblocking(obj, NULL, !write); 2253 if (ret) 2254 goto unlock; 2255 2256 /* Access to snoopable pages through the GTT is incoherent. */ 2257 if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) { 2258 ret = -EINVAL; 2259 goto unlock; 2260 } 2261 2262 /* Now bind it into the GTT if needed */ 2263 ret = i915_gem_object_ggtt_pin(obj, &view, 0, PIN_MAPPABLE); 2264 if (ret) 2265 goto unlock; 2266 2267 ret = i915_gem_object_set_to_gtt_domain(obj, write); 2268 if (ret) 2269 goto unpin; 2270 2271 ret = i915_gem_object_get_fence(obj); 2272 if (ret) 2273 goto unpin; 2274 2275 obj->fault_mappable = true; 2276 2277 mapprot = ufi->entry->protection; 2278 /* 2279 * if it's only a read fault, we only put ourselves into the gtt 2280 * read domain, so make sure we fault again and set ourselves to write. 2281 * this prevents us needing userland to do domain management and get 2282 * it wrong, and makes us fully coherent with the gpu re mmap. 
2283 */ 2284 if (write == 0) 2285 mapprot &= ~PROT_WRITE; 2286 /* XXX try and be more efficient when we do this */ 2287 for (lcv = 0 ; lcv < npages ; lcv++, offset += PAGE_SIZE, 2288 vaddr += PAGE_SIZE) { 2289 if ((flags & PGO_ALLPAGES) == 0 && lcv != centeridx) 2290 continue; 2291 2292 if (pps[lcv] == PGO_DONTCARE) 2293 continue; 2294 2295 paddr = dev_priv->gtt.mappable_base + 2296 i915_gem_obj_ggtt_offset(obj) + offset; 2297 2298 if (pmap_enter(ufi->orig_map->pmap, vaddr, paddr, 2299 mapprot, PMAP_CANFAIL | mapprot) != 0) { 2300 i915_gem_object_ggtt_unpin_view(obj, &view); 2301 uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, 2302 NULL, NULL); 2303 mutex_unlock(&dev->struct_mutex); 2304 pmap_update(ufi->orig_map->pmap); 2305 uvm_wait("intelflt"); 2306 ret = VM_PAGER_REFAULT; 2307 goto out; 2308 } 2309 } 2310 unpin: 2311 i915_gem_object_ggtt_unpin_view(obj, &view); 2312 unlock: 2313 uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, NULL, NULL); 2314 mutex_unlock(&dev->struct_mutex); 2315 pmap_update(ufi->orig_map->pmap); 2316 2317 switch (ret) { 2318 case -EIO: 2319 /* 2320 * We eat errors when the gpu is terminally wedged to avoid 2321 * userspace unduly crashing (gl has no provisions for mmaps to 2322 * fail). But any other -EIO isn't ours (e.g. swap in failure) 2323 * and so needs to be reported. 2324 */ 2325 if (!i915_terminally_wedged(&dev_priv->gpu_error)) { 2326 ret = VM_PAGER_ERROR; 2327 break; 2328 } 2329 case -EAGAIN: 2330 /* 2331 * EAGAIN means the gpu is hung and we'll wait for the error 2332 * handler to reset everything when re-faulting in 2333 * i915_mutex_lock_interruptible. 2334 */ 2335 case 0: 2336 case -ERESTART: 2337 case -EINTR: 2338 case -EBUSY: 2339 /* 2340 * EBUSY is ok: this just means that another thread 2341 * already did the job. 2342 */ 2343 ret = VM_PAGER_OK; 2344 break; 2345 case -ENOMEM: 2346 ret = VM_PAGER_ERROR; 2347 break; 2348 case -ENOSPC: 2349 case -EFAULT: 2350 ret = VM_PAGER_ERROR; 2351 break; 2352 default: 2353 WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret); 2354 ret = VM_PAGER_ERROR; 2355 break; 2356 } 2357 2358 out: 2359 intel_runtime_pm_put(dev_priv); 2360 return ret; 2361 } 2362 2363 #endif 2364 2365 /** 2366 * i915_gem_release_mmap - remove physical page mappings 2367 * @obj: obj in question 2368 * 2369 * Preserve the reservation of the mmapping with the DRM core code, but 2370 * relinquish ownership of the pages back to the system. 2371 * 2372 * It is vital that we remove the page mapping if we have mapped a tiled 2373 * object through the GTT and then lose the fence register due to 2374 * resource pressure. Similarly if the object has been moved out of the 2375 * aperture, than pages mapped into userspace must be revoked. Removing the 2376 * mapping will then trigger a page fault on the next user access, allowing 2377 * fixup by i915_gem_fault(). 
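 *
 * Note that only the GTT mmaps set up through the fake mmap offset are torn
 * down here; CPU mmaps of the shmem backing store are unaffected.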
2378 */ 2379 void 2380 i915_gem_release_mmap(struct drm_i915_gem_object *obj) 2381 { 2382 if (!obj->fault_mappable) 2383 return; 2384 2385 #ifdef __linux__ 2386 drm_vma_node_unmap(&obj->base.vma_node, 2387 obj->base.dev->anon_inode->i_mapping); 2388 #else 2389 if (drm_vma_node_has_offset(&obj->base.vma_node)) { 2390 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2391 struct vm_page *pg; 2392 2393 for (pg = &dev_priv->pgs[atop(i915_gem_obj_ggtt_offset(obj))]; 2394 pg != &dev_priv->pgs[atop(i915_gem_obj_ggtt_offset(obj) + obj->base.size)]; 2395 pg++) 2396 pmap_page_protect(pg, PROT_NONE); 2397 } 2398 #endif 2399 obj->fault_mappable = false; 2400 } 2401 2402 void 2403 i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv) 2404 { 2405 struct drm_i915_gem_object *obj; 2406 2407 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) 2408 i915_gem_release_mmap(obj); 2409 } 2410 2411 uint32_t 2412 i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode) 2413 { 2414 uint32_t gtt_size; 2415 2416 if (INTEL_INFO(dev)->gen >= 4 || 2417 tiling_mode == I915_TILING_NONE) 2418 return size; 2419 2420 /* Previous chips need a power-of-two fence region when tiling */ 2421 if (INTEL_INFO(dev)->gen == 3) 2422 gtt_size = 1024*1024; 2423 else 2424 gtt_size = 512*1024; 2425 2426 while (gtt_size < size) 2427 gtt_size <<= 1; 2428 2429 return gtt_size; 2430 } 2431 2432 /** 2433 * i915_gem_get_gtt_alignment - return required GTT alignment for an object 2434 * @obj: object to check 2435 * 2436 * Return the required GTT alignment for an object, taking into account 2437 * potential fence register mapping. 2438 */ 2439 uint32_t 2440 i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size, 2441 int tiling_mode, bool fenced) 2442 { 2443 /* 2444 * Minimum alignment is 4k (GTT page size), but might be greater 2445 * if a fence register is needed for the object. 2446 */ 2447 if (INTEL_INFO(dev)->gen >= 4 || (!fenced && IS_G33(dev)) || 2448 tiling_mode == I915_TILING_NONE) 2449 return 4096; 2450 2451 /* 2452 * Previous chips need to be aligned to the size of the smallest 2453 * fence register that can contain the object. 2454 */ 2455 return i915_gem_get_gtt_size(dev, size, tiling_mode); 2456 } 2457 2458 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj) 2459 { 2460 #ifdef notyet 2461 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2462 #endif 2463 int ret; 2464 2465 if (drm_vma_node_has_offset(&obj->base.vma_node)) 2466 return 0; 2467 2468 #ifdef notyet 2469 dev_priv->mm.shrinker_no_lock_stealing = true; 2470 #endif 2471 2472 ret = drm_gem_create_mmap_offset(&obj->base); 2473 #ifdef notyet 2474 if (ret != -ENOSPC) 2475 goto out; 2476 2477 /* Badly fragmented mmap space? The only way we can recover 2478 * space is by destroying unwanted objects. We can't randomly release 2479 * mmap_offsets as userspace expects them to be persistent for the 2480 * lifetime of the objects. The closest we can is to release the 2481 * offsets on purgeable objects by truncating it and marking it purged, 2482 * which prevents userspace from ever using that object again. 
2483 */ 2484 i915_gem_shrink(dev_priv, 2485 obj->base.size >> PAGE_SHIFT, 2486 I915_SHRINK_BOUND | 2487 I915_SHRINK_UNBOUND | 2488 I915_SHRINK_PURGEABLE); 2489 ret = drm_gem_create_mmap_offset(&obj->base); 2490 if (ret != -ENOSPC) 2491 goto out; 2492 2493 i915_gem_shrink_all(dev_priv); 2494 ret = drm_gem_create_mmap_offset(&obj->base); 2495 out: 2496 dev_priv->mm.shrinker_no_lock_stealing = false; 2497 #endif 2498 2499 return ret; 2500 } 2501 2502 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj) 2503 { 2504 drm_gem_free_mmap_offset(&obj->base); 2505 } 2506 2507 int 2508 i915_gem_mmap_gtt(struct drm_file *file, 2509 struct drm_device *dev, 2510 uint32_t handle, 2511 uint64_t *offset) 2512 { 2513 struct drm_i915_gem_object *obj; 2514 int ret; 2515 2516 ret = i915_mutex_lock_interruptible(dev); 2517 if (ret) 2518 return ret; 2519 2520 obj = to_intel_bo(drm_gem_object_lookup(dev, file, handle)); 2521 if (&obj->base == NULL) { 2522 ret = -ENOENT; 2523 goto unlock; 2524 } 2525 2526 if (obj->madv != I915_MADV_WILLNEED) { 2527 DRM_DEBUG("Attempting to mmap a purgeable buffer\n"); 2528 ret = -EFAULT; 2529 goto out; 2530 } 2531 2532 ret = i915_gem_object_create_mmap_offset(obj); 2533 if (ret) 2534 goto out; 2535 2536 *offset = drm_vma_node_offset_addr(&obj->base.vma_node); 2537 2538 out: 2539 drm_gem_object_unreference(&obj->base); 2540 unlock: 2541 mutex_unlock(&dev->struct_mutex); 2542 return ret; 2543 } 2544 2545 /** 2546 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing 2547 * @dev: DRM device 2548 * @data: GTT mapping ioctl data 2549 * @file: GEM object info 2550 * 2551 * Simply returns the fake offset to userspace so it can mmap it. 2552 * The mmap call will end up in drm_gem_mmap(), which will set things 2553 * up so we can get faults in the handler above. 2554 * 2555 * The fault handler will take care of binding the object into the GTT 2556 * (since it may have been evicted to make room for something), allocating 2557 * a fence register, and mapping the appropriate aperture address into 2558 * userspace. 2559 */ 2560 int 2561 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, 2562 struct drm_file *file) 2563 { 2564 struct drm_i915_gem_mmap_gtt *args = data; 2565 2566 return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset); 2567 } 2568 2569 /* Immediately discard the backing storage */ 2570 static void 2571 i915_gem_object_truncate(struct drm_i915_gem_object *obj) 2572 { 2573 i915_gem_object_free_mmap_offset(obj); 2574 2575 if (obj->base.filp == NULL) 2576 return; 2577 2578 /* Our goal here is to return as much of the memory as 2579 * is possible back to the system as we are called from OOM. 2580 * To do this we must instruct the shmfs to drop all of its 2581 * backing pages, *now*. 
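	 * On the OpenBSD side the same effect is achieved by flushing and
	 * freeing every page of the backing uvm anonymous object.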
2582 */ 2583 #ifdef __linux__ 2584 shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1); 2585 #else 2586 obj->base.uao->pgops->pgo_flush(obj->base.uao, 0, obj->base.size, 2587 PGO_ALLPAGES | PGO_FREE); 2588 #endif 2589 obj->madv = __I915_MADV_PURGED; 2590 } 2591 2592 /* Try to discard unwanted pages */ 2593 static void 2594 i915_gem_object_invalidate(struct drm_i915_gem_object *obj) 2595 { 2596 #ifdef __linux__ 2597 struct address_space *mapping; 2598 #endif 2599 2600 switch (obj->madv) { 2601 case I915_MADV_DONTNEED: 2602 i915_gem_object_truncate(obj); 2603 case __I915_MADV_PURGED: 2604 return; 2605 } 2606 2607 if (obj->base.filp == NULL) 2608 return; 2609 2610 #ifdef __linux__ 2611 mapping = file_inode(obj->base.filp)->i_mapping, 2612 invalidate_mapping_pages(mapping, 0, (loff_t)-1); 2613 #endif 2614 } 2615 2616 static void 2617 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj) 2618 { 2619 struct sg_page_iter sg_iter; 2620 int ret; 2621 2622 BUG_ON(obj->madv == __I915_MADV_PURGED); 2623 2624 ret = i915_gem_object_set_to_cpu_domain(obj, true); 2625 if (ret) { 2626 /* In the event of a disaster, abandon all caches and 2627 * hope for the best. 2628 */ 2629 WARN_ON(ret != -EIO); 2630 i915_gem_clflush_object(obj, true); 2631 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU; 2632 } 2633 2634 i915_gem_gtt_finish_object(obj); 2635 2636 if (i915_gem_object_needs_bit17_swizzle(obj)) 2637 i915_gem_object_save_bit_17_swizzle(obj); 2638 2639 if (obj->madv == I915_MADV_DONTNEED) 2640 obj->dirty = 0; 2641 2642 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) { 2643 struct vm_page *page = sg_page_iter_page(&sg_iter); 2644 2645 if (obj->dirty) 2646 set_page_dirty(page); 2647 2648 #ifdef __linux__ 2649 if (obj->madv == I915_MADV_WILLNEED) 2650 mark_page_accessed(page); 2651 2652 page_cache_release(page); 2653 #endif 2654 } 2655 #ifdef __OpenBSD__ 2656 uvm_objunwire(obj->base.uao, 0, obj->base.size); 2657 #endif 2658 obj->dirty = 0; 2659 2660 sg_free_table(obj->pages); 2661 kfree(obj->pages); 2662 } 2663 2664 int 2665 i915_gem_object_put_pages(struct drm_i915_gem_object *obj) 2666 { 2667 const struct drm_i915_gem_object_ops *ops = obj->ops; 2668 2669 if (obj->pages == NULL) 2670 return 0; 2671 2672 if (obj->pages_pin_count) 2673 return -EBUSY; 2674 2675 BUG_ON(i915_gem_obj_bound_any(obj)); 2676 2677 /* ->put_pages might need to allocate memory for the bit17 swizzle 2678 * array, hence protect them from being reaped by removing them from gtt 2679 * lists early. */ 2680 list_del(&obj->global_list); 2681 2682 ops->put_pages(obj); 2683 obj->pages = NULL; 2684 2685 i915_gem_object_invalidate(obj); 2686 2687 return 0; 2688 } 2689 2690 static int 2691 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) 2692 { 2693 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2694 int page_count, i; 2695 #ifdef __linux__ 2696 struct address_space *mapping; 2697 #endif 2698 struct sg_table *st; 2699 struct scatterlist *sg; 2700 #ifdef __linux__ 2701 struct sg_page_iter sg_iter; 2702 #endif 2703 struct pglist plist; 2704 struct vm_page *page; 2705 #ifdef __linux__ 2706 unsigned long last_pfn = 0; /* suppress gcc warning */ 2707 #endif 2708 int ret; 2709 #ifdef __linux__ 2710 gfp_t gfp; 2711 #endif 2712 2713 /* Assert that the object is not currently in any GPU domain. 
As it 2714 * wasn't in the GTT, there shouldn't be any way it could have been in 2715 * a GPU cache 2716 */ 2717 BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS); 2718 BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS); 2719 2720 st = kmalloc(sizeof(*st), GFP_KERNEL); 2721 if (st == NULL) 2722 return -ENOMEM; 2723 2724 page_count = obj->base.size / PAGE_SIZE; 2725 if (sg_alloc_table(st, page_count, GFP_KERNEL)) { 2726 kfree(st); 2727 return -ENOMEM; 2728 } 2729 2730 #ifdef __linux__ 2731 /* Get the list of pages out of our struct file. They'll be pinned 2732 * at this point until we release them. 2733 * 2734 * Fail silently without starting the shrinker 2735 */ 2736 mapping = file_inode(obj->base.filp)->i_mapping; 2737 gfp = mapping_gfp_constraint(mapping, ~(__GFP_IO | __GFP_RECLAIM)); 2738 gfp |= __GFP_NORETRY | __GFP_NOWARN; 2739 sg = st->sgl; 2740 st->nents = 0; 2741 for (i = 0; i < page_count; i++) { 2742 page = shmem_read_mapping_page_gfp(mapping, i, gfp); 2743 if (IS_ERR(page)) { 2744 i915_gem_shrink(dev_priv, 2745 page_count, 2746 I915_SHRINK_BOUND | 2747 I915_SHRINK_UNBOUND | 2748 I915_SHRINK_PURGEABLE); 2749 page = shmem_read_mapping_page_gfp(mapping, i, gfp); 2750 } 2751 if (IS_ERR(page)) { 2752 /* We've tried hard to allocate the memory by reaping 2753 * our own buffer, now let the real VM do its job and 2754 * go down in flames if truly OOM. 2755 */ 2756 i915_gem_shrink_all(dev_priv); 2757 page = shmem_read_mapping_page(mapping, i); 2758 if (IS_ERR(page)) { 2759 ret = PTR_ERR(page); 2760 goto err_pages; 2761 } 2762 } 2763 #ifdef CONFIG_SWIOTLB 2764 if (swiotlb_nr_tbl()) { 2765 st->nents++; 2766 sg_set_page(sg, page, PAGE_SIZE, 0); 2767 sg = sg_next(sg); 2768 continue; 2769 } 2770 #endif 2771 if (!i || page_to_pfn(page) != last_pfn + 1) { 2772 if (i) 2773 sg = sg_next(sg); 2774 st->nents++; 2775 sg_set_page(sg, page, PAGE_SIZE, 0); 2776 } else { 2777 sg->length += PAGE_SIZE; 2778 } 2779 last_pfn = page_to_pfn(page); 2780 2781 /* Check that the i965g/gm workaround works. */ 2782 WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL)); 2783 } 2784 #ifdef CONFIG_SWIOTLB 2785 if (!swiotlb_nr_tbl()) 2786 #endif 2787 sg_mark_end(sg); 2788 #else 2789 sg = st->sgl; 2790 st->nents = 0; 2791 2792 TAILQ_INIT(&plist); 2793 if (uvm_objwire(obj->base.uao, 0, obj->base.size, &plist)) { 2794 ret = -ENOMEM; 2795 goto err_pages; 2796 } 2797 2798 i = 0; 2799 TAILQ_FOREACH(page, &plist, pageq) { 2800 st->nents++; 2801 sg_dma_address(sg) = VM_PAGE_TO_PHYS(page); 2802 sg_dma_len(sg) = sg->length = PAGE_SIZE; 2803 sg++; 2804 i++; 2805 } 2806 #endif 2807 obj->pages = st; 2808 2809 ret = i915_gem_gtt_prepare_object(obj); 2810 if (ret) 2811 goto err_pages; 2812 2813 if (i915_gem_object_needs_bit17_swizzle(obj)) 2814 i915_gem_object_do_bit_17_swizzle(obj); 2815 2816 if (obj->tiling_mode != I915_TILING_NONE && 2817 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) 2818 i915_gem_object_pin_pages(obj); 2819 2820 return 0; 2821 2822 err_pages: 2823 sg_mark_end(sg); 2824 #ifdef __linux__ 2825 for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) 2826 page_cache_release(sg_page_iter_page(&sg_iter)); 2827 #else 2828 uvm_objunwire(obj->base.uao, 0, obj->base.size); 2829 #endif 2830 sg_free_table(st); 2831 kfree(st); 2832 2833 /* shmemfs first checks if there is enough memory to allocate the page 2834 * and reports ENOSPC should there be insufficient, along with the usual 2835 * ENOMEM for a genuine allocation failure. 
2836 * 2837 * We use ENOSPC in our driver to mean that we have run out of aperture 2838 * space and so want to translate the error from shmemfs back to our 2839 * usual understanding of ENOMEM. 2840 */ 2841 if (ret == -ENOSPC) 2842 ret = -ENOMEM; 2843 2844 return ret; 2845 } 2846 2847 /* Ensure that the associated pages are gathered from the backing storage 2848 * and pinned into our object. i915_gem_object_get_pages() may be called 2849 * multiple times before they are released by a single call to 2850 * i915_gem_object_put_pages() - once the pages are no longer referenced 2851 * either as a result of memory pressure (reaping pages under the shrinker) 2852 * or as the object is itself released. 2853 */ 2854 int 2855 i915_gem_object_get_pages(struct drm_i915_gem_object *obj) 2856 { 2857 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2858 const struct drm_i915_gem_object_ops *ops = obj->ops; 2859 int ret; 2860 2861 if (obj->pages) 2862 return 0; 2863 2864 if (obj->madv != I915_MADV_WILLNEED) { 2865 DRM_DEBUG("Attempting to obtain a purgeable object\n"); 2866 return -EFAULT; 2867 } 2868 2869 BUG_ON(obj->pages_pin_count); 2870 2871 ret = ops->get_pages(obj); 2872 if (ret) 2873 return ret; 2874 2875 list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list); 2876 2877 obj->get_page.sg = obj->pages->sgl; 2878 obj->get_page.last = 0; 2879 2880 return 0; 2881 } 2882 2883 void i915_vma_move_to_active(struct i915_vma *vma, 2884 struct drm_i915_gem_request *req) 2885 { 2886 struct drm_i915_gem_object *obj = vma->obj; 2887 struct intel_engine_cs *ring; 2888 2889 ring = i915_gem_request_get_ring(req); 2890 2891 /* Add a reference if we're newly entering the active list. */ 2892 if (obj->active == 0) 2893 drm_gem_object_reference(&obj->base); 2894 obj->active |= intel_ring_flag(ring); 2895 2896 list_move_tail(&obj->ring_list[ring->id], &ring->active_list); 2897 i915_gem_request_assign(&obj->last_read_req[ring->id], req); 2898 2899 list_move_tail(&vma->mm_list, &vma->vm->active_list); 2900 } 2901 2902 static void 2903 i915_gem_object_retire__write(struct drm_i915_gem_object *obj) 2904 { 2905 RQ_BUG_ON(obj->last_write_req == NULL); 2906 RQ_BUG_ON(!(obj->active & intel_ring_flag(obj->last_write_req->ring))); 2907 2908 i915_gem_request_assign(&obj->last_write_req, NULL); 2909 intel_fb_obj_flush(obj, true, ORIGIN_CS); 2910 } 2911 2912 static void 2913 i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring) 2914 { 2915 struct i915_vma *vma; 2916 2917 RQ_BUG_ON(obj->last_read_req[ring] == NULL); 2918 RQ_BUG_ON(!(obj->active & (1 << ring))); 2919 2920 list_del_init(&obj->ring_list[ring]); 2921 i915_gem_request_assign(&obj->last_read_req[ring], NULL); 2922 2923 if (obj->last_write_req && obj->last_write_req->ring->id == ring) 2924 i915_gem_object_retire__write(obj); 2925 2926 obj->active &= ~(1 << ring); 2927 if (obj->active) 2928 return; 2929 2930 /* Bump our place on the bound list to keep it roughly in LRU order 2931 * so that we don't steal from recently used but inactive objects 2932 * (unless we are forced to ofc!) 
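	 * The object stays on the bound list; we only refresh its LRU
	 * position here and then move each of its VMAs to the inactive list.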
2933 */ 2934 list_move_tail(&obj->global_list, 2935 &to_i915(obj->base.dev)->mm.bound_list); 2936 2937 list_for_each_entry(vma, &obj->vma_list, vma_link) { 2938 if (!list_empty(&vma->mm_list)) 2939 list_move_tail(&vma->mm_list, &vma->vm->inactive_list); 2940 } 2941 2942 i915_gem_request_assign(&obj->last_fenced_req, NULL); 2943 drm_gem_object_unreference(&obj->base); 2944 } 2945 2946 static int 2947 i915_gem_init_seqno(struct drm_device *dev, u32 seqno) 2948 { 2949 struct drm_i915_private *dev_priv = dev->dev_private; 2950 struct intel_engine_cs *ring; 2951 int ret, i, j; 2952 2953 /* Carefully retire all requests without writing to the rings */ 2954 for_each_ring(ring, dev_priv, i) { 2955 ret = intel_ring_idle(ring); 2956 if (ret) 2957 return ret; 2958 } 2959 i915_gem_retire_requests(dev); 2960 2961 /* Finally reset hw state */ 2962 for_each_ring(ring, dev_priv, i) { 2963 intel_ring_init_seqno(ring, seqno); 2964 2965 for (j = 0; j < ARRAY_SIZE(ring->semaphore.sync_seqno); j++) 2966 ring->semaphore.sync_seqno[j] = 0; 2967 } 2968 2969 return 0; 2970 } 2971 2972 int i915_gem_set_seqno(struct drm_device *dev, u32 seqno) 2973 { 2974 struct drm_i915_private *dev_priv = dev->dev_private; 2975 int ret; 2976 2977 if (seqno == 0) 2978 return -EINVAL; 2979 2980 /* HWS page needs to be set less than what we 2981 * will inject to ring 2982 */ 2983 ret = i915_gem_init_seqno(dev, seqno - 1); 2984 if (ret) 2985 return ret; 2986 2987 /* Carefully set the last_seqno value so that wrap 2988 * detection still works 2989 */ 2990 dev_priv->next_seqno = seqno; 2991 dev_priv->last_seqno = seqno - 1; 2992 if (dev_priv->last_seqno == 0) 2993 dev_priv->last_seqno--; 2994 2995 return 0; 2996 } 2997 2998 int 2999 i915_gem_get_seqno(struct drm_device *dev, u32 *seqno) 3000 { 3001 struct drm_i915_private *dev_priv = dev->dev_private; 3002 3003 /* reserve 0 for non-seqno */ 3004 if (dev_priv->next_seqno == 0) { 3005 int ret = i915_gem_init_seqno(dev, 0); 3006 if (ret) 3007 return ret; 3008 3009 dev_priv->next_seqno = 1; 3010 } 3011 3012 *seqno = dev_priv->last_seqno = dev_priv->next_seqno++; 3013 return 0; 3014 } 3015 3016 /* 3017 * NB: This function is not allowed to fail. Doing so would mean the the 3018 * request is not being tracked for completion but the work itself is 3019 * going to happen on the hardware. This would be a Bad Thing(tm). 3020 */ 3021 void __i915_add_request(struct drm_i915_gem_request *request, 3022 struct drm_i915_gem_object *obj, 3023 bool flush_caches) 3024 { 3025 struct intel_engine_cs *ring; 3026 struct drm_i915_private *dev_priv; 3027 struct intel_ringbuffer *ringbuf; 3028 u32 request_start; 3029 int ret; 3030 3031 if (WARN_ON(request == NULL)) 3032 return; 3033 3034 ring = request->ring; 3035 dev_priv = ring->dev->dev_private; 3036 ringbuf = request->ringbuf; 3037 3038 /* 3039 * To ensure that this call will not fail, space for its emissions 3040 * should already have been reserved in the ring buffer. Let the ring 3041 * know that it is time to use that space up. 3042 */ 3043 intel_ring_reserved_space_use(ringbuf); 3044 3045 request_start = intel_ring_get_tail(ringbuf); 3046 /* 3047 * Emit any outstanding flushes - execbuf can fail to emit the flush 3048 * after having emitted the batchbuffer command. Hence we need to fix 3049 * things up similar to emitting the lazy request. The difference here 3050 * is that the flush _must_ happen before the next request, no matter 3051 * what. 
3052 */ 3053 if (flush_caches) { 3054 if (i915.enable_execlists) 3055 ret = logical_ring_flush_all_caches(request); 3056 else 3057 ret = intel_ring_flush_all_caches(request); 3058 /* Not allowed to fail! */ 3059 WARN(ret, "*_ring_flush_all_caches failed: %d!\n", ret); 3060 } 3061 3062 /* Record the position of the start of the request so that 3063 * should we detect the updated seqno part-way through the 3064 * GPU processing the request, we never over-estimate the 3065 * position of the head. 3066 */ 3067 request->postfix = intel_ring_get_tail(ringbuf); 3068 3069 if (i915.enable_execlists) 3070 ret = ring->emit_request(request); 3071 else { 3072 ret = ring->add_request(request); 3073 3074 request->tail = intel_ring_get_tail(ringbuf); 3075 } 3076 /* Not allowed to fail! */ 3077 WARN(ret, "emit|add_request failed: %d!\n", ret); 3078 3079 request->head = request_start; 3080 3081 /* Whilst this request exists, batch_obj will be on the 3082 * active_list, and so will hold the active reference. Only when this 3083 * request is retired will the the batch_obj be moved onto the 3084 * inactive_list and lose its active reference. Hence we do not need 3085 * to explicitly hold another reference here. 3086 */ 3087 request->batch_obj = obj; 3088 3089 request->emitted_jiffies = jiffies; 3090 request->previous_seqno = ring->last_submitted_seqno; 3091 ring->last_submitted_seqno = request->seqno; 3092 list_add_tail(&request->list, &ring->request_list); 3093 3094 trace_i915_gem_request_add(request); 3095 3096 i915_queue_hangcheck(ring->dev); 3097 3098 queue_delayed_work(dev_priv->wq, 3099 &dev_priv->mm.retire_work, 3100 round_jiffies_up_relative(HZ)); 3101 intel_mark_busy(dev_priv->dev); 3102 3103 /* Sanity check that the reserved size was large enough. */ 3104 intel_ring_reserved_space_end(ringbuf); 3105 } 3106 3107 static bool i915_context_is_banned(struct drm_i915_private *dev_priv, 3108 const struct intel_context *ctx) 3109 { 3110 unsigned long elapsed; 3111 3112 elapsed = get_seconds() - ctx->hang_stats.guilty_ts; 3113 3114 if (ctx->hang_stats.banned) 3115 return true; 3116 3117 if (ctx->hang_stats.ban_period_seconds && 3118 elapsed <= ctx->hang_stats.ban_period_seconds) { 3119 if (!i915_gem_context_is_default(ctx)) { 3120 DRM_DEBUG("context hanging too fast, banning!\n"); 3121 return true; 3122 } else if (i915_stop_ring_allow_ban(dev_priv)) { 3123 if (i915_stop_ring_allow_warn(dev_priv)) 3124 DRM_ERROR("gpu hanging too fast, banning!\n"); 3125 return true; 3126 } 3127 } 3128 3129 return false; 3130 } 3131 3132 static void i915_set_reset_status(struct drm_i915_private *dev_priv, 3133 struct intel_context *ctx, 3134 const bool guilty) 3135 { 3136 struct i915_ctx_hang_stats *hs; 3137 3138 if (WARN_ON(!ctx)) 3139 return; 3140 3141 hs = &ctx->hang_stats; 3142 3143 if (guilty) { 3144 hs->banned = i915_context_is_banned(dev_priv, ctx); 3145 hs->batch_active++; 3146 hs->guilty_ts = get_seconds(); 3147 } else { 3148 hs->batch_pending++; 3149 } 3150 } 3151 3152 void i915_gem_request_free(struct kref *req_ref) 3153 { 3154 struct drm_i915_gem_request *req = container_of(req_ref, 3155 typeof(*req), ref); 3156 struct intel_context *ctx = req->ctx; 3157 3158 if (req->file_priv) 3159 i915_gem_request_remove_from_client(req); 3160 3161 if (ctx) { 3162 if (i915.enable_execlists) { 3163 if (ctx != req->ring->default_context) 3164 intel_lr_context_unpin(req); 3165 } 3166 3167 i915_gem_context_unreference(ctx); 3168 } 3169 3170 #ifdef __linux__ 3171 kmem_cache_free(req->i915->requests, req); 3172 #else 3173 
pool_put(&req->i915->requests, req); 3174 #endif 3175 } 3176 3177 int i915_gem_request_alloc(struct intel_engine_cs *ring, 3178 struct intel_context *ctx, 3179 struct drm_i915_gem_request **req_out) 3180 { 3181 struct drm_i915_private *dev_priv = to_i915(ring->dev); 3182 struct drm_i915_gem_request *req; 3183 int ret; 3184 3185 if (!req_out) 3186 return -EINVAL; 3187 3188 *req_out = NULL; 3189 3190 #ifdef __linux__ 3191 req = kmem_cache_zalloc(dev_priv->requests, GFP_KERNEL); 3192 #else 3193 req = pool_get(&dev_priv->requests, PR_WAITOK | PR_ZERO); 3194 #endif 3195 if (req == NULL) 3196 return -ENOMEM; 3197 3198 ret = i915_gem_get_seqno(ring->dev, &req->seqno); 3199 if (ret) 3200 goto err; 3201 3202 kref_init(&req->ref); 3203 req->i915 = dev_priv; 3204 req->ring = ring; 3205 req->ctx = ctx; 3206 i915_gem_context_reference(req->ctx); 3207 3208 if (i915.enable_execlists) 3209 ret = intel_logical_ring_alloc_request_extras(req); 3210 else 3211 ret = intel_ring_alloc_request_extras(req); 3212 if (ret) { 3213 i915_gem_context_unreference(req->ctx); 3214 goto err; 3215 } 3216 3217 /* 3218 * Reserve space in the ring buffer for all the commands required to 3219 * eventually emit this request. This is to guarantee that the 3220 * i915_add_request() call can't fail. Note that the reserve may need 3221 * to be redone if the request is not actually submitted straight 3222 * away, e.g. because a GPU scheduler has deferred it. 3223 */ 3224 if (i915.enable_execlists) 3225 ret = intel_logical_ring_reserve_space(req); 3226 else 3227 ret = intel_ring_reserve_space(req); 3228 if (ret) { 3229 /* 3230 * At this point, the request is fully allocated even if not 3231 * fully prepared. Thus it can be cleaned up using the proper 3232 * free code. 3233 */ 3234 i915_gem_request_cancel(req); 3235 return ret; 3236 } 3237 3238 *req_out = req; 3239 return 0; 3240 3241 err: 3242 #ifdef __linux__ 3243 kmem_cache_free(dev_priv->requests, req); 3244 #else 3245 pool_put(&dev_priv->requests, req); 3246 #endif 3247 return ret; 3248 } 3249 3250 void i915_gem_request_cancel(struct drm_i915_gem_request *req) 3251 { 3252 intel_ring_reserved_space_cancel(req->ringbuf); 3253 3254 i915_gem_request_unreference(req); 3255 } 3256 3257 struct drm_i915_gem_request * 3258 i915_gem_find_active_request(struct intel_engine_cs *ring) 3259 { 3260 struct drm_i915_gem_request *request; 3261 3262 list_for_each_entry(request, &ring->request_list, list) { 3263 if (i915_gem_request_completed(request, false)) 3264 continue; 3265 3266 return request; 3267 } 3268 3269 return NULL; 3270 } 3271 3272 static void i915_gem_reset_ring_status(struct drm_i915_private *dev_priv, 3273 struct intel_engine_cs *ring) 3274 { 3275 struct drm_i915_gem_request *request; 3276 bool ring_hung; 3277 3278 request = i915_gem_find_active_request(ring); 3279 3280 if (request == NULL) 3281 return; 3282 3283 ring_hung = ring->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG; 3284 3285 i915_set_reset_status(dev_priv, request->ctx, ring_hung); 3286 3287 list_for_each_entry_continue(request, &ring->request_list, list) 3288 i915_set_reset_status(dev_priv, request->ctx, false); 3289 } 3290 3291 static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv, 3292 struct intel_engine_cs *ring) 3293 { 3294 while (!list_empty(&ring->active_list)) { 3295 struct drm_i915_gem_object *obj; 3296 3297 obj = list_first_entry(&ring->active_list, 3298 struct drm_i915_gem_object, 3299 ring_list[ring->id]); 3300 3301 i915_gem_object_retire__read(obj, ring->id); 3302 } 3303 3304 /* 3305 * 
Clear the execlists queue up before freeing the requests, as those 3306 * are the ones that keep the context and ringbuffer backing objects 3307 * pinned in place. 3308 */ 3309 while (!list_empty(&ring->execlist_queue)) { 3310 struct drm_i915_gem_request *submit_req; 3311 3312 submit_req = list_first_entry(&ring->execlist_queue, 3313 struct drm_i915_gem_request, 3314 execlist_link); 3315 list_del(&submit_req->execlist_link); 3316 3317 if (submit_req->ctx != ring->default_context) 3318 intel_lr_context_unpin(submit_req); 3319 3320 i915_gem_request_unreference(submit_req); 3321 } 3322 3323 /* 3324 * We must free the requests after all the corresponding objects have 3325 * been moved off active lists. Which is the same order as the normal 3326 * retire_requests function does. This is important if object hold 3327 * implicit references on things like e.g. ppgtt address spaces through 3328 * the request. 3329 */ 3330 while (!list_empty(&ring->request_list)) { 3331 struct drm_i915_gem_request *request; 3332 3333 request = list_first_entry(&ring->request_list, 3334 struct drm_i915_gem_request, 3335 list); 3336 3337 i915_gem_request_retire(request); 3338 } 3339 } 3340 3341 void i915_gem_reset(struct drm_device *dev) 3342 { 3343 struct drm_i915_private *dev_priv = dev->dev_private; 3344 struct intel_engine_cs *ring; 3345 int i; 3346 3347 /* 3348 * Before we free the objects from the requests, we need to inspect 3349 * them for finding the guilty party. As the requests only borrow 3350 * their reference to the objects, the inspection must be done first. 3351 */ 3352 for_each_ring(ring, dev_priv, i) 3353 i915_gem_reset_ring_status(dev_priv, ring); 3354 3355 for_each_ring(ring, dev_priv, i) 3356 i915_gem_reset_ring_cleanup(dev_priv, ring); 3357 3358 i915_gem_context_reset(dev); 3359 3360 i915_gem_restore_fences(dev); 3361 3362 WARN_ON(i915_verify_lists(dev)); 3363 } 3364 3365 /** 3366 * This function clears the request list as sequence numbers are passed. 3367 */ 3368 void 3369 i915_gem_retire_requests_ring(struct intel_engine_cs *ring) 3370 { 3371 WARN_ON(i915_verify_lists(ring->dev)); 3372 3373 /* Retire requests first as we use it above for the early return. 3374 * If we retire requests last, we may use a later seqno and so clear 3375 * the requests lists without clearing the active list, leading to 3376 * confusion. 3377 */ 3378 while (!list_empty(&ring->request_list)) { 3379 struct drm_i915_gem_request *request; 3380 3381 request = list_first_entry(&ring->request_list, 3382 struct drm_i915_gem_request, 3383 list); 3384 3385 if (!i915_gem_request_completed(request, true)) 3386 break; 3387 3388 i915_gem_request_retire(request); 3389 } 3390 3391 /* Move any buffers on the active list that are no longer referenced 3392 * by the ringbuffer to the flushing/inactive lists as appropriate, 3393 * before we free the context associated with the requests. 
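	 *
	 * An object whose last_read_req is still on the ring's request list
	 * was not retired above and is therefore still busy, so we can stop
	 * at the first such object.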
3394 */ 3395 while (!list_empty(&ring->active_list)) { 3396 struct drm_i915_gem_object *obj; 3397 3398 obj = list_first_entry(&ring->active_list, 3399 struct drm_i915_gem_object, 3400 ring_list[ring->id]); 3401 3402 if (!list_empty(&obj->last_read_req[ring->id]->list)) 3403 break; 3404 3405 i915_gem_object_retire__read(obj, ring->id); 3406 } 3407 3408 if (unlikely(ring->trace_irq_req && 3409 i915_gem_request_completed(ring->trace_irq_req, true))) { 3410 ring->irq_put(ring); 3411 i915_gem_request_assign(&ring->trace_irq_req, NULL); 3412 } 3413 3414 WARN_ON(i915_verify_lists(ring->dev)); 3415 } 3416 3417 bool 3418 i915_gem_retire_requests(struct drm_device *dev) 3419 { 3420 struct drm_i915_private *dev_priv = dev->dev_private; 3421 struct intel_engine_cs *ring; 3422 bool idle = true; 3423 int i; 3424 3425 for_each_ring(ring, dev_priv, i) { 3426 i915_gem_retire_requests_ring(ring); 3427 idle &= list_empty(&ring->request_list); 3428 if (i915.enable_execlists) { 3429 unsigned long flags; 3430 3431 spin_lock_irqsave(&ring->execlist_lock, flags); 3432 idle &= list_empty(&ring->execlist_queue); 3433 spin_unlock_irqrestore(&ring->execlist_lock, flags); 3434 3435 intel_execlists_retire_requests(ring); 3436 } 3437 } 3438 3439 if (idle) 3440 mod_delayed_work(dev_priv->wq, 3441 &dev_priv->mm.idle_work, 3442 msecs_to_jiffies(100)); 3443 3444 return idle; 3445 } 3446 3447 static void 3448 i915_gem_retire_work_handler(struct work_struct *work) 3449 { 3450 struct drm_i915_private *dev_priv = 3451 container_of(work, typeof(*dev_priv), mm.retire_work.work); 3452 struct drm_device *dev = dev_priv->dev; 3453 bool idle; 3454 3455 /* Come back later if the device is busy... */ 3456 idle = false; 3457 if (mutex_trylock(&dev->struct_mutex)) { 3458 idle = i915_gem_retire_requests(dev); 3459 mutex_unlock(&dev->struct_mutex); 3460 } 3461 if (!idle) 3462 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 3463 round_jiffies_up_relative(HZ)); 3464 } 3465 3466 static void 3467 i915_gem_idle_work_handler(struct work_struct *work) 3468 { 3469 struct drm_i915_private *dev_priv = 3470 container_of(work, typeof(*dev_priv), mm.idle_work.work); 3471 struct drm_device *dev = dev_priv->dev; 3472 struct intel_engine_cs *ring; 3473 int i; 3474 3475 for_each_ring(ring, dev_priv, i) 3476 if (!list_empty(&ring->request_list)) 3477 return; 3478 3479 intel_mark_idle(dev); 3480 3481 if (mutex_trylock(&dev->struct_mutex)) { 3482 struct intel_engine_cs *ring; 3483 int i; 3484 3485 for_each_ring(ring, dev_priv, i) 3486 i915_gem_batch_pool_fini(&ring->batch_pool); 3487 3488 mutex_unlock(&dev->struct_mutex); 3489 } 3490 } 3491 3492 /** 3493 * Ensures that an object will eventually get non-busy by flushing any required 3494 * write domains, emitting any outstanding lazy request and retiring and 3495 * completed requests. 
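 *
 * (For illustration only: typical userspace usage of the wait ioctl that the
 *  helper below serves looks roughly like the #if 0 sketch that follows.
 *  fd and handle are assumed to exist; the snippet would need libdrm's
 *  xf86drm.h, i915_drm.h and errno.h. It is a sketch, not part of this file.)
 */
#if 0
	/* Sketch: block for at most 1 ms; a timeout_ns of 0 merely polls,
	 * which is how userspace can reimplement the busy ioctl. */
	struct drm_i915_gem_wait wait = {
		.bo_handle = handle,
		.timeout_ns = 1000 * 1000,
	};

	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_WAIT, &wait) != 0 &&
	    errno == ETIME) {
		/* Still busy; wait.timeout_ns now holds the remaining time. */
	}
#endif
/* i915_gem_object_flush_active() below is used by both the wait and busy
 * ioctls.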
 */
static int
i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
{
	int i;

	if (!obj->active)
		return 0;

	for (i = 0; i < I915_NUM_RINGS; i++) {
		struct drm_i915_gem_request *req;

		req = obj->last_read_req[i];
		if (req == NULL)
			continue;

		if (list_empty(&req->list))
			goto retire;

		if (i915_gem_request_completed(req, true)) {
			__i915_gem_request_retire__upto(req);
retire:
			i915_gem_object_retire__read(obj, i);
		}
	}

	return 0;
}

/**
 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
 * @DRM_IOCTL_ARGS: standard ioctl arguments
 *
 * Returns 0 if successful, else an error is returned with the remaining time
 * in the timeout parameter.
 * -ETIME: object is still busy after timeout
 * -ERESTARTSYS: signal interrupted the wait
 * -ENOENT: object doesn't exist
 * Also possible, but rare:
 * -EAGAIN: GPU wedged
 * -ENOMEM: damn
 * -ENODEV: Internal IRQ fail
 * -E?: The add request failed
 *
 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
 * non-zero timeout parameter the wait ioctl will wait for the given number of
 * nanoseconds on an object becoming unbusy. Since the wait itself does so
 * without holding struct_mutex the object may become re-busied before this
 * function completes. A similar but shorter race condition exists in the
 * busy ioctl.
 */
int
i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_wait *args = data;
	struct drm_i915_gem_object *obj;
	struct drm_i915_gem_request *req[I915_NUM_RINGS];
	unsigned reset_counter;
	int i, n = 0;
	int ret;

	if (args->flags != 0)
		return -EINVAL;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->bo_handle));
	if (&obj->base == NULL) {
		mutex_unlock(&dev->struct_mutex);
		return -ENOENT;
	}

	/* Need to make sure the object gets inactive eventually. */
	ret = i915_gem_object_flush_active(obj);
	if (ret)
		goto out;

	if (!obj->active)
		goto out;

	/* Do this after OLR check to make sure we make forward progress polling
	 * on this IOCTL with a timeout == 0 (like busy ioctl)
	 */
	if (args->timeout_ns == 0) {
		ret = -ETIME;
		goto out;
	}

	drm_gem_object_unreference(&obj->base);
	reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);

	for (i = 0; i < I915_NUM_RINGS; i++) {
		if (obj->last_read_req[i] == NULL)
			continue;

		req[n++] = i915_gem_request_reference(obj->last_read_req[i]);
	}

	mutex_unlock(&dev->struct_mutex);

	for (i = 0; i < n; i++) {
		if (ret == 0)
			ret = __i915_wait_request(req[i], reset_counter, true,
						  args->timeout_ns > 0 ?
&args->timeout_ns : NULL, 3603 file->driver_priv); 3604 i915_gem_request_unreference__unlocked(req[i]); 3605 } 3606 return ret; 3607 3608 out: 3609 drm_gem_object_unreference(&obj->base); 3610 mutex_unlock(&dev->struct_mutex); 3611 return ret; 3612 } 3613 3614 static int 3615 __i915_gem_object_sync(struct drm_i915_gem_object *obj, 3616 struct intel_engine_cs *to, 3617 struct drm_i915_gem_request *from_req, 3618 struct drm_i915_gem_request **to_req) 3619 { 3620 struct intel_engine_cs *from; 3621 int ret; 3622 3623 from = i915_gem_request_get_ring(from_req); 3624 if (to == from) 3625 return 0; 3626 3627 if (i915_gem_request_completed(from_req, true)) 3628 return 0; 3629 3630 if (!i915_semaphore_is_enabled(obj->base.dev)) { 3631 struct drm_i915_private *i915 = to_i915(obj->base.dev); 3632 ret = __i915_wait_request(from_req, 3633 atomic_read(&i915->gpu_error.reset_counter), 3634 i915->mm.interruptible, 3635 NULL, 3636 &i915->rps.semaphores); 3637 if (ret) 3638 return ret; 3639 3640 i915_gem_object_retire_request(obj, from_req); 3641 } else { 3642 int idx = intel_ring_sync_index(from, to); 3643 u32 seqno = i915_gem_request_get_seqno(from_req); 3644 3645 WARN_ON(!to_req); 3646 3647 if (seqno <= from->semaphore.sync_seqno[idx]) 3648 return 0; 3649 3650 if (*to_req == NULL) { 3651 ret = i915_gem_request_alloc(to, to->default_context, to_req); 3652 if (ret) 3653 return ret; 3654 } 3655 3656 trace_i915_gem_ring_sync_to(*to_req, from, from_req); 3657 ret = to->semaphore.sync_to(*to_req, from, seqno); 3658 if (ret) 3659 return ret; 3660 3661 /* We use last_read_req because sync_to() 3662 * might have just caused seqno wrap under 3663 * the radar. 3664 */ 3665 from->semaphore.sync_seqno[idx] = 3666 i915_gem_request_get_seqno(obj->last_read_req[from->id]); 3667 } 3668 3669 return 0; 3670 } 3671 3672 /** 3673 * i915_gem_object_sync - sync an object to a ring. 3674 * 3675 * @obj: object which may be in use on another ring. 3676 * @to: ring we wish to use the object on. May be NULL. 3677 * @to_req: request we wish to use the object for. See below. 3678 * This will be allocated and returned if a request is 3679 * required but not passed in. 3680 * 3681 * This code is meant to abstract object synchronization with the GPU. 3682 * Calling with NULL implies synchronizing the object with the CPU 3683 * rather than a particular GPU ring. Conceptually we serialise writes 3684 * between engines inside the GPU. We only allow one engine to write 3685 * into a buffer at any time, but multiple readers. To ensure each has 3686 * a coherent view of memory, we must: 3687 * 3688 * - If there is an outstanding write request to the object, the new 3689 * request must wait for it to complete (either CPU or in hw, requests 3690 * on the same ring will be naturally ordered). 3691 * 3692 * - If we are a write request (pending_write_domain is set), the new 3693 * request must wait for outstanding read requests to complete. 3694 * 3695 * For CPU synchronisation (NULL to) no request is required. For syncing with 3696 * rings to_req must be non-NULL. However, a request does not have to be 3697 * pre-allocated. If *to_req is NULL and sync commands will be emitted then a 3698 * request will be allocated automatically and returned through *to_req. Note 3699 * that it is not guaranteed that commands will be emitted (because the system 3700 * might already be idle). Hence there is no need to create a request that 3701 * might never have any work submitted. 
Note further that if a request is 3702 * returned in *to_req, it is the responsibility of the caller to submit 3703 * that request (after potentially adding more work to it). 3704 * 3705 * Returns 0 if successful, else propagates up the lower layer error. 3706 */ 3707 int 3708 i915_gem_object_sync(struct drm_i915_gem_object *obj, 3709 struct intel_engine_cs *to, 3710 struct drm_i915_gem_request **to_req) 3711 { 3712 const bool readonly = obj->base.pending_write_domain == 0; 3713 struct drm_i915_gem_request *req[I915_NUM_RINGS]; 3714 int ret, i, n; 3715 3716 if (!obj->active) 3717 return 0; 3718 3719 if (to == NULL) 3720 return i915_gem_object_wait_rendering(obj, readonly); 3721 3722 n = 0; 3723 if (readonly) { 3724 if (obj->last_write_req) 3725 req[n++] = obj->last_write_req; 3726 } else { 3727 for (i = 0; i < I915_NUM_RINGS; i++) 3728 if (obj->last_read_req[i]) 3729 req[n++] = obj->last_read_req[i]; 3730 } 3731 for (i = 0; i < n; i++) { 3732 ret = __i915_gem_object_sync(obj, to, req[i], to_req); 3733 if (ret) 3734 return ret; 3735 } 3736 3737 return 0; 3738 } 3739 3740 static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj) 3741 { 3742 u32 old_write_domain, old_read_domains; 3743 3744 /* Force a pagefault for domain tracking on next user access */ 3745 i915_gem_release_mmap(obj); 3746 3747 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) 3748 return; 3749 3750 /* Wait for any direct GTT access to complete */ 3751 mb(); 3752 3753 old_read_domains = obj->base.read_domains; 3754 old_write_domain = obj->base.write_domain; 3755 3756 obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT; 3757 obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT; 3758 3759 trace_i915_gem_object_change_domain(obj, 3760 old_read_domains, 3761 old_write_domain); 3762 } 3763 3764 static int __i915_vma_unbind(struct i915_vma *vma, bool wait) 3765 { 3766 struct drm_i915_gem_object *obj = vma->obj; 3767 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 3768 int ret; 3769 3770 if (list_empty(&vma->vma_link)) 3771 return 0; 3772 3773 if (!drm_mm_node_allocated(&vma->node)) { 3774 i915_gem_vma_destroy(vma); 3775 return 0; 3776 } 3777 3778 if (vma->pin_count) 3779 return -EBUSY; 3780 3781 BUG_ON(obj->pages == NULL); 3782 3783 if (wait) { 3784 ret = i915_gem_object_wait_rendering(obj, false); 3785 if (ret) 3786 return ret; 3787 } 3788 3789 if (i915_is_ggtt(vma->vm) && 3790 vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) { 3791 i915_gem_object_finish_gtt(obj); 3792 3793 /* release the fence reg _after_ flushing */ 3794 ret = i915_gem_object_put_fence(obj); 3795 if (ret) 3796 return ret; 3797 } 3798 3799 trace_i915_vma_unbind(vma); 3800 3801 vma->vm->unbind_vma(vma); 3802 vma->bound = 0; 3803 3804 list_del_init(&vma->mm_list); 3805 if (i915_is_ggtt(vma->vm)) { 3806 if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) { 3807 obj->map_and_fenceable = false; 3808 } else if (vma->ggtt_view.pages) { 3809 sg_free_table(vma->ggtt_view.pages); 3810 kfree(vma->ggtt_view.pages); 3811 } 3812 vma->ggtt_view.pages = NULL; 3813 } 3814 3815 drm_mm_remove_node(&vma->node); 3816 i915_gem_vma_destroy(vma); 3817 3818 /* Since the unbound list is global, only move to that list if 3819 * no more VMAs exist. */ 3820 if (list_empty(&obj->vma_list)) 3821 list_move_tail(&obj->global_list, &dev_priv->mm.unbound_list); 3822 3823 /* And finally now the object is completely decoupled from this vma, 3824 * we can drop its hold on the backing storage and allow it to be 3825 * reaped by the shrinker. 
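	 * (This drops the pages pin taken when the VMA was originally bound
	 * in i915_gem_object_bind_to_vm().)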
3826 */ 3827 i915_gem_object_unpin_pages(obj); 3828 3829 return 0; 3830 } 3831 3832 int i915_vma_unbind(struct i915_vma *vma) 3833 { 3834 return __i915_vma_unbind(vma, true); 3835 } 3836 3837 int __i915_vma_unbind_no_wait(struct i915_vma *vma) 3838 { 3839 return __i915_vma_unbind(vma, false); 3840 } 3841 3842 int i915_gpu_idle(struct drm_device *dev) 3843 { 3844 struct drm_i915_private *dev_priv = dev->dev_private; 3845 struct intel_engine_cs *ring; 3846 int ret, i; 3847 3848 /* Flush everything onto the inactive list. */ 3849 for_each_ring(ring, dev_priv, i) { 3850 if (!i915.enable_execlists) { 3851 struct drm_i915_gem_request *req; 3852 3853 ret = i915_gem_request_alloc(ring, ring->default_context, &req); 3854 if (ret) 3855 return ret; 3856 3857 ret = i915_switch_context(req); 3858 if (ret) { 3859 i915_gem_request_cancel(req); 3860 return ret; 3861 } 3862 3863 i915_add_request_no_flush(req); 3864 } 3865 3866 ret = intel_ring_idle(ring); 3867 if (ret) 3868 return ret; 3869 } 3870 3871 WARN_ON(i915_verify_lists(dev)); 3872 return 0; 3873 } 3874 3875 static bool i915_gem_valid_gtt_space(struct i915_vma *vma, 3876 unsigned long cache_level) 3877 { 3878 struct drm_mm_node *gtt_space = &vma->node; 3879 struct drm_mm_node *other; 3880 3881 /* 3882 * On some machines we have to be careful when putting differing types 3883 * of snoopable memory together to avoid the prefetcher crossing memory 3884 * domains and dying. During vm initialisation, we decide whether or not 3885 * these constraints apply and set the drm_mm.color_adjust 3886 * appropriately. 3887 */ 3888 if (vma->vm->mm.color_adjust == NULL) 3889 return true; 3890 3891 if (!drm_mm_node_allocated(gtt_space)) 3892 return true; 3893 3894 if (list_empty(>t_space->node_list)) 3895 return true; 3896 3897 other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list); 3898 if (other->allocated && !other->hole_follows && other->color != cache_level) 3899 return false; 3900 3901 other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list); 3902 if (other->allocated && !gtt_space->hole_follows && other->color != cache_level) 3903 return false; 3904 3905 return true; 3906 } 3907 3908 /** 3909 * Finds free space in the GTT aperture and binds the object or a view of it 3910 * there. 3911 */ 3912 static struct i915_vma * 3913 i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, 3914 struct i915_address_space *vm, 3915 const struct i915_ggtt_view *ggtt_view, 3916 unsigned alignment, 3917 uint64_t flags) 3918 { 3919 struct drm_device *dev = obj->base.dev; 3920 struct drm_i915_private *dev_priv = dev->dev_private; 3921 u32 fence_alignment, unfenced_alignment; 3922 u32 search_flag, alloc_flag; 3923 u64 start, end; 3924 u64 size, fence_size; 3925 struct i915_vma *vma; 3926 int ret; 3927 3928 if (i915_is_ggtt(vm)) { 3929 u32 view_size; 3930 3931 if (WARN_ON(!ggtt_view)) 3932 return ERR_PTR(-EINVAL); 3933 3934 view_size = i915_ggtt_view_size(obj, ggtt_view); 3935 3936 fence_size = i915_gem_get_gtt_size(dev, 3937 view_size, 3938 obj->tiling_mode); 3939 fence_alignment = i915_gem_get_gtt_alignment(dev, 3940 view_size, 3941 obj->tiling_mode, 3942 true); 3943 unfenced_alignment = i915_gem_get_gtt_alignment(dev, 3944 view_size, 3945 obj->tiling_mode, 3946 false); 3947 size = flags & PIN_MAPPABLE ? 
fence_size : view_size; 3948 } else { 3949 fence_size = i915_gem_get_gtt_size(dev, 3950 obj->base.size, 3951 obj->tiling_mode); 3952 fence_alignment = i915_gem_get_gtt_alignment(dev, 3953 obj->base.size, 3954 obj->tiling_mode, 3955 true); 3956 unfenced_alignment = 3957 i915_gem_get_gtt_alignment(dev, 3958 obj->base.size, 3959 obj->tiling_mode, 3960 false); 3961 size = flags & PIN_MAPPABLE ? fence_size : obj->base.size; 3962 } 3963 3964 start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0; 3965 end = vm->total; 3966 if (flags & PIN_MAPPABLE) 3967 end = min_t(u64, end, dev_priv->gtt.mappable_end); 3968 if (flags & PIN_ZONE_4G) 3969 end = min_t(u64, end, (1ULL << 32)); 3970 3971 if (alignment == 0) 3972 alignment = flags & PIN_MAPPABLE ? fence_alignment : 3973 unfenced_alignment; 3974 if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) { 3975 DRM_DEBUG("Invalid object (view type=%u) alignment requested %u\n", 3976 ggtt_view ? ggtt_view->type : 0, 3977 alignment); 3978 return ERR_PTR(-EINVAL); 3979 } 3980 3981 /* If binding the object/GGTT view requires more space than the entire 3982 * aperture has, reject it early before evicting everything in a vain 3983 * attempt to find space. 3984 */ 3985 if (size > end) { 3986 DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: size=%llu > %s aperture=%llu\n", 3987 ggtt_view ? ggtt_view->type : 0, 3988 size, 3989 flags & PIN_MAPPABLE ? "mappable" : "total", 3990 end); 3991 return ERR_PTR(-E2BIG); 3992 } 3993 3994 ret = i915_gem_object_get_pages(obj); 3995 if (ret) 3996 return ERR_PTR(ret); 3997 3998 i915_gem_object_pin_pages(obj); 3999 4000 vma = ggtt_view ? i915_gem_obj_lookup_or_create_ggtt_vma(obj, ggtt_view) : 4001 i915_gem_obj_lookup_or_create_vma(obj, vm); 4002 4003 if (IS_ERR(vma)) 4004 goto err_unpin; 4005 4006 if (flags & PIN_HIGH) { 4007 search_flag = DRM_MM_SEARCH_BELOW; 4008 alloc_flag = DRM_MM_CREATE_TOP; 4009 } else { 4010 search_flag = DRM_MM_SEARCH_DEFAULT; 4011 alloc_flag = DRM_MM_CREATE_DEFAULT; 4012 } 4013 4014 search_free: 4015 ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node, 4016 size, alignment, 4017 obj->cache_level, 4018 start, end, 4019 search_flag, 4020 alloc_flag); 4021 if (ret) { 4022 ret = i915_gem_evict_something(dev, vm, size, alignment, 4023 obj->cache_level, 4024 start, end, 4025 flags); 4026 if (ret == 0) 4027 goto search_free; 4028 4029 goto err_free_vma; 4030 } 4031 if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) { 4032 ret = -EINVAL; 4033 goto err_remove_node; 4034 } 4035 4036 trace_i915_vma_bind(vma, flags); 4037 ret = i915_vma_bind(vma, obj->cache_level, flags); 4038 if (ret) 4039 goto err_remove_node; 4040 4041 list_move_tail(&obj->global_list, &dev_priv->mm.bound_list); 4042 list_add_tail(&vma->mm_list, &vm->inactive_list); 4043 4044 return vma; 4045 4046 err_remove_node: 4047 drm_mm_remove_node(&vma->node); 4048 err_free_vma: 4049 i915_gem_vma_destroy(vma); 4050 vma = ERR_PTR(ret); 4051 err_unpin: 4052 i915_gem_object_unpin_pages(obj); 4053 return vma; 4054 } 4055 4056 bool 4057 i915_gem_clflush_object(struct drm_i915_gem_object *obj, 4058 bool force) 4059 { 4060 /* If we don't have a page list set up, then we're not pinned 4061 * to GPU, and we can ignore the cache flush because it'll happen 4062 * again at bind time. 4063 */ 4064 if (obj->pages == NULL) 4065 return false; 4066 4067 /* 4068 * Stolen memory is always coherent with the GPU as it is explicitly 4069 * marked as wc by the system, or the system is cache-coherent. 
4070 */ 4071 if (obj->stolen || obj->phys_handle) 4072 return false; 4073 4074 /* If the GPU is snooping the contents of the CPU cache, 4075 * we do not need to manually clear the CPU cache lines. However, 4076 * the caches are only snooped when the render cache is 4077 * flushed/invalidated. As we always have to emit invalidations 4078 * and flushes when moving into and out of the RENDER domain, correct 4079 * snooping behaviour occurs naturally as the result of our domain 4080 * tracking. 4081 */ 4082 if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) { 4083 obj->cache_dirty = true; 4084 return false; 4085 } 4086 4087 trace_i915_gem_object_clflush(obj); 4088 drm_clflush_sg(obj->pages); 4089 obj->cache_dirty = false; 4090 4091 return true; 4092 } 4093 4094 /** Flushes the GTT write domain for the object if it's dirty. */ 4095 static void 4096 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) 4097 { 4098 uint32_t old_write_domain; 4099 4100 if (obj->base.write_domain != I915_GEM_DOMAIN_GTT) 4101 return; 4102 4103 /* No actual flushing is required for the GTT write domain. Writes 4104 * to it immediately go to main memory as far as we know, so there's 4105 * no chipset flush. It also doesn't land in render cache. 4106 * 4107 * However, we do have to enforce the order so that all writes through 4108 * the GTT land before any writes to the device, such as updates to 4109 * the GATT itself. 4110 */ 4111 wmb(); 4112 4113 old_write_domain = obj->base.write_domain; 4114 obj->base.write_domain = 0; 4115 4116 intel_fb_obj_flush(obj, false, ORIGIN_GTT); 4117 4118 trace_i915_gem_object_change_domain(obj, 4119 obj->base.read_domains, 4120 old_write_domain); 4121 } 4122 4123 /** Flushes the CPU write domain for the object if it's dirty. */ 4124 static void 4125 i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) 4126 { 4127 uint32_t old_write_domain; 4128 4129 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) 4130 return; 4131 4132 if (i915_gem_clflush_object(obj, obj->pin_display)) 4133 i915_gem_chipset_flush(obj->base.dev); 4134 4135 old_write_domain = obj->base.write_domain; 4136 obj->base.write_domain = 0; 4137 4138 intel_fb_obj_flush(obj, false, ORIGIN_CPU); 4139 4140 trace_i915_gem_object_change_domain(obj, 4141 obj->base.read_domains, 4142 old_write_domain); 4143 } 4144 4145 /** 4146 * Moves a single object to the GTT read, and possibly write domain. 4147 * 4148 * This function returns when the move is complete, including waiting on 4149 * flushes to occur. 4150 */ 4151 int 4152 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) 4153 { 4154 uint32_t old_write_domain, old_read_domains; 4155 struct i915_vma *vma; 4156 int ret; 4157 4158 if (obj->base.write_domain == I915_GEM_DOMAIN_GTT) 4159 return 0; 4160 4161 ret = i915_gem_object_wait_rendering(obj, !write); 4162 if (ret) 4163 return ret; 4164 4165 /* Flush and acquire obj->pages so that we are coherent through 4166 * direct access in memory with previous cached writes through 4167 * shmemfs and that our cache domain tracking remains valid. 4168 * For example, if the obj->filp was moved to swap without us 4169 * being notified and releasing the pages, we would mistakenly 4170 * continue to assume that the obj remained out of the CPU cached 4171 * domain. 
4172 */ 4173 ret = i915_gem_object_get_pages(obj); 4174 if (ret) 4175 return ret; 4176 4177 i915_gem_object_flush_cpu_write_domain(obj); 4178 4179 /* Serialise direct access to this object with the barriers for 4180 * coherent writes from the GPU, by effectively invalidating the 4181 * GTT domain upon first access. 4182 */ 4183 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) 4184 mb(); 4185 4186 old_write_domain = obj->base.write_domain; 4187 old_read_domains = obj->base.read_domains; 4188 4189 /* It should now be out of any other write domains, and we can update 4190 * the domain values for our changes. 4191 */ 4192 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0); 4193 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 4194 if (write) { 4195 obj->base.read_domains = I915_GEM_DOMAIN_GTT; 4196 obj->base.write_domain = I915_GEM_DOMAIN_GTT; 4197 obj->dirty = 1; 4198 } 4199 4200 trace_i915_gem_object_change_domain(obj, 4201 old_read_domains, 4202 old_write_domain); 4203 4204 /* And bump the LRU for this access */ 4205 vma = i915_gem_obj_to_ggtt(obj); 4206 if (vma && drm_mm_node_allocated(&vma->node) && !obj->active) 4207 list_move_tail(&vma->mm_list, 4208 &to_i915(obj->base.dev)->gtt.base.inactive_list); 4209 4210 return 0; 4211 } 4212 4213 /** 4214 * Changes the cache-level of an object across all VMA. 4215 * 4216 * After this function returns, the object will be in the new cache-level 4217 * across all GTT and the contents of the backing storage will be coherent, 4218 * with respect to the new cache-level. In order to keep the backing storage 4219 * coherent for all users, we only allow a single cache level to be set 4220 * globally on the object and prevent it from being changed whilst the 4221 * hardware is reading from the object. That is, if the object is currently 4222 * on the scanout it will be set to uncached (or equivalent display 4223 * cache coherency) and all non-MOCS GPU access will also be uncached so 4224 * that all direct access to the scanout remains coherent. 4225 */ 4226 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, 4227 enum i915_cache_level cache_level) 4228 { 4229 struct drm_device *dev = obj->base.dev; 4230 struct i915_vma *vma, *next; 4231 bool bound = false; 4232 int ret = 0; 4233 4234 if (obj->cache_level == cache_level) 4235 goto out; 4236 4237 /* Inspect the list of currently bound VMA and unbind any that would 4238 * be invalid given the new cache-level. This is principally to 4239 * catch the issue of the CS prefetch crossing page boundaries and 4240 * reading an invalid PTE on older architectures. 4241 */ 4242 list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) { 4243 if (!drm_mm_node_allocated(&vma->node)) 4244 continue; 4245 4246 if (vma->pin_count) { 4247 DRM_DEBUG("can not change the cache level of pinned objects\n"); 4248 return -EBUSY; 4249 } 4250 4251 if (!i915_gem_valid_gtt_space(vma, cache_level)) { 4252 ret = i915_vma_unbind(vma); 4253 if (ret) 4254 return ret; 4255 } else 4256 bound = true; 4257 } 4258 4259 /* We can reuse the existing drm_mm nodes but need to change the 4260 * cache-level on the PTE. We could simply unbind them all and 4261 * rebind with the correct cache-level on next use. However since 4262 * we already have a valid slot, dma mapping, pages etc, we may as 4263 * well rewrite the PTE in the belief that doing so tramples upon less 4264 * state and so involves less work. 4265 */ 4266 if (bound) { 4267 /* Before we change the PTE, the GPU must not be accessing it.
4268 * If we wait upon the object, we know that all the bound 4269 * VMA are no longer active. 4270 */ 4271 ret = i915_gem_object_wait_rendering(obj, false); 4272 if (ret) 4273 return ret; 4274 4275 if (!HAS_LLC(dev) && cache_level != I915_CACHE_NONE) { 4276 /* Access to snoopable pages through the GTT is 4277 * incoherent and on some machines causes a hard 4278 * lockup. Relinquish the CPU mmaping to force 4279 * userspace to refault in the pages and we can 4280 * then double check if the GTT mapping is still 4281 * valid for that pointer access. 4282 */ 4283 i915_gem_release_mmap(obj); 4284 4285 /* As we no longer need a fence for GTT access, 4286 * we can relinquish it now (and so prevent having 4287 * to steal a fence from someone else on the next 4288 * fence request). Note GPU activity would have 4289 * dropped the fence as all snoopable access is 4290 * supposed to be linear. 4291 */ 4292 ret = i915_gem_object_put_fence(obj); 4293 if (ret) 4294 return ret; 4295 } else { 4296 /* We either have incoherent backing store and 4297 * so no GTT access or the architecture is fully 4298 * coherent. In such cases, existing GTT mmaps 4299 * ignore the cache bit in the PTE and we can 4300 * rewrite it without confusing the GPU or having 4301 * to force userspace to fault back in its mmaps. 4302 */ 4303 } 4304 4305 list_for_each_entry(vma, &obj->vma_list, vma_link) { 4306 if (!drm_mm_node_allocated(&vma->node)) 4307 continue; 4308 4309 ret = i915_vma_bind(vma, cache_level, PIN_UPDATE); 4310 if (ret) 4311 return ret; 4312 } 4313 } 4314 4315 list_for_each_entry(vma, &obj->vma_list, vma_link) 4316 vma->node.color = cache_level; 4317 obj->cache_level = cache_level; 4318 4319 out: 4320 /* Flush the dirty CPU caches to the backing storage so that the 4321 * object is now coherent at its new cache level (with respect 4322 * to the access domain). 4323 */ 4324 if (obj->cache_dirty && 4325 obj->base.write_domain != I915_GEM_DOMAIN_CPU && 4326 cpu_write_needs_clflush(obj)) { 4327 if (i915_gem_clflush_object(obj, true)) 4328 i915_gem_chipset_flush(obj->base.dev); 4329 } 4330 4331 return 0; 4332 } 4333 4334 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, 4335 struct drm_file *file) 4336 { 4337 struct drm_i915_gem_caching *args = data; 4338 struct drm_i915_gem_object *obj; 4339 4340 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 4341 if (&obj->base == NULL) 4342 return -ENOENT; 4343 4344 switch (obj->cache_level) { 4345 case I915_CACHE_LLC: 4346 case I915_CACHE_L3_LLC: 4347 args->caching = I915_CACHING_CACHED; 4348 break; 4349 4350 case I915_CACHE_WT: 4351 args->caching = I915_CACHING_DISPLAY; 4352 break; 4353 4354 default: 4355 args->caching = I915_CACHING_NONE; 4356 break; 4357 } 4358 4359 drm_gem_object_unreference_unlocked(&obj->base); 4360 return 0; 4361 } 4362 4363 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, 4364 struct drm_file *file) 4365 { 4366 struct drm_i915_private *dev_priv = dev->dev_private; 4367 struct drm_i915_gem_caching *args = data; 4368 struct drm_i915_gem_object *obj; 4369 enum i915_cache_level level; 4370 int ret; 4371 4372 switch (args->caching) { 4373 case I915_CACHING_NONE: 4374 level = I915_CACHE_NONE; 4375 break; 4376 case I915_CACHING_CACHED: 4377 /* 4378 * Due to a HW issue on BXT A stepping, GPU stores via a 4379 * snooped mapping may leave stale data in a corresponding CPU 4380 * cacheline, whereas normally such cachelines would get 4381 * invalidated. 
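 * The conservative workaround here is simply to refuse snooped (LLC)
 * caching on those early steppings, which is what the revision check
 * below does.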
4382 */ 4383 if (IS_BROXTON(dev) && INTEL_REVID(dev) < BXT_REVID_B0) 4384 return -ENODEV; 4385 4386 level = I915_CACHE_LLC; 4387 break; 4388 case I915_CACHING_DISPLAY: 4389 level = HAS_WT(dev) ? I915_CACHE_WT : I915_CACHE_NONE; 4390 break; 4391 default: 4392 return -EINVAL; 4393 } 4394 4395 intel_runtime_pm_get(dev_priv); 4396 4397 ret = i915_mutex_lock_interruptible(dev); 4398 if (ret) 4399 goto rpm_put; 4400 4401 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 4402 if (&obj->base == NULL) { 4403 ret = -ENOENT; 4404 goto unlock; 4405 } 4406 4407 ret = i915_gem_object_set_cache_level(obj, level); 4408 4409 drm_gem_object_unreference(&obj->base); 4410 unlock: 4411 mutex_unlock(&dev->struct_mutex); 4412 rpm_put: 4413 intel_runtime_pm_put(dev_priv); 4414 4415 return ret; 4416 } 4417 4418 /* 4419 * Prepare buffer for display plane (scanout, cursors, etc). 4420 * Can be called from an uninterruptible phase (modesetting) and allows 4421 * any flushes to be pipelined (for pageflips). 4422 */ 4423 int 4424 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, 4425 u32 alignment, 4426 struct intel_engine_cs *pipelined, 4427 struct drm_i915_gem_request **pipelined_request, 4428 const struct i915_ggtt_view *view) 4429 { 4430 u32 old_read_domains, old_write_domain; 4431 int ret; 4432 4433 ret = i915_gem_object_sync(obj, pipelined, pipelined_request); 4434 if (ret) 4435 return ret; 4436 4437 /* Mark the pin_display early so that we account for the 4438 * display coherency whilst setting up the cache domains. 4439 */ 4440 obj->pin_display++; 4441 4442 /* The display engine is not coherent with the LLC cache on gen6. As 4443 * a result, we make sure that the pinning that is about to occur is 4444 * done with uncached PTEs. This is lowest common denominator for all 4445 * chipsets. 4446 * 4447 * However for gen6+, we could do better by using the GFDT bit instead 4448 * of uncaching, which would allow us to flush all the LLC-cached data 4449 * with that bit in the PTE to main memory with just one PIPE_CONTROL. 4450 */ 4451 ret = i915_gem_object_set_cache_level(obj, 4452 HAS_WT(obj->base.dev) ? I915_CACHE_WT : I915_CACHE_NONE); 4453 if (ret) 4454 goto err_unpin_display; 4455 4456 /* As the user may map the buffer once pinned in the display plane 4457 * (e.g. libkms for the bootup splash), we have to ensure that we 4458 * always use map_and_fenceable for all scanout buffers. 4459 */ 4460 ret = i915_gem_object_ggtt_pin(obj, view, alignment, 4461 view->type == I915_GGTT_VIEW_NORMAL ? 4462 PIN_MAPPABLE : 0); 4463 if (ret) 4464 goto err_unpin_display; 4465 4466 i915_gem_object_flush_cpu_write_domain(obj); 4467 4468 old_write_domain = obj->base.write_domain; 4469 old_read_domains = obj->base.read_domains; 4470 4471 /* It should now be out of any other write domains, and we can update 4472 * the domain values for our changes. 4473 */ 4474 obj->base.write_domain = 0; 4475 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 4476 4477 trace_i915_gem_object_change_domain(obj, 4478 old_read_domains, 4479 old_write_domain); 4480 4481 return 0; 4482 4483 err_unpin_display: 4484 obj->pin_display--; 4485 return ret; 4486 } 4487 4488 void 4489 i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj, 4490 const struct i915_ggtt_view *view) 4491 { 4492 if (WARN_ON(obj->pin_display == 0)) 4493 return; 4494 4495 i915_gem_object_ggtt_unpin_view(obj, view); 4496 4497 obj->pin_display--; 4498 } 4499 4500 /** 4501 * Moves a single object to the CPU read, and possibly write domain. 
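 * Any pending GTT write is flushed first and, if the CPU read domain is
 * not yet valid, the pages are clflushed so that they are coherent for
 * CPU access.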
4502 * 4503 * This function returns when the move is complete, including waiting on 4504 * flushes to occur. 4505 */ 4506 int 4507 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) 4508 { 4509 uint32_t old_write_domain, old_read_domains; 4510 int ret; 4511 4512 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) 4513 return 0; 4514 4515 ret = i915_gem_object_wait_rendering(obj, !write); 4516 if (ret) 4517 return ret; 4518 4519 i915_gem_object_flush_gtt_write_domain(obj); 4520 4521 old_write_domain = obj->base.write_domain; 4522 old_read_domains = obj->base.read_domains; 4523 4524 /* Flush the CPU cache if it's still invalid. */ 4525 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) { 4526 i915_gem_clflush_object(obj, false); 4527 4528 obj->base.read_domains |= I915_GEM_DOMAIN_CPU; 4529 } 4530 4531 /* It should now be out of any other write domains, and we can update 4532 * the domain values for our changes. 4533 */ 4534 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0); 4535 4536 /* If we're writing through the CPU, then the GPU read domains will 4537 * need to be invalidated at next use. 4538 */ 4539 if (write) { 4540 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 4541 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 4542 } 4543 4544 trace_i915_gem_object_change_domain(obj, 4545 old_read_domains, 4546 old_write_domain); 4547 4548 return 0; 4549 } 4550 4551 /* Throttle our rendering by waiting until the ring has completed our requests 4552 * emitted over 20 msec ago. 4553 * 4554 * Note that if we were to use the current jiffies each time around the loop, 4555 * we wouldn't escape the function with any frames outstanding if the time to 4556 * render a frame was over 20ms. 4557 * 4558 * This should get us reasonable parallelism between CPU and GPU but also 4559 * relatively low latency when blocking on a particular request to finish. 4560 */ 4561 static int 4562 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) 4563 { 4564 struct drm_i915_private *dev_priv = dev->dev_private; 4565 struct drm_i915_file_private *file_priv = file->driver_priv; 4566 unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES; 4567 struct drm_i915_gem_request *request, *target = NULL; 4568 unsigned reset_counter; 4569 int ret; 4570 4571 ret = i915_gem_wait_for_error(&dev_priv->gpu_error); 4572 if (ret) 4573 return ret; 4574 4575 ret = i915_gem_check_wedge(&dev_priv->gpu_error, false); 4576 if (ret) 4577 return ret; 4578 4579 spin_lock(&file_priv->mm.lock); 4580 list_for_each_entry(request, &file_priv->mm.request_list, client_list) { 4581 if (time_after_eq(request->emitted_jiffies, recent_enough)) 4582 break; 4583 4584 /* 4585 * Note that the request might not have been submitted yet. 4586 * In which case emitted_jiffies will be zero. 
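 * Such requests are simply skipped below; only requests that have
 * actually been emitted are considered as throttle targets.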
4587 */ 4588 if (!request->emitted_jiffies) 4589 continue; 4590 4591 target = request; 4592 } 4593 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter); 4594 if (target) 4595 i915_gem_request_reference(target); 4596 spin_unlock(&file_priv->mm.lock); 4597 4598 if (target == NULL) 4599 return 0; 4600 4601 ret = __i915_wait_request(target, reset_counter, true, NULL, NULL); 4602 if (ret == 0) 4603 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0); 4604 4605 i915_gem_request_unreference__unlocked(target); 4606 4607 return ret; 4608 } 4609 4610 static bool 4611 i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags) 4612 { 4613 struct drm_i915_gem_object *obj = vma->obj; 4614 4615 if (alignment && 4616 vma->node.start & (alignment - 1)) 4617 return true; 4618 4619 if (flags & PIN_MAPPABLE && !obj->map_and_fenceable) 4620 return true; 4621 4622 if (flags & PIN_OFFSET_BIAS && 4623 vma->node.start < (flags & PIN_OFFSET_MASK)) 4624 return true; 4625 4626 return false; 4627 } 4628 4629 void __i915_vma_set_map_and_fenceable(struct i915_vma *vma) 4630 { 4631 struct drm_i915_gem_object *obj = vma->obj; 4632 bool mappable, fenceable; 4633 u32 fence_size, fence_alignment; 4634 4635 fence_size = i915_gem_get_gtt_size(obj->base.dev, 4636 obj->base.size, 4637 obj->tiling_mode); 4638 fence_alignment = i915_gem_get_gtt_alignment(obj->base.dev, 4639 obj->base.size, 4640 obj->tiling_mode, 4641 true); 4642 4643 fenceable = (vma->node.size == fence_size && 4644 (vma->node.start & (fence_alignment - 1)) == 0); 4645 4646 mappable = (vma->node.start + fence_size <= 4647 to_i915(obj->base.dev)->gtt.mappable_end); 4648 4649 obj->map_and_fenceable = mappable && fenceable; 4650 } 4651 4652 static int 4653 i915_gem_object_do_pin(struct drm_i915_gem_object *obj, 4654 struct i915_address_space *vm, 4655 const struct i915_ggtt_view *ggtt_view, 4656 uint32_t alignment, 4657 uint64_t flags) 4658 { 4659 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 4660 struct i915_vma *vma; 4661 unsigned bound; 4662 int ret; 4663 4664 if (WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base)) 4665 return -ENODEV; 4666 4667 if (WARN_ON(flags & (PIN_GLOBAL | PIN_MAPPABLE) && !i915_is_ggtt(vm))) 4668 return -EINVAL; 4669 4670 if (WARN_ON((flags & (PIN_MAPPABLE | PIN_GLOBAL)) == PIN_MAPPABLE)) 4671 return -EINVAL; 4672 4673 if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view)) 4674 return -EINVAL; 4675 4676 vma = ggtt_view ? i915_gem_obj_to_ggtt_view(obj, ggtt_view) : 4677 i915_gem_obj_to_vma(obj, vm); 4678 4679 if (IS_ERR(vma)) 4680 return PTR_ERR(vma); 4681 4682 if (vma) { 4683 if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT)) 4684 return -EBUSY; 4685 4686 if (i915_vma_misplaced(vma, alignment, flags)) { 4687 WARN(vma->pin_count, 4688 "bo is already pinned in %s with incorrect alignment:" 4689 " offset=%08x %08x, req.alignment=%x, req.map_and_fenceable=%d," 4690 " obj->map_and_fenceable=%d\n", 4691 ggtt_view ? "ggtt" : "ppgtt", 4692 upper_32_bits(vma->node.start), 4693 lower_32_bits(vma->node.start), 4694 alignment, 4695 !!(flags & PIN_MAPPABLE), 4696 obj->map_and_fenceable); 4697 ret = i915_vma_unbind(vma); 4698 if (ret) 4699 return ret; 4700 4701 vma = NULL; 4702 } 4703 } 4704 4705 bound = vma ? 
vma->bound : 0; 4706 if (vma == NULL || !drm_mm_node_allocated(&vma->node)) { 4707 vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view, alignment, 4708 flags); 4709 if (IS_ERR(vma)) 4710 return PTR_ERR(vma); 4711 } else { 4712 ret = i915_vma_bind(vma, obj->cache_level, flags); 4713 if (ret) 4714 return ret; 4715 } 4716 4717 if (ggtt_view && ggtt_view->type == I915_GGTT_VIEW_NORMAL && 4718 (bound ^ vma->bound) & GLOBAL_BIND) { 4719 __i915_vma_set_map_and_fenceable(vma); 4720 WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable); 4721 } 4722 4723 vma->pin_count++; 4724 return 0; 4725 } 4726 4727 int 4728 i915_gem_object_pin(struct drm_i915_gem_object *obj, 4729 struct i915_address_space *vm, 4730 uint32_t alignment, 4731 uint64_t flags) 4732 { 4733 return i915_gem_object_do_pin(obj, vm, 4734 i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL, 4735 alignment, flags); 4736 } 4737 4738 int 4739 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, 4740 const struct i915_ggtt_view *view, 4741 uint32_t alignment, 4742 uint64_t flags) 4743 { 4744 if (WARN_ONCE(!view, "no view specified")) 4745 return -EINVAL; 4746 4747 return i915_gem_object_do_pin(obj, i915_obj_to_ggtt(obj), view, 4748 alignment, flags | PIN_GLOBAL); 4749 } 4750 4751 void 4752 i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj, 4753 const struct i915_ggtt_view *view) 4754 { 4755 struct i915_vma *vma = i915_gem_obj_to_ggtt_view(obj, view); 4756 4757 BUG_ON(!vma); 4758 WARN_ON(vma->pin_count == 0); 4759 WARN_ON(!i915_gem_obj_ggtt_bound_view(obj, view)); 4760 4761 --vma->pin_count; 4762 } 4763 4764 int 4765 i915_gem_busy_ioctl(struct drm_device *dev, void *data, 4766 struct drm_file *file) 4767 { 4768 struct drm_i915_gem_busy *args = data; 4769 struct drm_i915_gem_object *obj; 4770 int ret; 4771 4772 ret = i915_mutex_lock_interruptible(dev); 4773 if (ret) 4774 return ret; 4775 4776 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 4777 if (&obj->base == NULL) { 4778 ret = -ENOENT; 4779 goto unlock; 4780 } 4781 4782 /* Count all active objects as busy, even if they are currently not used 4783 * by the gpu. Users of this interface expect objects to eventually 4784 * become non-busy without any further actions, therefore emit any 4785 * necessary flushes here. 
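 * The reply packs the set of rings on which the object is still active
 * into the upper 16 bits (hence the BUILD_BUG_ON below) and the id of
 * the last ring to write to it into the lower bits.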
4786 */ 4787 ret = i915_gem_object_flush_active(obj); 4788 if (ret) 4789 goto unref; 4790 4791 BUILD_BUG_ON(I915_NUM_RINGS > 16); 4792 args->busy = obj->active << 16; 4793 if (obj->last_write_req) 4794 args->busy |= obj->last_write_req->ring->id; 4795 4796 unref: 4797 drm_gem_object_unreference(&obj->base); 4798 unlock: 4799 mutex_unlock(&dev->struct_mutex); 4800 return ret; 4801 } 4802 4803 int 4804 i915_gem_throttle_ioctl(struct drm_device *dev, void *data, 4805 struct drm_file *file_priv) 4806 { 4807 return i915_gem_ring_throttle(dev, file_priv); 4808 } 4809 4810 int 4811 i915_gem_madvise_ioctl(struct drm_device *dev, void *data, 4812 struct drm_file *file_priv) 4813 { 4814 struct drm_i915_private *dev_priv = dev->dev_private; 4815 struct drm_i915_gem_madvise *args = data; 4816 struct drm_i915_gem_object *obj; 4817 int ret; 4818 4819 switch (args->madv) { 4820 case I915_MADV_DONTNEED: 4821 case I915_MADV_WILLNEED: 4822 break; 4823 default: 4824 return -EINVAL; 4825 } 4826 4827 ret = i915_mutex_lock_interruptible(dev); 4828 if (ret) 4829 return ret; 4830 4831 obj = to_intel_bo(drm_gem_object_lookup(dev, file_priv, args->handle)); 4832 if (&obj->base == NULL) { 4833 ret = -ENOENT; 4834 goto unlock; 4835 } 4836 4837 if (i915_gem_obj_is_pinned(obj)) { 4838 ret = -EINVAL; 4839 goto out; 4840 } 4841 4842 if (obj->pages && 4843 obj->tiling_mode != I915_TILING_NONE && 4844 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) { 4845 if (obj->madv == I915_MADV_WILLNEED) 4846 i915_gem_object_unpin_pages(obj); 4847 if (args->madv == I915_MADV_WILLNEED) 4848 i915_gem_object_pin_pages(obj); 4849 } 4850 4851 if (obj->madv != __I915_MADV_PURGED) 4852 obj->madv = args->madv; 4853 4854 /* if the object is no longer attached, discard its backing storage */ 4855 if (obj->madv == I915_MADV_DONTNEED && obj->pages == NULL) 4856 i915_gem_object_truncate(obj); 4857 4858 args->retained = obj->madv != __I915_MADV_PURGED; 4859 4860 out: 4861 drm_gem_object_unreference(&obj->base); 4862 unlock: 4863 mutex_unlock(&dev->struct_mutex); 4864 return ret; 4865 } 4866 4867 void i915_gem_object_init(struct drm_i915_gem_object *obj, 4868 const struct drm_i915_gem_object_ops *ops) 4869 { 4870 int i; 4871 4872 INIT_LIST_HEAD(&obj->global_list); 4873 for (i = 0; i < I915_NUM_RINGS; i++) 4874 INIT_LIST_HEAD(&obj->ring_list[i]); 4875 INIT_LIST_HEAD(&obj->obj_exec_link); 4876 INIT_LIST_HEAD(&obj->vma_list); 4877 INIT_LIST_HEAD(&obj->batch_pool_link); 4878 4879 obj->ops = ops; 4880 4881 obj->fence_reg = I915_FENCE_REG_NONE; 4882 obj->madv = I915_MADV_WILLNEED; 4883 4884 i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size); 4885 } 4886 4887 static const struct drm_i915_gem_object_ops i915_gem_object_ops = { 4888 .get_pages = i915_gem_object_get_pages_gtt, 4889 .put_pages = i915_gem_object_put_pages_gtt, 4890 }; 4891 4892 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev, 4893 size_t size) 4894 { 4895 struct drm_i915_gem_object *obj; 4896 #ifdef __linux__ 4897 struct address_space *mapping; 4898 gfp_t mask; 4899 #endif 4900 4901 obj = i915_gem_object_alloc(dev); 4902 if (obj == NULL) 4903 return NULL; 4904 4905 if (drm_gem_object_init(dev, &obj->base, size) != 0) { 4906 i915_gem_object_free(obj); 4907 return NULL; 4908 } 4909 4910 #ifdef __linux__ 4911 mask = GFP_HIGHUSER | __GFP_RECLAIMABLE; 4912 if (IS_CRESTLINE(dev) || IS_BROADWATER(dev)) { 4913 /* 965gm cannot relocate objects above 4GiB. 
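 * so keep their backing pages out of highmem and in the low 4GiB by
 * clearing __GFP_HIGHMEM and setting __GFP_DMA32 in the mapping's gfp
 * mask below.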
*/ 4914 mask &= ~__GFP_HIGHMEM; 4915 mask |= __GFP_DMA32; 4916 } 4917 4918 mapping = file_inode(obj->base.filp)->i_mapping; 4919 mapping_set_gfp_mask(mapping, mask); 4920 #endif 4921 4922 i915_gem_object_init(obj, &i915_gem_object_ops); 4923 4924 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 4925 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 4926 4927 if (HAS_LLC(dev)) { 4928 /* On some devices, we can have the GPU use the LLC (the CPU 4929 * cache) for about a 10% performance improvement 4930 * compared to uncached. Graphics requests other than 4931 * display scanout are coherent with the CPU in 4932 * accessing this cache. This means in this mode we 4933 * don't need to clflush on the CPU side, and on the 4934 * GPU side we only need to flush internal caches to 4935 * get data visible to the CPU. 4936 * 4937 * However, we maintain the display planes as UC, and so 4938 * need to rebind when first used as such. 4939 */ 4940 obj->cache_level = I915_CACHE_LLC; 4941 } else 4942 obj->cache_level = I915_CACHE_NONE; 4943 4944 trace_i915_gem_object_create(obj); 4945 4946 return obj; 4947 } 4948 4949 #ifdef __linux__ 4950 static bool discard_backing_storage(struct drm_i915_gem_object *obj) 4951 { 4952 /* If we are the last user of the backing storage (be it shmemfs 4953 * pages or stolen etc), we know that the pages are going to be 4954 * immediately released. In this case, we can then skip copying 4955 * back the contents from the GPU. 4956 */ 4957 4958 if (obj->madv != I915_MADV_WILLNEED) 4959 return false; 4960 4961 if (obj->base.filp == NULL) 4962 return true; 4963 4964 /* At first glance, this looks racy, but then again so would be 4965 * userspace racing mmap against close. However, the first external 4966 * reference to the filp can only be obtained through the 4967 * i915_gem_mmap_ioctl() which safeguards us against the user 4968 * acquiring such a reference whilst we are in the middle of 4969 * freeing the object. 4970 */ 4971 return atomic_long_read(&obj->base.filp->f_count) == 1; 4972 } 4973 #endif 4974 4975 void i915_gem_free_object(struct drm_gem_object *gem_obj) 4976 { 4977 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); 4978 struct drm_device *dev = obj->base.dev; 4979 struct drm_i915_private *dev_priv = dev->dev_private; 4980 struct i915_vma *vma, *next; 4981 4982 intel_runtime_pm_get(dev_priv); 4983 4984 trace_i915_gem_object_destroy(obj); 4985 4986 list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) { 4987 int ret; 4988 4989 vma->pin_count = 0; 4990 ret = i915_vma_unbind(vma); 4991 if (WARN_ON(ret == -ERESTARTSYS)) { 4992 bool was_interruptible; 4993 4994 was_interruptible = dev_priv->mm.interruptible; 4995 dev_priv->mm.interruptible = false; 4996 4997 WARN_ON(i915_vma_unbind(vma)); 4998 4999 dev_priv->mm.interruptible = was_interruptible; 5000 } 5001 } 5002 5003 /* Stolen objects don't hold a ref, but do hold pin count. Fix that up 5004 * before progressing. 
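 * (the unpin below keeps the pages_pin_count bookkeeping balanced for
 * them).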
*/ 5005 if (obj->stolen) 5006 i915_gem_object_unpin_pages(obj); 5007 5008 WARN_ON(obj->frontbuffer_bits); 5009 5010 if (obj->pages && obj->madv == I915_MADV_WILLNEED && 5011 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES && 5012 obj->tiling_mode != I915_TILING_NONE) 5013 i915_gem_object_unpin_pages(obj); 5014 5015 if (WARN_ON(obj->pages_pin_count)) 5016 obj->pages_pin_count = 0; 5017 #ifdef notyet 5018 if (discard_backing_storage(obj)) 5019 obj->madv = I915_MADV_DONTNEED; 5020 #endif 5021 i915_gem_object_put_pages(obj); 5022 i915_gem_object_free_mmap_offset(obj); 5023 5024 BUG_ON(obj->pages); 5025 5026 #ifdef notyet 5027 if (obj->base.import_attach) 5028 drm_prime_gem_destroy(&obj->base, NULL); 5029 #endif 5030 5031 if (obj->ops->release) 5032 obj->ops->release(obj); 5033 5034 drm_gem_object_release(&obj->base); 5035 i915_gem_info_remove_obj(dev_priv, obj->base.size); 5036 5037 kfree(obj->bit_17); 5038 i915_gem_object_free(obj); 5039 5040 intel_runtime_pm_put(dev_priv); 5041 } 5042 5043 struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj, 5044 struct i915_address_space *vm) 5045 { 5046 struct i915_vma *vma; 5047 list_for_each_entry(vma, &obj->vma_list, vma_link) { 5048 if (i915_is_ggtt(vma->vm) && 5049 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) 5050 continue; 5051 if (vma->vm == vm) 5052 return vma; 5053 } 5054 return NULL; 5055 } 5056 5057 struct i915_vma *i915_gem_obj_to_ggtt_view(struct drm_i915_gem_object *obj, 5058 const struct i915_ggtt_view *view) 5059 { 5060 struct i915_address_space *ggtt = i915_obj_to_ggtt(obj); 5061 struct i915_vma *vma; 5062 5063 if (WARN_ONCE(!view, "no view specified")) 5064 return ERR_PTR(-EINVAL); 5065 5066 list_for_each_entry(vma, &obj->vma_list, vma_link) 5067 if (vma->vm == ggtt && 5068 i915_ggtt_view_equal(&vma->ggtt_view, view)) 5069 return vma; 5070 return NULL; 5071 } 5072 5073 void i915_gem_vma_destroy(struct i915_vma *vma) 5074 { 5075 struct i915_address_space *vm = NULL; 5076 WARN_ON(vma->node.allocated); 5077 5078 /* Keep the vma as a placeholder in the execbuffer reservation lists */ 5079 if (!list_empty(&vma->exec_list)) 5080 return; 5081 5082 vm = vma->vm; 5083 5084 if (!i915_is_ggtt(vm)) 5085 i915_ppgtt_put(i915_vm_to_ppgtt(vm)); 5086 5087 list_del(&vma->vma_link); 5088 5089 #ifdef __linux__ 5090 kmem_cache_free(to_i915(vma->obj->base.dev)->vmas, vma); 5091 #else 5092 pool_put(&(to_i915(vma->obj->base.dev)->vmas), vma); 5093 #endif 5094 } 5095 5096 static void 5097 i915_gem_stop_ringbuffers(struct drm_device *dev) 5098 { 5099 struct drm_i915_private *dev_priv = dev->dev_private; 5100 struct intel_engine_cs *ring; 5101 int i; 5102 5103 for_each_ring(ring, dev_priv, i) 5104 dev_priv->gt.stop_ring(ring); 5105 } 5106 5107 int 5108 i915_gem_suspend(struct drm_device *dev) 5109 { 5110 struct drm_i915_private *dev_priv = dev->dev_private; 5111 int ret = 0; 5112 5113 mutex_lock(&dev->struct_mutex); 5114 ret = i915_gpu_idle(dev); 5115 if (ret) 5116 goto err; 5117 5118 i915_gem_retire_requests(dev); 5119 5120 i915_gem_stop_ringbuffers(dev); 5121 mutex_unlock(&dev->struct_mutex); 5122 5123 cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work); 5124 cancel_delayed_work_sync(&dev_priv->mm.retire_work); 5125 flush_delayed_work(&dev_priv->mm.idle_work); 5126 5127 /* Assert that we successfully flushed all the work and 5128 * reset the GPU back to its idle, low power state.
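 * (dev_priv->mm.busy should be clear by now; the WARN below catches the
 * case where it is not).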
5129 */ 5130 WARN_ON(dev_priv->mm.busy); 5131 5132 return 0; 5133 5134 err: 5135 mutex_unlock(&dev->struct_mutex); 5136 return ret; 5137 } 5138 5139 int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice) 5140 { 5141 struct intel_engine_cs *ring = req->ring; 5142 struct drm_device *dev = ring->dev; 5143 struct drm_i915_private *dev_priv = dev->dev_private; 5144 u32 reg_base = GEN7_L3LOG_BASE + (slice * 0x200); 5145 u32 *remap_info = dev_priv->l3_parity.remap_info[slice]; 5146 int i, ret; 5147 5148 if (!HAS_L3_DPF(dev) || !remap_info) 5149 return 0; 5150 5151 ret = intel_ring_begin(req, GEN7_L3LOG_SIZE / 4 * 3); 5152 if (ret) 5153 return ret; 5154 5155 /* 5156 * Note: We do not worry about the concurrent register cacheline hang 5157 * here because no other code should access these registers other than 5158 * at initialization time. 5159 */ 5160 for (i = 0; i < GEN7_L3LOG_SIZE; i += 4) { 5161 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); 5162 intel_ring_emit(ring, reg_base + i); 5163 intel_ring_emit(ring, remap_info[i/4]); 5164 } 5165 5166 intel_ring_advance(ring); 5167 5168 return ret; 5169 } 5170 5171 void i915_gem_init_swizzling(struct drm_device *dev) 5172 { 5173 struct drm_i915_private *dev_priv = dev->dev_private; 5174 5175 if (INTEL_INFO(dev)->gen < 5 || 5176 dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE) 5177 return; 5178 5179 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) | 5180 DISP_TILE_SURFACE_SWIZZLING); 5181 5182 if (IS_GEN5(dev)) 5183 return; 5184 5185 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL); 5186 if (IS_GEN6(dev)) 5187 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB)); 5188 else if (IS_GEN7(dev)) 5189 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB)); 5190 else if (IS_GEN8(dev)) 5191 I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW)); 5192 else 5193 BUG(); 5194 } 5195 5196 static void init_unused_ring(struct drm_device *dev, u32 base) 5197 { 5198 struct drm_i915_private *dev_priv = dev->dev_private; 5199 5200 I915_WRITE(RING_CTL(base), 0); 5201 I915_WRITE(RING_HEAD(base), 0); 5202 I915_WRITE(RING_TAIL(base), 0); 5203 I915_WRITE(RING_START(base), 0); 5204 } 5205 5206 static void init_unused_rings(struct drm_device *dev) 5207 { 5208 if (IS_I830(dev)) { 5209 init_unused_ring(dev, PRB1_BASE); 5210 init_unused_ring(dev, SRB0_BASE); 5211 init_unused_ring(dev, SRB1_BASE); 5212 init_unused_ring(dev, SRB2_BASE); 5213 init_unused_ring(dev, SRB3_BASE); 5214 } else if (IS_GEN2(dev)) { 5215 init_unused_ring(dev, SRB0_BASE); 5216 init_unused_ring(dev, SRB1_BASE); 5217 } else if (IS_GEN3(dev)) { 5218 init_unused_ring(dev, PRB1_BASE); 5219 init_unused_ring(dev, PRB2_BASE); 5220 } 5221 } 5222 5223 int i915_gem_init_rings(struct drm_device *dev) 5224 { 5225 struct drm_i915_private *dev_priv = dev->dev_private; 5226 int ret; 5227 5228 ret = intel_init_render_ring_buffer(dev); 5229 if (ret) 5230 return ret; 5231 5232 if (HAS_BSD(dev)) { 5233 ret = intel_init_bsd_ring_buffer(dev); 5234 if (ret) 5235 goto cleanup_render_ring; 5236 } 5237 5238 if (HAS_BLT(dev)) { 5239 ret = intel_init_blt_ring_buffer(dev); 5240 if (ret) 5241 goto cleanup_bsd_ring; 5242 } 5243 5244 if (HAS_VEBOX(dev)) { 5245 ret = intel_init_vebox_ring_buffer(dev); 5246 if (ret) 5247 goto cleanup_blt_ring; 5248 } 5249 5250 if (HAS_BSD2(dev)) { 5251 ret = intel_init_bsd2_ring_buffer(dev); 5252 if (ret) 5253 goto cleanup_vebox_ring; 5254 } 5255 5256 return 0; 5257 5258 cleanup_vebox_ring: 5259 intel_cleanup_ring_buffer(&dev_priv->ring[VECS]); 5260 
cleanup_blt_ring: 5261 intel_cleanup_ring_buffer(&dev_priv->ring[BCS]); 5262 cleanup_bsd_ring: 5263 intel_cleanup_ring_buffer(&dev_priv->ring[VCS]); 5264 cleanup_render_ring: 5265 intel_cleanup_ring_buffer(&dev_priv->ring[RCS]); 5266 5267 return ret; 5268 } 5269 5270 int 5271 i915_gem_init_hw(struct drm_device *dev) 5272 { 5273 struct drm_i915_private *dev_priv = dev->dev_private; 5274 struct intel_engine_cs *ring; 5275 int ret, i, j; 5276 5277 if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt()) 5278 return -EIO; 5279 5280 /* Double layer security blanket, see i915_gem_init() */ 5281 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 5282 5283 if (dev_priv->ellc_size) 5284 I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf)); 5285 5286 if (IS_HASWELL(dev)) 5287 I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev) ? 5288 LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED); 5289 5290 if (HAS_PCH_NOP(dev)) { 5291 if (IS_IVYBRIDGE(dev)) { 5292 u32 temp = I915_READ(GEN7_MSG_CTL); 5293 temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK); 5294 I915_WRITE(GEN7_MSG_CTL, temp); 5295 } else if (INTEL_INFO(dev)->gen >= 7) { 5296 u32 temp = I915_READ(HSW_NDE_RSTWRN_OPT); 5297 temp &= ~RESET_PCH_HANDSHAKE_ENABLE; 5298 I915_WRITE(HSW_NDE_RSTWRN_OPT, temp); 5299 } 5300 } 5301 5302 i915_gem_init_swizzling(dev); 5303 5304 /* 5305 * At least 830 can leave some of the unused rings 5306 * "active" (ie. head != tail) after resume which 5307 * will prevent c3 entry. Makes sure all unused rings 5308 * are totally idle. 5309 */ 5310 init_unused_rings(dev); 5311 5312 BUG_ON(!dev_priv->ring[RCS].default_context); 5313 5314 ret = i915_ppgtt_init_hw(dev); 5315 if (ret) { 5316 DRM_ERROR("PPGTT enable HW failed %d\n", ret); 5317 goto out; 5318 } 5319 5320 /* Need to do basic initialisation of all rings first: */ 5321 for_each_ring(ring, dev_priv, i) { 5322 ret = ring->init_hw(ring); 5323 if (ret) 5324 goto out; 5325 } 5326 5327 /* We can't enable contexts until all firmware is loaded */ 5328 if (HAS_GUC_UCODE(dev)) { 5329 ret = intel_guc_ucode_load(dev); 5330 if (ret) { 5331 /* 5332 * If we got an error and GuC submission is enabled, map 5333 * the error to -EIO so the GPU will be declared wedged. 5334 * OTOH, if we didn't intend to use the GuC anyway, just 5335 * discard the error and carry on. 5336 */ 5337 DRM_ERROR("Failed to initialize GuC, error %d%s\n", ret, 5338 i915.enable_guc_submission ? "" : 5339 " (ignored)"); 5340 ret = i915.enable_guc_submission ? 
-EIO : 0; 5341 if (ret) 5342 goto out; 5343 } 5344 } 5345 5346 /* 5347 * Increment the next seqno by 0x100 so we have a visible break 5348 * on re-initialisation 5349 */ 5350 ret = i915_gem_set_seqno(dev, dev_priv->next_seqno+0x100); 5351 if (ret) 5352 goto out; 5353 5354 /* Now it is safe to go back round and do everything else: */ 5355 for_each_ring(ring, dev_priv, i) { 5356 struct drm_i915_gem_request *req; 5357 5358 WARN_ON(!ring->default_context); 5359 5360 ret = i915_gem_request_alloc(ring, ring->default_context, &req); 5361 if (ret) { 5362 i915_gem_cleanup_ringbuffer(dev); 5363 goto out; 5364 } 5365 5366 if (ring->id == RCS) { 5367 for (j = 0; j < NUM_L3_SLICES(dev); j++) 5368 i915_gem_l3_remap(req, j); 5369 } 5370 5371 ret = i915_ppgtt_init_ring(req); 5372 if (ret && ret != -EIO) { 5373 DRM_ERROR("PPGTT enable ring #%d failed %d\n", i, ret); 5374 i915_gem_request_cancel(req); 5375 i915_gem_cleanup_ringbuffer(dev); 5376 goto out; 5377 } 5378 5379 ret = i915_gem_context_enable(req); 5380 if (ret && ret != -EIO) { 5381 DRM_ERROR("Context enable ring #%d failed %d\n", i, ret); 5382 i915_gem_request_cancel(req); 5383 i915_gem_cleanup_ringbuffer(dev); 5384 goto out; 5385 } 5386 5387 i915_add_request_no_flush(req); 5388 } 5389 5390 out: 5391 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5392 return ret; 5393 } 5394 5395 int i915_gem_init(struct drm_device *dev) 5396 { 5397 struct drm_i915_private *dev_priv = dev->dev_private; 5398 int ret; 5399 5400 i915.enable_execlists = intel_sanitize_enable_execlists(dev, 5401 i915.enable_execlists); 5402 5403 mutex_lock(&dev->struct_mutex); 5404 5405 if (IS_VALLEYVIEW(dev)) { 5406 /* VLVA0 (potential hack), BIOS isn't actually waking us */ 5407 I915_WRITE(VLV_GTLC_WAKE_CTRL, VLV_GTLC_ALLOWWAKEREQ); 5408 if (wait_for((I915_READ(VLV_GTLC_PW_STATUS) & 5409 VLV_GTLC_ALLOWWAKEACK), 10)) 5410 DRM_DEBUG_DRIVER("allow wake ack timed out\n"); 5411 } 5412 5413 if (!i915.enable_execlists) { 5414 dev_priv->gt.execbuf_submit = i915_gem_ringbuffer_submission; 5415 dev_priv->gt.init_rings = i915_gem_init_rings; 5416 dev_priv->gt.cleanup_ring = intel_cleanup_ring_buffer; 5417 dev_priv->gt.stop_ring = intel_stop_ring_buffer; 5418 } else { 5419 dev_priv->gt.execbuf_submit = intel_execlists_submission; 5420 dev_priv->gt.init_rings = intel_logical_rings_init; 5421 dev_priv->gt.cleanup_ring = intel_logical_ring_cleanup; 5422 dev_priv->gt.stop_ring = intel_logical_ring_stop; 5423 } 5424 5425 /* This is just a security blanket to placate dragons. 5426 * On some systems, we very sporadically observe that the first TLBs 5427 * used by the CS may be stale, despite us poking the TLB reset. If 5428 * we hold the forcewake during initialisation these problems 5429 * just magically go away. 5430 */ 5431 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 5432 5433 ret = i915_gem_init_userptr(dev); 5434 if (ret) 5435 goto out_unlock; 5436 5437 i915_gem_init_global_gtt(dev); 5438 5439 ret = i915_gem_context_init(dev); 5440 if (ret) 5441 goto out_unlock; 5442 5443 ret = dev_priv->gt.init_rings(dev); 5444 if (ret) 5445 goto out_unlock; 5446 5447 ret = i915_gem_init_hw(dev); 5448 if (ret == -EIO) { 5449 /* Allow ring initialisation to fail by marking the GPU as 5450 * wedged. But we only want to do this where the GPU is angry, 5451 * for all other failure, such as an allocation failure, bail. 
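 * Wedging is recorded by setting I915_WEDGED in the reset counter; the
 * error is then swallowed so the rest of driver load can continue.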
5452 */ 5453 DRM_ERROR("Failed to initialize GPU, declaring it wedged\n"); 5454 atomic_or(I915_WEDGED, &dev_priv->gpu_error.reset_counter); 5455 ret = 0; 5456 } 5457 5458 out_unlock: 5459 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5460 mutex_unlock(&dev->struct_mutex); 5461 5462 return ret; 5463 } 5464 5465 void 5466 i915_gem_cleanup_ringbuffer(struct drm_device *dev) 5467 { 5468 struct drm_i915_private *dev_priv = dev->dev_private; 5469 struct intel_engine_cs *ring; 5470 int i; 5471 5472 for_each_ring(ring, dev_priv, i) 5473 dev_priv->gt.cleanup_ring(ring); 5474 5475 if (i915.enable_execlists) 5476 /* 5477 * Neither the BIOS, ourselves nor any other kernel 5478 * expects the system to be in execlists mode on startup, 5479 * so we need to reset the GPU back to legacy mode. 5480 */ 5481 intel_gpu_reset(dev); 5482 } 5483 5484 static void 5485 init_ring_lists(struct intel_engine_cs *ring) 5486 { 5487 INIT_LIST_HEAD(&ring->active_list); 5488 INIT_LIST_HEAD(&ring->request_list); 5489 } 5490 5491 void 5492 i915_gem_load(struct drm_device *dev) 5493 { 5494 struct drm_i915_private *dev_priv = dev->dev_private; 5495 int i; 5496 5497 #ifdef __linux__ 5498 dev_priv->objects = 5499 kmem_cache_create("i915_gem_object", 5500 sizeof(struct drm_i915_gem_object), 0, 5501 SLAB_HWCACHE_ALIGN, 5502 NULL); 5503 dev_priv->vmas = 5504 kmem_cache_create("i915_gem_vma", 5505 sizeof(struct i915_vma), 0, 5506 SLAB_HWCACHE_ALIGN, 5507 NULL); 5508 dev_priv->requests = 5509 kmem_cache_create("i915_gem_request", 5510 sizeof(struct drm_i915_gem_request), 0, 5511 SLAB_HWCACHE_ALIGN, 5512 NULL); 5513 #else 5514 pool_init(&dev_priv->objects, sizeof(struct drm_i915_gem_object), 5515 0, IPL_NONE, 0, "drmobj", NULL); 5516 pool_init(&dev_priv->vmas, sizeof(struct i915_vma), 5517 0, IPL_NONE, 0, "drmvma", NULL); 5518 pool_init(&dev_priv->requests, sizeof(struct drm_i915_gem_request), 5519 0, IPL_NONE, 0, "drmreq", NULL); 5520 #endif 5521 5522 INIT_LIST_HEAD(&dev_priv->vm_list); 5523 INIT_LIST_HEAD(&dev_priv->context_list); 5524 INIT_LIST_HEAD(&dev_priv->mm.unbound_list); 5525 INIT_LIST_HEAD(&dev_priv->mm.bound_list); 5526 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 5527 for (i = 0; i < I915_NUM_RINGS; i++) 5528 init_ring_lists(&dev_priv->ring[i]); 5529 for (i = 0; i < I915_MAX_NUM_FENCES; i++) 5530 INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list); 5531 INIT_DELAYED_WORK(&dev_priv->mm.retire_work, 5532 i915_gem_retire_work_handler); 5533 INIT_DELAYED_WORK(&dev_priv->mm.idle_work, 5534 i915_gem_idle_work_handler); 5535 init_waitqueue_head(&dev_priv->gpu_error.reset_queue); 5536 5537 dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL; 5538 5539 if (INTEL_INFO(dev)->gen >= 7 && !IS_VALLEYVIEW(dev)) 5540 dev_priv->num_fence_regs = 32; 5541 else if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev)) 5542 dev_priv->num_fence_regs = 16; 5543 else 5544 dev_priv->num_fence_regs = 8; 5545 5546 if (intel_vgpu_active(dev)) 5547 dev_priv->num_fence_regs = 5548 I915_READ(vgtif_reg(avail_rs.fence_num)); 5549 5550 /* 5551 * Set initial sequence number for requests. 5552 * Using this number allows the wraparound to happen early, 5553 * catching any obvious problems.
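 * The initial values chosen below sit just short of the 32-bit wrap
 * point, so the wraparound is exercised soon after load.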
5554 */ 5555 dev_priv->next_seqno = ((u32)~0 - 0x1100); 5556 dev_priv->last_seqno = ((u32)~0 - 0x1101); 5557 5558 /* Initialize fence registers to zero */ 5559 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 5560 i915_gem_restore_fences(dev); 5561 5562 i915_gem_detect_bit_6_swizzle(dev); 5563 init_waitqueue_head(&dev_priv->pending_flip_queue); 5564 5565 dev_priv->mm.interruptible = true; 5566 5567 #ifdef notyet 5568 i915_gem_shrinker_init(dev_priv); 5569 #endif 5570 5571 rw_init(&dev_priv->fb_tracking.lock, "fbtrlk"); 5572 } 5573 5574 void i915_gem_release(struct drm_device *dev, struct drm_file *file) 5575 { 5576 struct drm_i915_file_private *file_priv = file->driver_priv; 5577 5578 /* Clean up our request list when the client is going away, so that 5579 * later retire_requests won't dereference our soon-to-be-gone 5580 * file_priv. 5581 */ 5582 spin_lock(&file_priv->mm.lock); 5583 while (!list_empty(&file_priv->mm.request_list)) { 5584 struct drm_i915_gem_request *request; 5585 5586 request = list_first_entry(&file_priv->mm.request_list, 5587 struct drm_i915_gem_request, 5588 client_list); 5589 list_del(&request->client_list); 5590 request->file_priv = NULL; 5591 } 5592 spin_unlock(&file_priv->mm.lock); 5593 5594 if (!list_empty(&file_priv->rps.link)) { 5595 spin_lock(&to_i915(dev)->rps.client_lock); 5596 list_del(&file_priv->rps.link); 5597 spin_unlock(&to_i915(dev)->rps.client_lock); 5598 } 5599 } 5600 5601 int i915_gem_open(struct drm_device *dev, struct drm_file *file) 5602 { 5603 struct drm_i915_file_private *file_priv; 5604 int ret; 5605 5606 DRM_DEBUG_DRIVER("\n"); 5607 5608 file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL); 5609 if (!file_priv) 5610 return -ENOMEM; 5611 5612 file->driver_priv = file_priv; 5613 file_priv->dev_priv = dev->dev_private; 5614 file_priv->file = file; 5615 INIT_LIST_HEAD(&file_priv->rps.link); 5616 5617 mtx_init(&file_priv->mm.lock, IPL_NONE); 5618 INIT_LIST_HEAD(&file_priv->mm.request_list); 5619 5620 ret = i915_gem_context_open(dev, file); 5621 if (ret) 5622 kfree(file_priv); 5623 5624 return ret; 5625 } 5626 5627 /** 5628 * i915_gem_track_fb - update frontbuffer tracking 5629 * @old: current GEM buffer for the frontbuffer slots 5630 * @new: new GEM buffer for the frontbuffer slots 5631 * @frontbuffer_bits: bitmask of frontbuffer slots 5632 * 5633 * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them 5634 * from @old and setting them in @new. Both @old and @new can be NULL. 
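 * The caller must hold struct_mutex for any object that is passed in,
 * as the WARNs in the body below assert.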
5635 */ 5636 void i915_gem_track_fb(struct drm_i915_gem_object *old, 5637 struct drm_i915_gem_object *new, 5638 unsigned frontbuffer_bits) 5639 { 5640 if (old) { 5641 WARN_ON(!mutex_is_locked(&old->base.dev->struct_mutex)); 5642 WARN_ON(!(old->frontbuffer_bits & frontbuffer_bits)); 5643 old->frontbuffer_bits &= ~frontbuffer_bits; 5644 } 5645 5646 if (new) { 5647 WARN_ON(!mutex_is_locked(&new->base.dev->struct_mutex)); 5648 WARN_ON(new->frontbuffer_bits & frontbuffer_bits); 5649 new->frontbuffer_bits |= frontbuffer_bits; 5650 } 5651 } 5652 5653 /* All the new VM stuff */ 5654 u64 i915_gem_obj_offset(struct drm_i915_gem_object *o, 5655 struct i915_address_space *vm) 5656 { 5657 struct drm_i915_private *dev_priv = o->base.dev->dev_private; 5658 struct i915_vma *vma; 5659 5660 WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base); 5661 5662 list_for_each_entry(vma, &o->vma_list, vma_link) { 5663 if (i915_is_ggtt(vma->vm) && 5664 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) 5665 continue; 5666 if (vma->vm == vm) 5667 return vma->node.start; 5668 } 5669 5670 WARN(1, "%s vma for this object not found.\n", 5671 i915_is_ggtt(vm) ? "global" : "ppgtt"); 5672 return -1; 5673 } 5674 5675 u64 i915_gem_obj_ggtt_offset_view(struct drm_i915_gem_object *o, 5676 const struct i915_ggtt_view *view) 5677 { 5678 struct i915_address_space *ggtt = i915_obj_to_ggtt(o); 5679 struct i915_vma *vma; 5680 5681 list_for_each_entry(vma, &o->vma_list, vma_link) 5682 if (vma->vm == ggtt && 5683 i915_ggtt_view_equal(&vma->ggtt_view, view)) 5684 return vma->node.start; 5685 5686 WARN(1, "global vma for this object not found. (view=%u)\n", view->type); 5687 return -1; 5688 } 5689 5690 bool i915_gem_obj_bound(struct drm_i915_gem_object *o, 5691 struct i915_address_space *vm) 5692 { 5693 struct i915_vma *vma; 5694 5695 list_for_each_entry(vma, &o->vma_list, vma_link) { 5696 if (i915_is_ggtt(vma->vm) && 5697 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) 5698 continue; 5699 if (vma->vm == vm && drm_mm_node_allocated(&vma->node)) 5700 return true; 5701 } 5702 5703 return false; 5704 } 5705 5706 bool i915_gem_obj_ggtt_bound_view(struct drm_i915_gem_object *o, 5707 const struct i915_ggtt_view *view) 5708 { 5709 struct i915_address_space *ggtt = i915_obj_to_ggtt(o); 5710 struct i915_vma *vma; 5711 5712 list_for_each_entry(vma, &o->vma_list, vma_link) 5713 if (vma->vm == ggtt && 5714 i915_ggtt_view_equal(&vma->ggtt_view, view) && 5715 drm_mm_node_allocated(&vma->node)) 5716 return true; 5717 5718 return false; 5719 } 5720 5721 bool i915_gem_obj_bound_any(struct drm_i915_gem_object *o) 5722 { 5723 struct i915_vma *vma; 5724 5725 list_for_each_entry(vma, &o->vma_list, vma_link) 5726 if (drm_mm_node_allocated(&vma->node)) 5727 return true; 5728 5729 return false; 5730 } 5731 5732 unsigned long i915_gem_obj_size(struct drm_i915_gem_object *o, 5733 struct i915_address_space *vm) 5734 { 5735 struct drm_i915_private *dev_priv = o->base.dev->dev_private; 5736 struct i915_vma *vma; 5737 5738 WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base); 5739 5740 BUG_ON(list_empty(&o->vma_list)); 5741 5742 list_for_each_entry(vma, &o->vma_list, vma_link) { 5743 if (i915_is_ggtt(vma->vm) && 5744 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) 5745 continue; 5746 if (vma->vm == vm) 5747 return vma->node.size; 5748 } 5749 return 0; 5750 } 5751 5752 bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj) 5753 { 5754 struct i915_vma *vma; 5755 list_for_each_entry(vma, &obj->vma_list, vma_link) 5756 if (vma->pin_count > 0) 5757 return true; 5758 5759 return 
false; 5760 } 5761 5762 /* Allocate a new GEM object and fill it with the supplied data */ 5763 struct drm_i915_gem_object * 5764 i915_gem_object_create_from_data(struct drm_device *dev, 5765 const void *data, size_t size) 5766 { 5767 struct drm_i915_gem_object *obj; 5768 struct sg_table *sg; 5769 size_t bytes; 5770 int ret; 5771 5772 obj = i915_gem_alloc_object(dev, round_up(size, PAGE_SIZE)); 5773 if (IS_ERR_OR_NULL(obj)) 5774 return obj; 5775 5776 ret = i915_gem_object_set_to_cpu_domain(obj, true); 5777 if (ret) 5778 goto fail; 5779 5780 ret = i915_gem_object_get_pages(obj); 5781 if (ret) 5782 goto fail; 5783 5784 i915_gem_object_pin_pages(obj); 5785 sg = obj->pages; 5786 bytes = sg_copy_from_buffer(sg->sgl, sg->nents, (void *)data, size); 5787 i915_gem_object_unpin_pages(obj); 5788 5789 if (WARN_ON(bytes != size)) { 5790 DRM_ERROR("Incomplete copy, wrote %zu of %zu", bytes, size); 5791 ret = -EFAULT; 5792 goto fail; 5793 } 5794 5795 return obj; 5796 5797 fail: 5798 drm_gem_object_unreference(&obj->base); 5799 return ERR_PTR(ret); 5800 } 5801
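/*
 * Illustrative sketch only (names are placeholders, not an in-tree
 * caller): i915_gem_object_create_from_data() is intended to be called
 * under struct_mutex (note the locked drm_gem_object_unreference() in
 * its failure path), and callers should treat both a NULL and an
 * ERR_PTR return as failure, e.g.:
 *
 *	obj = i915_gem_object_create_from_data(dev, blob, blob_size);
 *	if (IS_ERR_OR_NULL(obj))
 *		return obj ? PTR_ERR(obj) : -ENOMEM;
 *
 * where "blob" and "blob_size" stand for whatever firmware or similar
 * payload is being uploaded into a GEM object.
 */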