/*
 * Copyright © 2008,2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *    Chris Wilson <chris@chris-wilson.co.uk>
 *
 */

#include <drm/drmP.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "i915_trace.h"
#include "intel_drv.h"
#include <linux/pagemap.h>

#define __EXEC_OBJECT_HAS_PIN (1<<31)
#define __EXEC_OBJECT_HAS_FENCE (1<<30)
#define __EXEC_OBJECT_NEEDS_BIAS (1<<28)

#define BATCH_OFFSET_BIAS (256*1024)

struct eb_vmas {
	struct list_head vmas;
	int and;
	union {
		struct i915_vma *lut[0];
		struct hlist_head buckets[0];
	};
};

static struct eb_vmas *
eb_create(struct drm_i915_gem_execbuffer2 *args)
{
	struct eb_vmas *eb = NULL;

	if (args->flags & I915_EXEC_HANDLE_LUT) {
		unsigned size = args->buffer_count;
		size *= sizeof(struct i915_vma *);
		size += sizeof(struct eb_vmas);
		eb = kmalloc(size, M_DRM, M_NOWAIT);
	}

	if (eb == NULL) {
		unsigned size = args->buffer_count;
		unsigned count = PAGE_SIZE / sizeof(struct hlist_head) / 2;
		BUILD_BUG_ON_NOT_POWER_OF_2(PAGE_SIZE / sizeof(struct hlist_head));
		while (count > 2*size)
			count >>= 1;
		eb = kzalloc(count*sizeof(struct hlist_head) +
			     sizeof(struct eb_vmas),
			     GFP_TEMPORARY);
		if (eb == NULL)
			return eb;

		eb->and = count - 1;
	} else
		eb->and = -args->buffer_count;

	INIT_LIST_HEAD(&eb->vmas);
	return eb;
}

static void
eb_reset(struct eb_vmas *eb)
{
	if (eb->and >= 0)
		memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head));
}

static int
eb_lookup_vmas(struct eb_vmas *eb,
	       struct drm_i915_gem_exec_object2 *exec,
	       const struct drm_i915_gem_execbuffer2 *args,
	       struct i915_address_space *vm,
	       struct drm_file *file)
{
	struct drm_i915_private *dev_priv = vm->dev->dev_private;
	struct drm_i915_gem_object *obj;
	struct list_head objects;
	int i, ret;

	INIT_LIST_HEAD(&objects);
	lockmgr(&file->table_lock, LK_EXCLUSIVE);
	/* Grab a reference to the object and release the lock so we can lookup
	 * or create the VMA without using GFP_ATOMIC */
	for (i = 0; i < args->buffer_count; i++) {
		obj = to_intel_bo(idr_find(&file->object_idr, exec[i].handle));
		if (obj == NULL) {
			lockmgr(&file->table_lock, LK_RELEASE);
			DRM_DEBUG("Invalid object handle %d at index %d\n",
				   exec[i].handle, i);
			ret = -ENOENT;
			goto err;
		}

		if (!list_empty(&obj->obj_exec_link)) {
			lockmgr(&file->table_lock, LK_RELEASE);
			DRM_DEBUG("Object %p [handle %d, index %d] appears more than once in object list\n",
				   obj, exec[i].handle, i);
			ret = -EINVAL;
			goto err;
		}

		drm_gem_object_reference(&obj->base);
		list_add_tail(&obj->obj_exec_link, &objects);
	}
	lockmgr(&file->table_lock, LK_RELEASE);

	i = 0;
	while (!list_empty(&objects)) {
		struct i915_vma *vma;
		struct i915_address_space *bind_vm = vm;

		if (exec[i].flags & EXEC_OBJECT_NEEDS_GTT &&
		    USES_FULL_PPGTT(vm->dev)) {
			ret = -EINVAL;
			goto err;
		}

		/* If we have secure dispatch, or userspace assures us that
		 * it knows what it is doing, use the GGTT VM.
		 */
		if (((args->flags & I915_EXEC_SECURE) &&
		    (i == (args->buffer_count - 1))))
			bind_vm = &dev_priv->gtt.base;

		obj = list_first_entry(&objects,
				       struct drm_i915_gem_object,
				       obj_exec_link);

		/*
		 * NOTE: We can leak any vmas created here when something fails
		 * later on. But that's no issue since vma_unbind can deal with
		 * vmas which are not actually bound. And since only
		 * lookup_or_create exists as an interface to get at the vma
		 * from the (obj, vm) we don't run the risk of creating
		 * duplicated vmas for the same vm.
		 */
		vma = i915_gem_obj_lookup_or_create_vma(obj, bind_vm);
		if (IS_ERR(vma)) {
			DRM_DEBUG("Failed to lookup VMA\n");
			ret = PTR_ERR(vma);
			goto err;
		}

		/* Transfer ownership from the objects list to the vmas list. */
		list_add_tail(&vma->exec_list, &eb->vmas);
		list_del_init(&obj->obj_exec_link);

		vma->exec_entry = &exec[i];
		if (eb->and < 0) {
			eb->lut[i] = vma;
		} else {
			uint32_t handle = args->flags & I915_EXEC_HANDLE_LUT ? i : exec[i].handle;
			vma->exec_handle = handle;
			hlist_add_head(&vma->exec_node,
				       &eb->buckets[handle & eb->and]);
		}
		++i;
	}

	return 0;

err:
	while (!list_empty(&objects)) {
		obj = list_first_entry(&objects,
				       struct drm_i915_gem_object,
				       obj_exec_link);
		list_del_init(&obj->obj_exec_link);
		drm_gem_object_unreference(&obj->base);
	}
	/*
	 * Objects already transferred to the vmas list will be unreferenced by
	 * eb_destroy.
	 */

	return ret;
}
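
/*
 * Look up a vma previously registered by eb_lookup_vmas(). Small
 * execbuffers use the direct lookup table (eb->and is negative),
 * larger ones the handle hash.
 */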
static struct i915_vma *eb_get_vma(struct eb_vmas *eb, unsigned long handle)
{
	if (eb->and < 0) {
		if (handle >= -eb->and)
			return NULL;
		return eb->lut[handle];
	} else {
		struct hlist_head *head;
		struct hlist_node *node;

		head = &eb->buckets[handle & eb->and];
		hlist_for_each(node, head) {
			struct i915_vma *vma;

			vma = hlist_entry(node, struct i915_vma, exec_node);
			if (vma->exec_handle == handle)
				return vma;
		}
		return NULL;
	}
}

static void
i915_gem_execbuffer_unreserve_vma(struct i915_vma *vma)
{
	struct drm_i915_gem_exec_object2 *entry;
	struct drm_i915_gem_object *obj = vma->obj;

	if (!drm_mm_node_allocated(&vma->node))
		return;

	entry = vma->exec_entry;

	if (entry->flags & __EXEC_OBJECT_HAS_FENCE)
		i915_gem_object_unpin_fence(obj);

	if (entry->flags & __EXEC_OBJECT_HAS_PIN)
		vma->pin_count--;

	entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN);
}

static void eb_destroy(struct eb_vmas *eb)
{
	while (!list_empty(&eb->vmas)) {
		struct i915_vma *vma;

		vma = list_first_entry(&eb->vmas,
				       struct i915_vma,
				       exec_list);
		list_del_init(&vma->exec_list);
		i915_gem_execbuffer_unreserve_vma(vma);
		drm_gem_object_unreference(&vma->obj->base);
	}
	kfree(eb);
}

static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
{
	return (HAS_LLC(obj->base.dev) ||
		obj->base.write_domain == I915_GEM_DOMAIN_CPU ||
		!obj->map_and_fenceable ||
		obj->cache_level != I915_CACHE_NONE);
}

static int
relocate_entry_cpu(struct drm_i915_gem_object *obj,
		   struct drm_i915_gem_relocation_entry *reloc,
		   uint64_t target_offset)
{
	struct drm_device *dev = obj->base.dev;
	uint32_t page_offset = offset_in_page(reloc->offset);
	uint64_t delta = reloc->delta + target_offset;
	char *vaddr;
	int ret;

	ret = i915_gem_object_set_to_cpu_domain(obj, true);
	if (ret)
		return ret;

	vaddr = kmap_atomic(i915_gem_object_get_page(obj,
				reloc->offset >> PAGE_SHIFT));
	*(uint32_t *)(vaddr + page_offset) = lower_32_bits(delta);

	if (INTEL_INFO(dev)->gen >= 8) {
		page_offset = offset_in_page(page_offset + sizeof(uint32_t));

		if (page_offset == 0) {
			kunmap_atomic(vaddr);
			vaddr = kmap_atomic(i915_gem_object_get_page(obj,
			    (reloc->offset + sizeof(uint32_t)) >> PAGE_SHIFT));
		}

		*(uint32_t *)(vaddr + page_offset) = upper_32_bits(delta);
	}

	kunmap_atomic(vaddr);

	return 0;
}
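
/*
 * Apply a relocation by writing through an uncached GTT mapping of the
 * target page; this is the fallback whenever use_cpu_reloc() rules out
 * the CPU path. The mapping is created with pmap_mapdev_attr() and torn
 * down again once the one (or, on gen8+, two) dwords have been written.
 */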
static int
relocate_entry_gtt(struct drm_i915_gem_object *obj,
		   struct drm_i915_gem_relocation_entry *reloc,
		   uint64_t target_offset)
{
	struct drm_device *dev = obj->base.dev;
	uint64_t delta = reloc->delta + target_offset;
	uint32_t __iomem *reloc_entry;
	void __iomem *reloc_page;
	int ret;

	ret = i915_gem_object_set_to_gtt_domain(obj, true);
	if (ret)
		return ret;

	ret = i915_gem_object_put_fence(obj);
	if (ret)
		return ret;

	/* Map the page containing the relocation we're going to perform. */
	reloc->offset += i915_gem_obj_ggtt_offset(obj);
	reloc_page = pmap_mapdev_attr(dev->agp->base + (reloc->offset &
			~PAGE_MASK), PAGE_SIZE, PAT_WRITE_COMBINING);
	reloc_entry = (uint32_t __iomem *)
		((char *)reloc_page + offset_in_page(reloc->offset));
	iowrite32(lower_32_bits(delta), reloc_entry);

	if (INTEL_INFO(dev)->gen >= 8) {
		reloc_entry += 1;

		if (offset_in_page(reloc->offset + sizeof(uint32_t)) == 0) {
			pmap_unmapdev((vm_offset_t)reloc_page, PAGE_SIZE);
			reloc_page = pmap_mapdev_attr(
					dev->agp->base +
					reloc->offset + sizeof(uint32_t),
					PAGE_SIZE, PAT_WRITE_COMBINING);
			reloc_entry = reloc_page;
		}

		iowrite32(upper_32_bits(delta), reloc_entry);
	}

	pmap_unmapdev((vm_offset_t)reloc_page, PAGE_SIZE);

	return 0;
}

static int
i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
				   struct eb_vmas *eb,
				   struct drm_i915_gem_relocation_entry *reloc)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_gem_object *target_obj;
	struct drm_i915_gem_object *target_i915_obj;
	struct i915_vma *target_vma;
	uint64_t target_offset;
	int ret;

	/* we already hold a reference to all valid objects */
	target_vma = eb_get_vma(eb, reloc->target_handle);
	if (unlikely(target_vma == NULL))
		return -ENOENT;
	target_i915_obj = target_vma->obj;
	target_obj = &target_vma->obj->base;

	target_offset = target_vma->node.start;

	/* Sandybridge PPGTT errata: We need a global gtt mapping for MI and
	 * pipe_control writes because the gpu doesn't properly redirect them
	 * through the ppgtt for non_secure batchbuffers. */
	if (unlikely(IS_GEN6(dev) &&
	    reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
	    !target_i915_obj->has_global_gtt_mapping)) {
		struct i915_vma *vma =
			list_first_entry(&target_i915_obj->vma_list,
					 typeof(*vma), vma_link);
		vma->bind_vma(vma, target_i915_obj->cache_level, GLOBAL_BIND);
	}

	/* Validate that the target is in a valid r/w GPU domain */
	if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
		DRM_DEBUG("reloc with multiple write domains: "
			  "obj %p target %d offset %d "
			  "read %08x write %08x",
			  obj, reloc->target_handle,
			  (int) reloc->offset,
			  reloc->read_domains,
			  reloc->write_domain);
		return -EINVAL;
	}
	if (unlikely((reloc->write_domain | reloc->read_domains)
		     & ~I915_GEM_GPU_DOMAINS)) {
		DRM_DEBUG("reloc with read/write non-GPU domains: "
			  "obj %p target %d offset %d "
			  "read %08x write %08x",
			  obj, reloc->target_handle,
			  (int) reloc->offset,
			  reloc->read_domains,
			  reloc->write_domain);
		return -EINVAL;
	}

	target_obj->pending_read_domains |= reloc->read_domains;
	target_obj->pending_write_domain |= reloc->write_domain;

	/* If the relocation already has the right value in it, no
	 * more work needs to be done.
	 */
	if (target_offset == reloc->presumed_offset)
		return 0;

	/* Check that the relocation address is valid... */
	if (unlikely(reloc->offset >
		     obj->base.size - (INTEL_INFO(dev)->gen >= 8 ? 8 : 4))) {
		DRM_DEBUG("Relocation beyond object bounds: "
			  "obj %p target %d offset %d size %d.\n",
			  obj, reloc->target_handle,
			  (int) reloc->offset,
			  (int) obj->base.size);
		return -EINVAL;
	}
	if (unlikely(reloc->offset & 3)) {
		DRM_DEBUG("Relocation not 4-byte aligned: "
			  "obj %p target %d offset %d.\n",
			  obj, reloc->target_handle,
			  (int) reloc->offset);
		return -EINVAL;
	}

	/* We can't wait for rendering with pagefaults disabled */
	if (obj->active && (curthread->td_flags & TDF_NOFAULT))
		return -EFAULT;

	if (use_cpu_reloc(obj))
		ret = relocate_entry_cpu(obj, reloc, target_offset);
	else
		ret = relocate_entry_gtt(obj, reloc, target_offset);

	if (ret)
		return ret;

	/* and update the user's relocation entry */
	reloc->presumed_offset = target_offset;

	return 0;
}
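
/*
 * Fast-path relocation processing: pull the relocation entries in from
 * userspace in small stack-sized batches (N_RELOC caps a batch at 512
 * bytes) and apply them, writing back any presumed_offset that changed.
 * This runs with pagefaults disabled, so the atomic user copies may
 * fail with -EFAULT and punt us to the slow path.
 */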
static int
i915_gem_execbuffer_relocate_vma(struct i915_vma *vma,
				 struct eb_vmas *eb)
{
#define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
	struct drm_i915_gem_relocation_entry stack_reloc[N_RELOC(512)];
	struct drm_i915_gem_relocation_entry __user *user_relocs;
	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
	int remain, ret;

	user_relocs = to_user_ptr(entry->relocs_ptr);

	remain = entry->relocation_count;
	while (remain) {
		struct drm_i915_gem_relocation_entry *r = stack_reloc;
		int count = remain;
		if (count > ARRAY_SIZE(stack_reloc))
			count = ARRAY_SIZE(stack_reloc);
		remain -= count;

		if (__copy_from_user_inatomic(r, user_relocs, count*sizeof(r[0])))
			return -EFAULT;

		do {
			u64 offset = r->presumed_offset;

			ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, r);
			if (ret)
				return ret;

			if (r->presumed_offset != offset &&
			    __copy_to_user_inatomic(&user_relocs->presumed_offset,
						    &r->presumed_offset,
						    sizeof(r->presumed_offset))) {
				return -EFAULT;
			}

			user_relocs++;
			r++;
		} while (--count);
	}

	return 0;
#undef N_RELOC
}

static int
i915_gem_execbuffer_relocate_vma_slow(struct i915_vma *vma,
				      struct eb_vmas *eb,
				      struct drm_i915_gem_relocation_entry *relocs)
{
	const struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
	int i, ret;

	for (i = 0; i < entry->relocation_count; i++) {
		ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, &relocs[i]);
		if (ret)
			return ret;
	}

	return 0;
}

static int
i915_gem_execbuffer_relocate(struct eb_vmas *eb)
{
	struct i915_vma *vma;
	int ret = 0;

	/* This is the fast path and we cannot handle a pagefault whilst
	 * holding the struct mutex lest the user pass in the relocations
	 * contained within a mmaped bo. In such a case, the page fault
	 * handler would call i915_gem_fault() and we would try to
	 * acquire the struct mutex again. Obviously this is bad and so
	 * lockdep complains vehemently.
	 */
	pagefault_disable();
	list_for_each_entry(vma, &eb->vmas, exec_list) {
		ret = i915_gem_execbuffer_relocate_vma(vma, eb);
		if (ret)
			break;
	}
	pagefault_enable();

	return ret;
}

static int
need_reloc_mappable(struct i915_vma *vma)
{
	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
	return entry->relocation_count && !use_cpu_reloc(vma->obj) &&
		i915_is_ggtt(vma->vm);
}
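
/*
 * Pin a single vma for execution, requesting a mappable and/or global
 * binding as dictated by its fence and relocation needs, and flag the
 * entry so that the pin (and any fence) can be dropped again on the
 * error paths.
 */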
static int
i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
				struct intel_engine_cs *ring,
				bool *need_reloc)
{
	struct drm_i915_gem_object *obj = vma->obj;
	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
	bool need_fence;
	uint64_t flags;
	int ret;

	flags = 0;

	need_fence =
		has_fenced_gpu_access &&
		entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
		obj->tiling_mode != I915_TILING_NONE;
	if (need_fence || need_reloc_mappable(vma))
		flags |= PIN_MAPPABLE;

	if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
		flags |= PIN_GLOBAL;
	if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS)
		flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;

	ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags);
	if (ret)
		return ret;

	entry->flags |= __EXEC_OBJECT_HAS_PIN;

	if (has_fenced_gpu_access) {
		if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
			ret = i915_gem_object_get_fence(obj);
			if (ret)
				return ret;

			if (i915_gem_object_pin_fence(obj))
				entry->flags |= __EXEC_OBJECT_HAS_FENCE;

			obj->pending_fenced_gpu_access = true;
		}
	}

	if (entry->offset != vma->node.start) {
		entry->offset = vma->node.start;
		*need_reloc = true;
	}

	if (entry->flags & EXEC_OBJECT_WRITE) {
		obj->base.pending_read_domains = I915_GEM_DOMAIN_RENDER;
		obj->base.pending_write_domain = I915_GEM_DOMAIN_RENDER;
	}

	return 0;
}

static bool
eb_vma_misplaced(struct i915_vma *vma, bool has_fenced_gpu_access)
{
	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
	struct drm_i915_gem_object *obj = vma->obj;
	bool need_fence, need_mappable;

	need_fence =
		has_fenced_gpu_access &&
		entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
		obj->tiling_mode != I915_TILING_NONE;
	need_mappable = need_fence || need_reloc_mappable(vma);

	WARN_ON((need_mappable || need_fence) &&
		!i915_is_ggtt(vma->vm));

	if (entry->alignment &&
	    vma->node.start & (entry->alignment - 1))
		return true;

	if (need_mappable && !obj->map_and_fenceable)
		return true;

	if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
	    vma->node.start < BATCH_OFFSET_BIAS)
		return true;

	return false;
}
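
/*
 * Reserve GTT space for every vma in the execbuffer. Objects that need
 * a mappable binding are sorted to the front of the list, then binding
 * proceeds in the phases described in the comment below, retrying once
 * after evicting the address space if we run out of room.
 */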
static int
i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
			    struct list_head *vmas,
			    bool *need_relocs)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	struct i915_address_space *vm;
	struct list_head ordered_vmas;
	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
	int retry;

	if (list_empty(vmas))
		return 0;

	i915_gem_retire_requests_ring(ring);

	vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm;

	INIT_LIST_HEAD(&ordered_vmas);
	while (!list_empty(vmas)) {
		struct drm_i915_gem_exec_object2 *entry;
		bool need_fence, need_mappable;

		vma = list_first_entry(vmas, struct i915_vma, exec_list);
		obj = vma->obj;
		entry = vma->exec_entry;

		need_fence =
			has_fenced_gpu_access &&
			entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
			obj->tiling_mode != I915_TILING_NONE;
		need_mappable = need_fence || need_reloc_mappable(vma);

		if (need_mappable)
			list_move(&vma->exec_list, &ordered_vmas);
		else
			list_move_tail(&vma->exec_list, &ordered_vmas);

		obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND;
		obj->base.pending_write_domain = 0;
		obj->pending_fenced_gpu_access = false;
	}
	list_splice(&ordered_vmas, vmas);

	/* Attempt to pin all of the buffers into the GTT.
	 * This is done in 3 phases:
	 *
	 * 1a. Unbind all objects that do not match the GTT constraints for
	 *     the execbuffer (fenceable, mappable, alignment etc).
	 * 1b. Increment pin count for already bound objects.
	 * 2.  Bind new objects.
	 * 3.  Decrement pin count.
	 *
	 * This avoids unnecessary unbinding of later objects in order to make
	 * room for the earlier objects *unless* we need to defragment.
	 */
	retry = 0;
	do {
		int ret = 0;

		/* Unbind any ill-fitting objects or pin. */
		list_for_each_entry(vma, vmas, exec_list) {
			if (!drm_mm_node_allocated(&vma->node))
				continue;

			if (eb_vma_misplaced(vma, has_fenced_gpu_access))
				ret = i915_vma_unbind(vma);
			else
				ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs);
			if (ret)
				goto err;
		}

		/* Bind fresh objects */
		list_for_each_entry(vma, vmas, exec_list) {
			if (drm_mm_node_allocated(&vma->node))
				continue;

			ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs);
			if (ret)
				goto err;
		}

err:
		if (ret != -ENOSPC || retry++)
			return ret;

		/* Decrement pin count for bound objects */
		list_for_each_entry(vma, vmas, exec_list)
			i915_gem_execbuffer_unreserve_vma(vma);

		ret = i915_gem_evict_vm(vm, true);
		if (ret)
			return ret;
	} while (1);
}
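
/*
 * Slow-path relocations: drop struct_mutex, copy all relocation lists
 * into a kernel buffer (marking the user's presumed offsets invalid on
 * the way), then retake the lock, re-reserve the objects and apply the
 * relocations from the stable copy.
 */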
static int
i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
				  struct drm_i915_gem_execbuffer2 *args,
				  struct drm_file *file,
				  struct intel_engine_cs *ring,
				  struct eb_vmas *eb,
				  struct drm_i915_gem_exec_object2 *exec)
{
	struct drm_i915_gem_relocation_entry *reloc;
	struct i915_address_space *vm;
	struct i915_vma *vma;
	bool need_relocs;
	int *reloc_offset;
	int i, total, ret;
	unsigned count = args->buffer_count;

	if (WARN_ON(list_empty(&eb->vmas)))
		return 0;

	vm = list_first_entry(&eb->vmas, struct i915_vma, exec_list)->vm;

	/* We may process another execbuffer during the unlock... */
	while (!list_empty(&eb->vmas)) {
		vma = list_first_entry(&eb->vmas, struct i915_vma, exec_list);
		list_del_init(&vma->exec_list);
		i915_gem_execbuffer_unreserve_vma(vma);
		drm_gem_object_unreference(&vma->obj->base);
	}

	mutex_unlock(&dev->struct_mutex);

	total = 0;
	for (i = 0; i < count; i++)
		total += exec[i].relocation_count;

	reloc_offset = drm_malloc_ab(count, sizeof(*reloc_offset));
	reloc = drm_malloc_ab(total, sizeof(*reloc));
	if (reloc == NULL || reloc_offset == NULL) {
		drm_free_large(reloc);
		drm_free_large(reloc_offset);
		mutex_lock(&dev->struct_mutex);
		return -ENOMEM;
	}

	total = 0;
	for (i = 0; i < count; i++) {
		struct drm_i915_gem_relocation_entry __user *user_relocs;
		u64 invalid_offset = (u64)-1;
		int j;

		user_relocs = to_user_ptr(exec[i].relocs_ptr);

		if (copy_from_user(reloc+total, user_relocs,
				   exec[i].relocation_count * sizeof(*reloc))) {
			ret = -EFAULT;
			mutex_lock(&dev->struct_mutex);
			goto err;
		}

		/* As we do not update the known relocation offsets after
		 * relocating (due to the complexities in lock handling),
		 * we need to mark them as invalid now so that we force the
		 * relocation processing next time. Just in case the target
		 * object is evicted and then rebound into its old
		 * presumed_offset before the next execbuffer - if that
		 * happened we would make the mistake of assuming that the
		 * relocations were valid.
		 */
		for (j = 0; j < exec[i].relocation_count; j++) {
			if (__copy_to_user(&user_relocs[j].presumed_offset,
					   &invalid_offset,
					   sizeof(invalid_offset))) {
				ret = -EFAULT;
				mutex_lock(&dev->struct_mutex);
				goto err;
			}
		}

		reloc_offset[i] = total;
		total += exec[i].relocation_count;
	}

	ret = i915_mutex_lock_interruptible(dev);
	if (ret) {
		mutex_lock(&dev->struct_mutex);
		goto err;
	}

	/* reacquire the objects */
	eb_reset(eb);
	ret = eb_lookup_vmas(eb, exec, args, vm, file);
	if (ret)
		goto err;

	need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
	ret = i915_gem_execbuffer_reserve(ring, &eb->vmas, &need_relocs);
	if (ret)
		goto err;

	list_for_each_entry(vma, &eb->vmas, exec_list) {
		int offset = vma->exec_entry - exec;
		ret = i915_gem_execbuffer_relocate_vma_slow(vma, eb,
							    reloc + reloc_offset[offset]);
		if (ret)
			goto err;
	}

	/* Leave the user relocations as are, this is the painfully slow path,
	 * and we want to avoid the complication of dropping the lock whilst
	 * having buffers reserved in the aperture and so causing spurious
	 * ENOSPC for random operations.
	 */

err:
	drm_free_large(reloc);
	drm_free_large(reloc_offset);
	return ret;
}
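
/*
 * Flush outstanding CPU and chipset writes for every object and sync
 * against rendering on other rings, then invalidate the GPU caches so
 * the batch sees coherent data.
 */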
static int
i915_gem_execbuffer_move_to_gpu(struct intel_engine_cs *ring,
				struct list_head *vmas)
{
	struct i915_vma *vma;
	uint32_t flush_domains = 0;
	bool flush_chipset = false;
	int ret;

	list_for_each_entry(vma, vmas, exec_list) {
		struct drm_i915_gem_object *obj = vma->obj;
		ret = i915_gem_object_sync(obj, ring);
		if (ret)
			return ret;

		if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
			flush_chipset |= i915_gem_clflush_object(obj, false);

		flush_domains |= obj->base.write_domain;
	}

	if (flush_chipset)
		i915_gem_chipset_flush(ring->dev);

	if (flush_domains & I915_GEM_DOMAIN_GTT)
		wmb();

	/* Unconditionally invalidate gpu caches and ensure that we do flush
	 * any residual writes from the previous batch.
	 */
	return intel_ring_invalidate_all_caches(ring);
}

static bool
i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
{
	if (exec->flags & __I915_EXEC_UNKNOWN_FLAGS)
		return false;

	return ((exec->batch_start_offset | exec->batch_len) & 0x7) == 0;
}

static int
validate_exec_list(struct drm_i915_gem_exec_object2 *exec,
		   int count)
{
	int i;
	unsigned relocs_total = 0;
	unsigned relocs_max = UINT_MAX / sizeof(struct drm_i915_gem_relocation_entry);

	for (i = 0; i < count; i++) {
		char __user *ptr = to_user_ptr(exec[i].relocs_ptr);
		int length; /* limited by fault_in_pages_readable() */

		if (exec[i].flags & __EXEC_OBJECT_UNKNOWN_FLAGS)
			return -EINVAL;

		/* First check for malicious input causing overflow in
		 * the worst case where we need to allocate the entire
		 * relocation tree as a single array.
		 */
		if (exec[i].relocation_count > relocs_max - relocs_total)
			return -EINVAL;
		relocs_total += exec[i].relocation_count;

		length = exec[i].relocation_count *
			sizeof(struct drm_i915_gem_relocation_entry);
		/*
		 * We must check that the entire relocation array is safe
		 * to read, but since we may need to update the presumed
		 * offsets during execution, check for full write access.
		 */
#if 0
		if (!access_ok(VERIFY_WRITE, ptr, length))
			return -EFAULT;
#endif

		if (likely(!i915.prefault_disable)) {
			if (fault_in_multipages_readable(ptr, length))
				return -EFAULT;
		}
	}

	return 0;
}
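
/*
 * Resolve the context id for this submission: non-default contexts are
 * only valid on the render ring, and banned contexts are refused.
 */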
static struct intel_context *
i915_gem_validate_context(struct drm_device *dev, struct drm_file *file,
			  struct intel_engine_cs *ring, const u32 ctx_id)
{
	struct intel_context *ctx = NULL;
	struct i915_ctx_hang_stats *hs;

	if (ring->id != RCS && ctx_id != DEFAULT_CONTEXT_ID)
		return ERR_PTR(-EINVAL);

	ctx = i915_gem_context_get(file->driver_priv, ctx_id);
	if (IS_ERR(ctx))
		return ctx;

	hs = &ctx->hang_stats;
	if (hs->banned) {
		DRM_DEBUG("Context %u tried to submit while banned\n", ctx_id);
		return ERR_PTR(-EIO);
	}

	return ctx;
}

static void
i915_gem_execbuffer_move_to_active(struct list_head *vmas,
				   struct intel_engine_cs *ring)
{
	struct i915_vma *vma;

	list_for_each_entry(vma, vmas, exec_list) {
		struct drm_i915_gem_object *obj = vma->obj;
		u32 old_read = obj->base.read_domains;
		u32 old_write = obj->base.write_domain;

		obj->base.write_domain = obj->base.pending_write_domain;
		if (obj->base.write_domain == 0)
			obj->base.pending_read_domains |= obj->base.read_domains;
		obj->base.read_domains = obj->base.pending_read_domains;
		obj->fenced_gpu_access = obj->pending_fenced_gpu_access;

		i915_vma_move_to_active(vma, ring);
		if (obj->base.write_domain) {
			obj->dirty = 1;
			obj->last_write_seqno = intel_ring_get_seqno(ring);
			/* check for potential scanout */
			if (i915_gem_obj_ggtt_bound(obj) &&
			    i915_gem_obj_to_ggtt(obj)->pin_count)
				intel_mark_fb_busy(obj, ring);

			/* update for the implicit flush after a batch */
			obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
		}

		trace_i915_gem_object_change_domain(obj, old_read, old_write);
	}
}

static void
i915_gem_execbuffer_retire_commands(struct drm_device *dev,
				    struct drm_file *file,
				    struct intel_engine_cs *ring,
				    struct drm_i915_gem_object *obj)
{
	/* Unconditionally force add_request to emit a full flush. */
	ring->gpu_caches_dirty = true;

	/* Add a breadcrumb for the completion of the batch buffer */
	(void)__i915_add_request(ring, file, obj, NULL);
}

static int
i915_reset_gen7_sol_offsets(struct drm_device *dev,
			    struct intel_engine_cs *ring)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret, i;

	if (!IS_GEN7(dev) || ring != &dev_priv->ring[RCS]) {
		DRM_DEBUG("sol reset is gen7/rcs only\n");
		return -EINVAL;
	}

	ret = intel_ring_begin(ring, 4 * 3);
	if (ret)
		return ret;

	for (i = 0; i < 4; i++) {
		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
		intel_ring_emit(ring, GEN7_SO_WRITE_OFFSET(i));
		intel_ring_emit(ring, 0);
	}

	intel_ring_advance(ring);

	return 0;
}

/**
 * Find one BSD ring to dispatch the corresponding BSD command.
 * The Ring ID is returned.
 */
static int gen8_dispatch_bsd_ring(struct drm_device *dev,
				  struct drm_file *file)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_file_private *file_priv = file->driver_priv;

	/* Check whether the file_priv is using one ring */
	if (file_priv->bsd_ring)
		return file_priv->bsd_ring->id;
	else {
		/* If no, use the ping-pong mechanism to select one ring */
		int ring_id;

		mutex_lock(&dev->struct_mutex);
		if (dev_priv->mm.bsd_ring_dispatch_index == 0) {
			ring_id = VCS;
			dev_priv->mm.bsd_ring_dispatch_index = 1;
		} else {
			ring_id = VCS2;
			dev_priv->mm.bsd_ring_dispatch_index = 0;
		}
		file_priv->bsd_ring = &dev_priv->ring[ring_id];
		mutex_unlock(&dev->struct_mutex);
		return ring_id;
	}
}
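
/*
 * The batch buffer is always the last object on the execbuffer list,
 * i.e. the tail of eb->vmas.
 */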
static struct drm_i915_gem_object *
eb_get_batch(struct eb_vmas *eb)
{
	struct i915_vma *vma = list_entry(eb->vmas.prev, typeof(*vma), exec_list);

	/*
	 * SNA is doing fancy tricks with compressing batch buffers, which leads
	 * to negative relocation deltas. Usually that works out ok since the
	 * relocate address is still positive, except when the batch is placed
	 * very low in the GTT. Ensure this doesn't happen.
	 *
	 * Note that actual hangs have only been observed on gen7, but for
	 * paranoia do it everywhere.
	 */
	vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;

	return vma->obj;
}

static int
i915_gem_do_execbuffer(struct drm_device *dev, void *data,
		       struct drm_file *file,
		       struct drm_i915_gem_execbuffer2 *args,
		       struct drm_i915_gem_exec_object2 *exec)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct eb_vmas *eb;
	struct drm_i915_gem_object *batch_obj;
	struct drm_clip_rect *cliprects = NULL;
	struct intel_engine_cs *ring;
	struct intel_context *ctx;
	struct i915_address_space *vm;
	const u32 ctx_id = i915_execbuffer2_get_context_id(*args);
	u64 exec_start = args->batch_start_offset, exec_len;
	u32 mask, flags;
	int ret, mode, i;
	bool need_relocs;

	if (!i915_gem_check_execbuffer(args))
		return -EINVAL;

	ret = validate_exec_list(exec, args->buffer_count);
	if (ret)
		return ret;

	flags = 0;
	if (args->flags & I915_EXEC_SECURE)
		flags |= I915_DISPATCH_SECURE;
	if (args->flags & I915_EXEC_IS_PINNED)
		flags |= I915_DISPATCH_PINNED;

	if ((args->flags & I915_EXEC_RING_MASK) > LAST_USER_RING) {
		DRM_DEBUG("execbuf with unknown ring: %d\n",
			  (int)(args->flags & I915_EXEC_RING_MASK));
		return -EINVAL;
	}

	if ((args->flags & I915_EXEC_RING_MASK) == I915_EXEC_DEFAULT)
		ring = &dev_priv->ring[RCS];
	else if ((args->flags & I915_EXEC_RING_MASK) == I915_EXEC_BSD) {
		if (HAS_BSD2(dev)) {
			int ring_id;
			ring_id = gen8_dispatch_bsd_ring(dev, file);
			ring = &dev_priv->ring[ring_id];
		} else
			ring = &dev_priv->ring[VCS];
	} else
		ring = &dev_priv->ring[(args->flags & I915_EXEC_RING_MASK) - 1];

	if (!intel_ring_initialized(ring)) {
		DRM_DEBUG("execbuf with invalid ring: %d\n",
			  (int)(args->flags & I915_EXEC_RING_MASK));
		return -EINVAL;
	}

	mode = args->flags & I915_EXEC_CONSTANTS_MASK;
	mask = I915_EXEC_CONSTANTS_MASK;
	switch (mode) {
	case I915_EXEC_CONSTANTS_REL_GENERAL:
	case I915_EXEC_CONSTANTS_ABSOLUTE:
	case I915_EXEC_CONSTANTS_REL_SURFACE:
		if (mode != 0 && ring != &dev_priv->ring[RCS]) {
			DRM_DEBUG("non-0 rel constants mode on non-RCS\n");
			return -EINVAL;
		}

		if (mode != dev_priv->relative_constants_mode) {
			if (INTEL_INFO(dev)->gen < 4) {
				DRM_DEBUG("no rel constants on pre-gen4\n");
				return -EINVAL;
			}

			if (INTEL_INFO(dev)->gen > 5 &&
			    mode == I915_EXEC_CONSTANTS_REL_SURFACE) {
				DRM_DEBUG("rel surface constants mode invalid on gen5+\n");
				return -EINVAL;
			}

			/* The HW changed the meaning of this bit on gen6 */
			if (INTEL_INFO(dev)->gen >= 6)
				mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE;
		}
		break;
	default:
		DRM_DEBUG("execbuf with unknown constants: %d\n", mode);
		return -EINVAL;
	}

	if (args->buffer_count < 1) {
		DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
		return -EINVAL;
	}

	if (args->num_cliprects != 0) {
		if (ring != &dev_priv->ring[RCS]) {
			DRM_DEBUG("clip rectangles are only valid with the render ring\n");
			return -EINVAL;
		}

		if (INTEL_INFO(dev)->gen >= 5) {
			DRM_DEBUG("clip rectangles are only valid on pre-gen5\n");
			return -EINVAL;
		}

		if (args->num_cliprects > UINT_MAX / sizeof(*cliprects)) {
			DRM_DEBUG("execbuf with %u cliprects\n",
				  args->num_cliprects);
			return -EINVAL;
		}

		cliprects = kcalloc(args->num_cliprects,
				    sizeof(*cliprects),
				    GFP_KERNEL);
		if (cliprects == NULL) {
			ret = -ENOMEM;
			goto pre_mutex_err;
		}

		if (copy_from_user(cliprects,
				   to_user_ptr(args->cliprects_ptr),
				   sizeof(*cliprects)*args->num_cliprects)) {
			ret = -EFAULT;
			goto pre_mutex_err;
		}
	} else {
		if (args->DR4 == 0xffffffff) {
			DRM_DEBUG("UXA submitting garbage DR4, fixing up\n");
			args->DR4 = 0;
		}

		if (args->DR1 || args->DR4 || args->cliprects_ptr) {
			DRM_DEBUG("0 cliprects but dirt in cliprects fields\n");
			return -EINVAL;
		}
	}

	intel_runtime_pm_get(dev_priv);

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		goto pre_mutex_err;

	if (dev_priv->ums.mm_suspended) {
		mutex_unlock(&dev->struct_mutex);
		ret = -EBUSY;
		goto pre_mutex_err;
	}

	ctx = i915_gem_validate_context(dev, file, ring, ctx_id);
	if (IS_ERR(ctx)) {
		mutex_unlock(&dev->struct_mutex);
		ret = PTR_ERR(ctx);
		goto pre_mutex_err;
	}

	i915_gem_context_reference(ctx);

	vm = ctx->vm;
	if (!USES_FULL_PPGTT(dev))
		vm = &dev_priv->gtt.base;

	eb = eb_create(args);
	if (eb == NULL) {
		i915_gem_context_unreference(ctx);
		mutex_unlock(&dev->struct_mutex);
		ret = -ENOMEM;
		goto pre_mutex_err;
	}

	/* Look up object handles */
	ret = eb_lookup_vmas(eb, exec, args, vm, file);
	if (ret)
		goto err;

	/* take note of the batch buffer before we might reorder the lists */
	batch_obj = eb_get_batch(eb);

	/* Move the objects en-masse into the GTT, evicting if necessary. */
	need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
	ret = i915_gem_execbuffer_reserve(ring, &eb->vmas, &need_relocs);
	if (ret)
		goto err;

	/* The objects are in their final locations, apply the relocations. */
	if (need_relocs)
		ret = i915_gem_execbuffer_relocate(eb);
	if (ret) {
		if (ret == -EFAULT) {
			ret = i915_gem_execbuffer_relocate_slow(dev, args, file, ring,
								eb, exec);
			BUG_ON(!mutex_is_locked(&dev->struct_mutex));
		}
		if (ret)
			goto err;
	}

	/* Set the pending read domains for the batch buffer to COMMAND */
	if (batch_obj->base.pending_write_domain) {
		DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
		ret = -EINVAL;
		goto err;
	}
	batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;

	if (i915_needs_cmd_parser(ring)) {
		ret = i915_parse_cmds(ring,
				      batch_obj,
				      args->batch_start_offset,
				      file->is_master);
		if (ret)
			goto err;

		/*
		 * XXX: Actually do this when enabling batch copy...
		 *
		 * Set the DISPATCH_SECURE bit to remove the NON_SECURE bit
		 * from MI_BATCH_BUFFER_START commands issued in the
		 * dispatch_execbuffer implementations. We specifically don't
		 * want that set when the command parser is enabled.
		 */
	}

	/* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
	 * batch" bit. Hence we need to pin secure batches into the global gtt.
	 * hsw should have this fixed, but bdw mucks it up again. */
	if (flags & I915_DISPATCH_SECURE &&
	    !batch_obj->has_global_gtt_mapping) {
		/* When we have multiple VMs, we'll need to make sure that we
		 * allocate space first */
		struct i915_vma *vma = i915_gem_obj_to_ggtt(batch_obj);
		BUG_ON(!vma);
		vma->bind_vma(vma, batch_obj->cache_level, GLOBAL_BIND);
	}

	if (flags & I915_DISPATCH_SECURE)
		exec_start += i915_gem_obj_ggtt_offset(batch_obj);
	else
		exec_start += i915_gem_obj_offset(batch_obj, vm);

	ret = i915_gem_execbuffer_move_to_gpu(ring, &eb->vmas);
	if (ret)
		goto err;

	ret = i915_switch_context(ring, ctx);
	if (ret)
		goto err;

	if (ring == &dev_priv->ring[RCS] &&
	    mode != dev_priv->relative_constants_mode) {
		ret = intel_ring_begin(ring, 4);
		if (ret)
			goto err;

		intel_ring_emit(ring, MI_NOOP);
		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
		intel_ring_emit(ring, INSTPM);
		intel_ring_emit(ring, mask << 16 | mode);
		intel_ring_advance(ring);

		dev_priv->relative_constants_mode = mode;
	}

	if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
		ret = i915_reset_gen7_sol_offsets(dev, ring);
		if (ret)
			goto err;
	}

	exec_len = args->batch_len;
	if (cliprects) {
		for (i = 0; i < args->num_cliprects; i++) {
			ret = i915_emit_box(dev, &cliprects[i],
					    args->DR1, args->DR4);
			if (ret)
				goto err;

			ret = ring->dispatch_execbuffer(ring,
							exec_start, exec_len,
							flags);
			if (ret)
				goto err;
		}
	} else {
		ret = ring->dispatch_execbuffer(ring,
						exec_start, exec_len,
						flags);
		if (ret)
			goto err;
	}

	trace_i915_gem_ring_dispatch(ring, intel_ring_get_seqno(ring), flags);

	i915_gem_execbuffer_move_to_active(&eb->vmas, ring);
	i915_gem_execbuffer_retire_commands(dev, file, ring, batch_obj);

err:
	/* the request owns the ref now */
	i915_gem_context_unreference(ctx);
	eb_destroy(eb);

	mutex_unlock(&dev->struct_mutex);

pre_mutex_err:
	kfree(cliprects);

	/* intel_gpu_busy should also get a ref, so it will free when the device
	 * is really idle. */
	intel_runtime_pm_put(dev_priv);
	return ret;
}

/*
 * Legacy execbuffer just creates an exec2 list from the original exec object
 * list array and passes it to the real function.
 */
int
i915_gem_execbuffer(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_gem_execbuffer *args = data;
	struct drm_i915_gem_execbuffer2 exec2;
	struct drm_i915_gem_exec_object *exec_list = NULL;
	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
	int ret, i;

	if (args->buffer_count < 1) {
		DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
		return -EINVAL;
	}

	/* Copy in the exec list from userland */
	exec_list = drm_malloc_ab(sizeof(*exec_list), args->buffer_count);
	exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count);
	if (exec_list == NULL || exec2_list == NULL) {
		DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
			  args->buffer_count);
		drm_free_large(exec_list);
		drm_free_large(exec2_list);
		return -ENOMEM;
	}
	ret = copy_from_user(exec_list,
			     to_user_ptr(args->buffers_ptr),
			     sizeof(*exec_list) * args->buffer_count);
	if (ret != 0) {
		DRM_DEBUG("copy %d exec entries failed %d\n",
			  args->buffer_count, ret);
		drm_free_large(exec_list);
		drm_free_large(exec2_list);
		return -EFAULT;
	}

	for (i = 0; i < args->buffer_count; i++) {
		exec2_list[i].handle = exec_list[i].handle;
		exec2_list[i].relocation_count = exec_list[i].relocation_count;
		exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
		exec2_list[i].alignment = exec_list[i].alignment;
		exec2_list[i].offset = exec_list[i].offset;
		if (INTEL_INFO(dev)->gen < 4)
			exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE;
		else
			exec2_list[i].flags = 0;
	}

	exec2.buffers_ptr = args->buffers_ptr;
	exec2.buffer_count = args->buffer_count;
	exec2.batch_start_offset = args->batch_start_offset;
	exec2.batch_len = args->batch_len;
	exec2.DR1 = args->DR1;
	exec2.DR4 = args->DR4;
	exec2.num_cliprects = args->num_cliprects;
	exec2.cliprects_ptr = args->cliprects_ptr;
	exec2.flags = I915_EXEC_RENDER;
	i915_execbuffer2_set_context_id(exec2, 0);

	ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list);
	if (!ret) {
		struct drm_i915_gem_exec_object __user *user_exec_list =
			to_user_ptr(args->buffers_ptr);

		/* Copy the new buffer offsets back to the user's exec list. */
		for (i = 0; i < args->buffer_count; i++) {
			ret = __copy_to_user(&user_exec_list[i].offset,
					     &exec2_list[i].offset,
					     sizeof(user_exec_list[i].offset));
			if (ret) {
				ret = -EFAULT;
				DRM_DEBUG("failed to copy %d exec entries "
					  "back to user (%d)\n",
					  args->buffer_count, ret);
				break;
			}
		}
	}

	drm_free_large(exec_list);
	drm_free_large(exec2_list);
	return ret;
}
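
/*
 * execbuffer2 ioctl entry point: copies in the exec object list, hands
 * it to i915_gem_do_execbuffer() and, on success, writes the final
 * object offsets back so userspace can supply valid presumed offsets
 * next time. Very roughly, a caller drives it along these lines (a
 * sketch, not taken from any particular userspace driver; the batch
 * must be the last entry in the buffer list):
 *
 *	struct drm_i915_gem_execbuffer2 execbuf = {
 *		.buffers_ptr = (uintptr_t)exec2_list,
 *		.buffer_count = nbuf,
 *		.batch_len = batch_bytes,
 *		.flags = I915_EXEC_RENDER,
 *	};
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
 */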
int
i915_gem_execbuffer2(struct drm_device *dev, void *data,
		     struct drm_file *file)
{
	struct drm_i915_gem_execbuffer2 *args = data;
	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
	int ret;

	if (args->buffer_count < 1 ||
	    args->buffer_count > UINT_MAX / sizeof(*exec2_list)) {
		DRM_DEBUG("execbuf2 with %d buffers\n", args->buffer_count);
		return -EINVAL;
	}

	if (args->rsvd2 != 0) {
		DRM_DEBUG("dirty rsvd2 field\n");
		return -EINVAL;
	}

	exec2_list = kmalloc(sizeof(*exec2_list)*args->buffer_count,
			     M_DRM, M_NOWAIT);
	if (exec2_list == NULL)
		exec2_list = drm_malloc_ab(sizeof(*exec2_list),
					   args->buffer_count);
	if (exec2_list == NULL) {
		DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
			  args->buffer_count);
		return -ENOMEM;
	}
	ret = copy_from_user(exec2_list,
			     to_user_ptr(args->buffers_ptr),
			     sizeof(*exec2_list) * args->buffer_count);
	if (ret != 0) {
		DRM_DEBUG("copy %d exec entries failed %d\n",
			  args->buffer_count, ret);
		drm_free_large(exec2_list);
		return -EFAULT;
	}

	ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list);
	if (!ret) {
		/* Copy the new buffer offsets back to the user's exec list. */
		struct drm_i915_gem_exec_object2 *user_exec_list =
			to_user_ptr(args->buffers_ptr);
		int i;

		for (i = 0; i < args->buffer_count; i++) {
			ret = __copy_to_user(&user_exec_list[i].offset,
					     &exec2_list[i].offset,
					     sizeof(user_exec_list[i].offset));
			if (ret) {
				ret = -EFAULT;
				DRM_DEBUG("failed to copy %d exec entries "
					  "back to user\n",
					  args->buffer_count);
				break;
			}
		}
	}

	drm_free_large(exec2_list);
	return ret;
}