1 /* 2 * Copyright © 2008,2010 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 * Authors: 24 * Eric Anholt <eric@anholt.net> 25 * Chris Wilson <chris@chris-wilson.co.uk> 26 * 27 */ 28 29 #include <drm/drmP.h> 30 #include <drm/i915_drm.h> 31 #include "i915_drv.h" 32 #include "i915_trace.h" 33 #include "intel_drv.h" 34 #include <linux/highmem.h> 35 36 struct eb_objects { 37 struct list_head objects; 38 int and; 39 union { 40 struct drm_i915_gem_object *lut[0]; 41 struct hlist_head buckets[0]; 42 }; 43 }; 44 45 static struct eb_objects * 46 eb_create(struct drm_i915_gem_execbuffer2 *args) 47 { 48 struct eb_objects *eb = NULL; 49 50 if (args->flags & I915_EXEC_HANDLE_LUT) { 51 int size = args->buffer_count; 52 size *= sizeof(struct drm_i915_gem_object *); 53 size += sizeof(struct eb_objects); 54 eb = kmalloc(size, M_DRM, M_WAITOK); 55 } 56 57 if (eb == NULL) { 58 int size = args->buffer_count; 59 int count = PAGE_SIZE / sizeof(struct hlist_head) / 2; 60 BUILD_BUG_ON_NOT_POWER_OF_2(PAGE_SIZE / sizeof(struct hlist_head)); 61 while (count > 2*size) 62 count >>= 1; 63 eb = kzalloc(count*sizeof(struct hlist_head) + 64 sizeof(struct eb_objects), 65 GFP_TEMPORARY); 66 if (eb == NULL) 67 return eb; 68 69 eb->and = count - 1; 70 } else 71 eb->and = -args->buffer_count; 72 73 INIT_LIST_HEAD(&eb->objects); 74 return eb; 75 } 76 77 static void 78 eb_reset(struct eb_objects *eb) 79 { 80 if (eb->and >= 0) 81 memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head)); 82 } 83 84 static int 85 eb_lookup_objects(struct eb_objects *eb, 86 struct drm_i915_gem_exec_object2 *exec, 87 const struct drm_i915_gem_execbuffer2 *args, 88 struct drm_file *file) 89 { 90 int i; 91 92 lockmgr(&file->table_lock, LK_EXCLUSIVE); 93 for (i = 0; i < args->buffer_count; i++) { 94 struct drm_i915_gem_object *obj; 95 96 obj = to_intel_bo(idr_find(&file->object_idr, exec[i].handle)); 97 if (obj == NULL) { 98 lockmgr(&file->table_lock, LK_RELEASE); 99 DRM_DEBUG("Invalid object handle %d at index %d\n", 100 exec[i].handle, i); 101 return -ENOENT; 102 } 103 104 if (!list_empty(&obj->exec_list)) { 105 lockmgr(&file->table_lock, LK_RELEASE); 106 DRM_DEBUG("Object %p [handle %d, index %d] appears more than once in object list\n", 107 obj, exec[i].handle, i); 108 return -EINVAL; 109 } 110 111 drm_gem_object_reference(&obj->base); 112 list_add_tail(&obj->exec_list, &eb->objects); 113 114 obj->exec_entry = &exec[i]; 115 if (eb->and < 0) { 116 eb->lut[i] = obj; 117 } else { 118 uint32_t handle = args->flags & I915_EXEC_HANDLE_LUT ? i : exec[i].handle; 119 obj->exec_handle = handle; 120 hlist_add_head(&obj->exec_node, 121 &eb->buckets[handle & eb->and]); 122 } 123 } 124 lockmgr(&file->table_lock, LK_RELEASE); 125 126 return 0; 127 } 128 129 static struct drm_i915_gem_object * 130 eb_get_object(struct eb_objects *eb, unsigned long handle) 131 { 132 if (eb->and < 0) { 133 if (handle >= -eb->and) 134 return NULL; 135 return eb->lut[handle]; 136 } else { 137 struct hlist_head *head; 138 struct hlist_node *node; 139 140 head = &eb->buckets[handle & eb->and]; 141 hlist_for_each(node, head) { 142 struct drm_i915_gem_object *obj; 143 144 obj = hlist_entry(node, struct drm_i915_gem_object, exec_node); 145 if (obj->exec_handle == handle) 146 return obj; 147 } 148 return NULL; 149 } 150 } 151 152 static void 153 eb_destroy(struct eb_objects *eb) 154 { 155 while (!list_empty(&eb->objects)) { 156 struct drm_i915_gem_object *obj; 157 158 obj = list_first_entry(&eb->objects, 159 struct drm_i915_gem_object, 160 exec_list); 161 list_del_init(&obj->exec_list); 162 drm_gem_object_unreference(&obj->base); 163 } 164 drm_free(eb, M_DRM); 165 } 166 167 static inline int use_cpu_reloc(struct drm_i915_gem_object *obj) 168 { 169 return (obj->base.write_domain == I915_GEM_DOMAIN_CPU || 170 !obj->map_and_fenceable || 171 obj->cache_level != I915_CACHE_NONE); 172 } 173 174 static int 175 i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, 176 struct eb_objects *eb, 177 struct drm_i915_gem_relocation_entry *reloc) 178 { 179 struct drm_device *dev = obj->base.dev; 180 struct drm_gem_object *target_obj; 181 struct drm_i915_gem_object *target_i915_obj; 182 uint32_t target_offset; 183 int ret = -EINVAL; 184 185 /* we've already hold a reference to all valid objects */ 186 target_obj = &eb_get_object(eb, reloc->target_handle)->base; 187 if (unlikely(target_obj == NULL)) 188 return -ENOENT; 189 190 target_i915_obj = to_intel_bo(target_obj); 191 target_offset = target_i915_obj->gtt_offset; 192 193 /* Sandybridge PPGTT errata: We need a global gtt mapping for MI and 194 * pipe_control writes because the gpu doesn't properly redirect them 195 * through the ppgtt for non_secure batchbuffers. */ 196 if (unlikely(IS_GEN6(dev) && 197 reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION && 198 !target_i915_obj->has_global_gtt_mapping)) { 199 i915_gem_gtt_bind_object(target_i915_obj, 200 target_i915_obj->cache_level); 201 } 202 203 /* Validate that the target is in a valid r/w GPU domain */ 204 if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) { 205 DRM_DEBUG("reloc with multiple write domains: " 206 "obj %p target %d offset %d " 207 "read %08x write %08x", 208 obj, reloc->target_handle, 209 (int) reloc->offset, 210 reloc->read_domains, 211 reloc->write_domain); 212 return ret; 213 } 214 if (unlikely((reloc->write_domain | reloc->read_domains) 215 & ~I915_GEM_GPU_DOMAINS)) { 216 DRM_DEBUG("reloc with read/write non-GPU domains: " 217 "obj %p target %d offset %d " 218 "read %08x write %08x", 219 obj, reloc->target_handle, 220 (int) reloc->offset, 221 reloc->read_domains, 222 reloc->write_domain); 223 return ret; 224 } 225 226 target_obj->pending_read_domains |= reloc->read_domains; 227 target_obj->pending_write_domain |= reloc->write_domain; 228 229 /* If the relocation already has the right value in it, no 230 * more work needs to be done. 231 */ 232 if (target_offset == reloc->presumed_offset) 233 return 0; 234 235 /* Check that the relocation address is valid... */ 236 if (unlikely(reloc->offset > obj->base.size - 4)) { 237 DRM_DEBUG("Relocation beyond object bounds: " 238 "obj %p target %d offset %d size %d.\n", 239 obj, reloc->target_handle, 240 (int) reloc->offset, 241 (int) obj->base.size); 242 return ret; 243 } 244 if (unlikely(reloc->offset & 3)) { 245 DRM_DEBUG("Relocation not 4-byte aligned: " 246 "obj %p target %d offset %d.\n", 247 obj, reloc->target_handle, 248 (int) reloc->offset); 249 return ret; 250 } 251 252 /* We can't wait for rendering with pagefaults disabled */ 253 if (obj->active && (curthread->td_flags & TDF_NOFAULT)) 254 return -EFAULT; 255 256 reloc->delta += target_offset; 257 if (use_cpu_reloc(obj)) { 258 uint32_t page_offset = reloc->offset & PAGE_MASK; 259 char *vaddr; 260 261 ret = i915_gem_object_set_to_cpu_domain(obj, 1); 262 if (ret) 263 return ret; 264 265 vaddr = kmap_atomic(i915_gem_object_get_page(obj, 266 reloc->offset >> PAGE_SHIFT)); 267 *(uint32_t *)(vaddr + page_offset) = reloc->delta; 268 kunmap_atomic(vaddr); 269 } else { 270 uint32_t __iomem *reloc_entry; 271 char __iomem *reloc_page; 272 273 ret = i915_gem_object_set_to_gtt_domain(obj, true); 274 if (ret) 275 return ret; 276 277 ret = i915_gem_object_put_fence(obj); 278 if (ret) 279 return ret; 280 281 /* Map the page containing the relocation we're going to perform. */ 282 reloc->offset += obj->gtt_offset; 283 reloc_page = pmap_mapdev_attr(dev->agp->base + (reloc->offset & 284 ~PAGE_MASK), PAGE_SIZE, PAT_WRITE_COMBINING); 285 reloc_entry = (uint32_t *)(reloc_page + (reloc->offset & 286 PAGE_MASK)); 287 iowrite32(reloc->delta, reloc_entry); 288 pmap_unmapdev((vm_offset_t)reloc_page, PAGE_SIZE); 289 } 290 291 /* and update the user's relocation entry */ 292 reloc->presumed_offset = target_offset; 293 294 return 0; 295 } 296 297 static int 298 i915_gem_execbuffer_relocate_object(struct drm_i915_gem_object *obj, 299 struct eb_objects *eb) 300 { 301 #define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry)) 302 struct drm_i915_gem_relocation_entry stack_reloc[N_RELOC(512)]; 303 struct drm_i915_gem_relocation_entry __user *user_relocs; 304 struct drm_i915_gem_exec_object2 *entry = obj->exec_entry; 305 int remain, ret; 306 307 user_relocs = to_user_ptr(entry->relocs_ptr); 308 309 remain = entry->relocation_count; 310 while (remain) { 311 struct drm_i915_gem_relocation_entry *r = stack_reloc; 312 int count = remain; 313 if (count > ARRAY_SIZE(stack_reloc)) 314 count = ARRAY_SIZE(stack_reloc); 315 remain -= count; 316 317 if (copyin_nofault(user_relocs, r, count*sizeof(r[0]))) 318 return -EFAULT; 319 320 do { 321 u64 offset = r->presumed_offset; 322 323 ret = i915_gem_execbuffer_relocate_entry(obj, eb, r); 324 if (ret) 325 return ret; 326 327 if (r->presumed_offset != offset && 328 copyout_nofault(&r->presumed_offset, 329 &user_relocs->presumed_offset, 330 sizeof(r->presumed_offset))) { 331 return -EFAULT; 332 } 333 334 user_relocs++; 335 r++; 336 } while (--count); 337 } 338 339 return 0; 340 #undef N_RELOC 341 } 342 343 static int 344 i915_gem_execbuffer_relocate_object_slow(struct drm_i915_gem_object *obj, 345 struct eb_objects *eb, 346 struct drm_i915_gem_relocation_entry *relocs) 347 { 348 const struct drm_i915_gem_exec_object2 *entry = obj->exec_entry; 349 int i, ret; 350 351 for (i = 0; i < entry->relocation_count; i++) { 352 ret = i915_gem_execbuffer_relocate_entry(obj, eb, &relocs[i]); 353 if (ret) 354 return ret; 355 } 356 357 return 0; 358 } 359 360 static int 361 i915_gem_execbuffer_relocate(struct eb_objects *eb) 362 { 363 struct drm_i915_gem_object *obj; 364 int ret = 0; 365 366 /* This is the fast path and we cannot handle a pagefault whilst 367 * holding the struct mutex lest the user pass in the relocations 368 * contained within a mmaped bo. For in such a case we, the page 369 * fault handler would call i915_gem_fault() and we would try to 370 * acquire the struct mutex again. Obviously this is bad and so 371 * lockdep complains vehemently. 372 */ 373 #if 0 374 pagefault_disable(); 375 #endif 376 list_for_each_entry(obj, &eb->objects, exec_list) { 377 ret = i915_gem_execbuffer_relocate_object(obj, eb); 378 if (ret) 379 break; 380 } 381 #if 0 382 pagefault_enable(); 383 #endif 384 385 return ret; 386 } 387 388 #define __EXEC_OBJECT_HAS_PIN (1<<31) 389 #define __EXEC_OBJECT_HAS_FENCE (1<<30) 390 391 static int 392 need_reloc_mappable(struct drm_i915_gem_object *obj) 393 { 394 struct drm_i915_gem_exec_object2 *entry = obj->exec_entry; 395 return entry->relocation_count && !use_cpu_reloc(obj); 396 } 397 398 static int 399 i915_gem_execbuffer_reserve_object(struct drm_i915_gem_object *obj, 400 struct intel_ring_buffer *ring, 401 bool *need_reloc) 402 { 403 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 404 struct drm_i915_gem_exec_object2 *entry = obj->exec_entry; 405 bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4; 406 bool need_fence, need_mappable; 407 int ret; 408 409 need_fence = 410 has_fenced_gpu_access && 411 entry->flags & EXEC_OBJECT_NEEDS_FENCE && 412 obj->tiling_mode != I915_TILING_NONE; 413 need_mappable = need_fence || need_reloc_mappable(obj); 414 415 ret = i915_gem_object_pin(obj, entry->alignment, need_mappable, false); 416 if (ret) 417 return ret; 418 419 entry->flags |= __EXEC_OBJECT_HAS_PIN; 420 421 if (has_fenced_gpu_access) { 422 if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) { 423 ret = i915_gem_object_get_fence(obj); 424 if (ret) 425 return ret; 426 427 if (i915_gem_object_pin_fence(obj)) 428 entry->flags |= __EXEC_OBJECT_HAS_FENCE; 429 430 obj->pending_fenced_gpu_access = true; 431 } 432 } 433 434 /* Ensure ppgtt mapping exists if needed */ 435 if (dev_priv->mm.aliasing_ppgtt && !obj->has_aliasing_ppgtt_mapping) { 436 i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt, 437 obj, obj->cache_level); 438 439 obj->has_aliasing_ppgtt_mapping = 1; 440 } 441 442 if (entry->offset != obj->gtt_offset) { 443 entry->offset = obj->gtt_offset; 444 *need_reloc = true; 445 } 446 447 if (entry->flags & EXEC_OBJECT_WRITE) { 448 obj->base.pending_read_domains = I915_GEM_DOMAIN_RENDER; 449 obj->base.pending_write_domain = I915_GEM_DOMAIN_RENDER; 450 } 451 452 if (entry->flags & EXEC_OBJECT_NEEDS_GTT && 453 !obj->has_global_gtt_mapping) 454 i915_gem_gtt_bind_object(obj, obj->cache_level); 455 456 return 0; 457 } 458 459 static void 460 i915_gem_execbuffer_unreserve_object(struct drm_i915_gem_object *obj) 461 { 462 struct drm_i915_gem_exec_object2 *entry; 463 464 if (!obj->gtt_space) 465 return; 466 467 entry = obj->exec_entry; 468 469 if (entry->flags & __EXEC_OBJECT_HAS_FENCE) 470 i915_gem_object_unpin_fence(obj); 471 472 if (entry->flags & __EXEC_OBJECT_HAS_PIN) 473 i915_gem_object_unpin(obj); 474 475 entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN); 476 } 477 478 static int 479 i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring, 480 struct list_head *objects, 481 bool *need_relocs) 482 { 483 struct drm_i915_gem_object *obj; 484 struct list_head ordered_objects; 485 bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4; 486 int retry; 487 488 INIT_LIST_HEAD(&ordered_objects); 489 while (!list_empty(objects)) { 490 struct drm_i915_gem_exec_object2 *entry; 491 bool need_fence, need_mappable; 492 493 obj = list_first_entry(objects, 494 struct drm_i915_gem_object, 495 exec_list); 496 entry = obj->exec_entry; 497 498 need_fence = 499 has_fenced_gpu_access && 500 entry->flags & EXEC_OBJECT_NEEDS_FENCE && 501 obj->tiling_mode != I915_TILING_NONE; 502 need_mappable = need_fence || need_reloc_mappable(obj); 503 504 if (need_mappable) 505 list_move(&obj->exec_list, &ordered_objects); 506 else 507 list_move_tail(&obj->exec_list, &ordered_objects); 508 509 obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND; 510 obj->base.pending_write_domain = 0; 511 obj->pending_fenced_gpu_access = false; 512 } 513 list_splice(&ordered_objects, objects); 514 515 /* Attempt to pin all of the buffers into the GTT. 516 * This is done in 3 phases: 517 * 518 * 1a. Unbind all objects that do not match the GTT constraints for 519 * the execbuffer (fenceable, mappable, alignment etc). 520 * 1b. Increment pin count for already bound objects. 521 * 2. Bind new objects. 522 * 3. Decrement pin count. 523 * 524 * This avoid unnecessary unbinding of later objects in order to make 525 * room for the earlier objects *unless* we need to defragment. 526 */ 527 retry = 0; 528 do { 529 int ret = 0; 530 531 /* Unbind any ill-fitting objects or pin. */ 532 list_for_each_entry(obj, objects, exec_list) { 533 struct drm_i915_gem_exec_object2 *entry = obj->exec_entry; 534 bool need_fence, need_mappable; 535 536 if (!obj->gtt_space) 537 continue; 538 539 need_fence = 540 has_fenced_gpu_access && 541 entry->flags & EXEC_OBJECT_NEEDS_FENCE && 542 obj->tiling_mode != I915_TILING_NONE; 543 need_mappable = need_fence || need_reloc_mappable(obj); 544 545 if ((entry->alignment && obj->gtt_offset & (entry->alignment - 1)) || 546 (need_mappable && !obj->map_and_fenceable)) 547 ret = i915_gem_object_unbind(obj); 548 else 549 ret = i915_gem_execbuffer_reserve_object(obj, ring, need_relocs); 550 if (ret) 551 goto err; 552 } 553 554 /* Bind fresh objects */ 555 list_for_each_entry(obj, objects, exec_list) { 556 if (obj->gtt_space) 557 continue; 558 559 ret = i915_gem_execbuffer_reserve_object(obj, ring, need_relocs); 560 if (ret) 561 goto err; 562 } 563 564 err: /* Decrement pin count for bound objects */ 565 list_for_each_entry(obj, objects, exec_list) 566 i915_gem_execbuffer_unreserve_object(obj); 567 568 if (ret != -ENOSPC || retry++) 569 return ret; 570 571 ret = i915_gem_evict_everything(ring->dev); 572 if (ret) 573 return ret; 574 } while (1); 575 } 576 577 static int 578 i915_gem_execbuffer_relocate_slow(struct drm_device *dev, 579 struct drm_i915_gem_execbuffer2 *args, 580 struct drm_file *file, 581 struct intel_ring_buffer *ring, 582 struct eb_objects *eb, 583 struct drm_i915_gem_exec_object2 *exec) 584 { 585 struct drm_i915_gem_relocation_entry *reloc; 586 struct drm_i915_gem_object *obj; 587 bool need_relocs; 588 int *reloc_offset; 589 int i, total, ret; 590 int count = args->buffer_count; 591 592 /* We may process another execbuffer during the unlock... */ 593 while (!list_empty(&eb->objects)) { 594 obj = list_first_entry(&eb->objects, 595 struct drm_i915_gem_object, 596 exec_list); 597 list_del_init(&obj->exec_list); 598 drm_gem_object_unreference(&obj->base); 599 } 600 601 mutex_unlock(&dev->struct_mutex); 602 603 total = 0; 604 for (i = 0; i < count; i++) 605 total += exec[i].relocation_count; 606 607 reloc_offset = drm_malloc_ab(count, sizeof(*reloc_offset)); 608 reloc = drm_malloc_ab(total, sizeof(*reloc)); 609 if (reloc == NULL || reloc_offset == NULL) { 610 drm_free_large(reloc); 611 drm_free_large(reloc_offset); 612 mutex_lock(&dev->struct_mutex); 613 return -ENOMEM; 614 } 615 616 total = 0; 617 for (i = 0; i < count; i++) { 618 struct drm_i915_gem_relocation_entry __user *user_relocs; 619 u64 invalid_offset = (u64)-1; 620 int j; 621 622 user_relocs = to_user_ptr(exec[i].relocs_ptr); 623 624 if (copy_from_user(reloc+total, user_relocs, 625 exec[i].relocation_count * sizeof(*reloc))) { 626 ret = -EFAULT; 627 mutex_lock(&dev->struct_mutex); 628 goto err; 629 } 630 631 /* As we do not update the known relocation offsets after 632 * relocating (due to the complexities in lock handling), 633 * we need to mark them as invalid now so that we force the 634 * relocation processing next time. Just in case the target 635 * object is evicted and then rebound into its old 636 * presumed_offset before the next execbuffer - if that 637 * happened we would make the mistake of assuming that the 638 * relocations were valid. 639 */ 640 for (j = 0; j < exec[i].relocation_count; j++) { 641 if (copy_to_user(&user_relocs[j].presumed_offset, 642 &invalid_offset, 643 sizeof(invalid_offset))) { 644 ret = -EFAULT; 645 mutex_lock(&dev->struct_mutex); 646 goto err; 647 } 648 } 649 650 reloc_offset[i] = total; 651 total += exec[i].relocation_count; 652 } 653 654 ret = i915_mutex_lock_interruptible(dev); 655 if (ret) { 656 mutex_lock(&dev->struct_mutex); 657 goto err; 658 } 659 660 /* reacquire the objects */ 661 eb_reset(eb); 662 ret = eb_lookup_objects(eb, exec, args, file); 663 if (ret) 664 goto err; 665 666 need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0; 667 ret = i915_gem_execbuffer_reserve(ring, &eb->objects, &need_relocs); 668 if (ret) 669 goto err; 670 671 list_for_each_entry(obj, &eb->objects, exec_list) { 672 int offset = obj->exec_entry - exec; 673 ret = i915_gem_execbuffer_relocate_object_slow(obj, eb, 674 reloc + reloc_offset[offset]); 675 if (ret) 676 goto err; 677 } 678 679 /* Leave the user relocations as are, this is the painfully slow path, 680 * and we want to avoid the complication of dropping the lock whilst 681 * having buffers reserved in the aperture and so causing spurious 682 * ENOSPC for random operations. 683 */ 684 685 err: 686 drm_free_large(reloc); 687 drm_free_large(reloc_offset); 688 return ret; 689 } 690 691 static int 692 i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring, 693 struct list_head *objects) 694 { 695 struct drm_i915_gem_object *obj; 696 uint32_t flush_domains = 0; 697 int ret; 698 699 list_for_each_entry(obj, objects, exec_list) { 700 ret = i915_gem_object_sync(obj, ring); 701 if (ret) 702 return ret; 703 704 if (obj->base.write_domain & I915_GEM_DOMAIN_CPU) 705 i915_gem_clflush_object(obj); 706 707 flush_domains |= obj->base.write_domain; 708 } 709 710 if (flush_domains & I915_GEM_DOMAIN_CPU) 711 i915_gem_chipset_flush(ring->dev); 712 713 if (flush_domains & I915_GEM_DOMAIN_GTT) 714 cpu_sfence(); 715 716 /* Unconditionally invalidate gpu caches and ensure that we do flush 717 * any residual writes from the previous batch. 718 */ 719 return intel_ring_invalidate_all_caches(ring); 720 } 721 722 static bool 723 i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec) 724 { 725 if (exec->flags & __I915_EXEC_UNKNOWN_FLAGS) 726 return false; 727 728 return ((exec->batch_start_offset | exec->batch_len) & 0x7) == 0; 729 } 730 731 static int 732 validate_exec_list(struct drm_i915_gem_exec_object2 *exec, 733 int count) 734 { 735 int i; 736 int relocs_total = 0; 737 int relocs_max = INT_MAX / sizeof(struct drm_i915_gem_relocation_entry); 738 739 for (i = 0; i < count; i++) { 740 #if 0 741 char __user *ptr = to_user_ptr(exec[i].relocs_ptr); 742 #endif 743 int length; /* limited by fault_in_pages_readable() */ 744 745 if (exec[i].flags & __EXEC_OBJECT_UNKNOWN_FLAGS) 746 return -EINVAL; 747 748 /* First check for malicious input causing overflow in 749 * the worst case where we need to allocate the entire 750 * relocation tree as a single array. 751 */ 752 if (exec[i].relocation_count > relocs_max - relocs_total) 753 return -EINVAL; 754 relocs_total += exec[i].relocation_count; 755 756 length = exec[i].relocation_count * 757 sizeof(struct drm_i915_gem_relocation_entry); 758 #if 0 759 /* 760 * We must check that the entire relocation array is safe 761 * to read, but since we may need to update the presumed 762 * offsets during execution, check for full write access. 763 */ 764 if (!access_ok(VERIFY_WRITE, ptr, length)) 765 return -EFAULT; 766 767 if (fault_in_multipages_readable(ptr, length)) 768 return -EFAULT; 769 #endif 770 } 771 772 return 0; 773 } 774 775 static void 776 i915_gem_execbuffer_move_to_active(struct list_head *objects, 777 struct intel_ring_buffer *ring) 778 { 779 struct drm_i915_gem_object *obj; 780 781 list_for_each_entry(obj, objects, exec_list) { 782 783 obj->base.write_domain = obj->base.pending_write_domain; 784 if (obj->base.write_domain == 0) 785 obj->base.pending_read_domains |= obj->base.read_domains; 786 obj->base.read_domains = obj->base.pending_read_domains; 787 obj->fenced_gpu_access = obj->pending_fenced_gpu_access; 788 789 i915_gem_object_move_to_active(obj, ring); 790 if (obj->base.write_domain) { 791 obj->dirty = 1; 792 obj->last_write_seqno = intel_ring_get_seqno(ring); 793 if (obj->pin_count) /* check for potential scanout */ 794 intel_mark_fb_busy(obj, ring); 795 } 796 797 trace_i915_gem_object_change_domain(obj, old_read, old_write); 798 } 799 } 800 801 static void 802 i915_gem_execbuffer_retire_commands(struct drm_device *dev, 803 struct drm_file *file, 804 struct intel_ring_buffer *ring, 805 struct drm_i915_gem_object *obj) 806 { 807 /* Unconditionally force add_request to emit a full flush. */ 808 ring->gpu_caches_dirty = true; 809 810 /* Add a breadcrumb for the completion of the batch buffer */ 811 (void)__i915_add_request(ring, file, obj, NULL); 812 } 813 814 static int 815 i915_reset_gen7_sol_offsets(struct drm_device *dev, 816 struct intel_ring_buffer *ring) 817 { 818 drm_i915_private_t *dev_priv = dev->dev_private; 819 int ret, i; 820 821 if (!IS_GEN7(dev) || ring != &dev_priv->ring[RCS]) 822 return 0; 823 824 ret = intel_ring_begin(ring, 4 * 3); 825 if (ret) 826 return ret; 827 828 for (i = 0; i < 4; i++) { 829 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); 830 intel_ring_emit(ring, GEN7_SO_WRITE_OFFSET(i)); 831 intel_ring_emit(ring, 0); 832 } 833 834 intel_ring_advance(ring); 835 836 return 0; 837 } 838 839 static int 840 i915_gem_do_execbuffer(struct drm_device *dev, void *data, 841 struct drm_file *file, 842 struct drm_i915_gem_execbuffer2 *args, 843 struct drm_i915_gem_exec_object2 *exec) 844 { 845 drm_i915_private_t *dev_priv = dev->dev_private; 846 struct eb_objects *eb; 847 struct drm_i915_gem_object *batch_obj; 848 struct drm_clip_rect *cliprects = NULL; 849 struct intel_ring_buffer *ring; 850 u32 ctx_id = i915_execbuffer2_get_context_id(*args); 851 u32 exec_start, exec_len; 852 u32 mask, flags; 853 int ret, mode, i; 854 bool need_relocs; 855 856 if (!i915_gem_check_execbuffer(args)) 857 return -EINVAL; 858 859 ret = validate_exec_list(exec, args->buffer_count); 860 if (ret) 861 return ret; 862 863 flags = 0; 864 if (args->flags & I915_EXEC_SECURE) { 865 flags |= I915_DISPATCH_SECURE; 866 } 867 if (args->flags & I915_EXEC_IS_PINNED) 868 flags |= I915_DISPATCH_PINNED; 869 870 switch (args->flags & I915_EXEC_RING_MASK) { 871 case I915_EXEC_DEFAULT: 872 case I915_EXEC_RENDER: 873 ring = &dev_priv->ring[RCS]; 874 break; 875 case I915_EXEC_BSD: 876 ring = &dev_priv->ring[VCS]; 877 if (ctx_id != 0) { 878 DRM_DEBUG("Ring %s doesn't support contexts\n", 879 ring->name); 880 return -EPERM; 881 } 882 break; 883 case I915_EXEC_BLT: 884 ring = &dev_priv->ring[BCS]; 885 if (ctx_id != 0) { 886 DRM_DEBUG("Ring %s doesn't support contexts\n", 887 ring->name); 888 return -EPERM; 889 } 890 break; 891 case I915_EXEC_VEBOX: 892 ring = &dev_priv->ring[VECS]; 893 if (ctx_id != 0) { 894 DRM_DEBUG("Ring %s doesn't support contexts\n", 895 ring->name); 896 return -EPERM; 897 } 898 break; 899 900 default: 901 DRM_DEBUG("execbuf with unknown ring: %d\n", 902 (int)(args->flags & I915_EXEC_RING_MASK)); 903 return -EINVAL; 904 } 905 if (!intel_ring_initialized(ring)) { 906 DRM_DEBUG("execbuf with invalid ring: %d\n", 907 (int)(args->flags & I915_EXEC_RING_MASK)); 908 return -EINVAL; 909 } 910 911 mode = args->flags & I915_EXEC_CONSTANTS_MASK; 912 mask = I915_EXEC_CONSTANTS_MASK; 913 switch (mode) { 914 case I915_EXEC_CONSTANTS_REL_GENERAL: 915 case I915_EXEC_CONSTANTS_ABSOLUTE: 916 case I915_EXEC_CONSTANTS_REL_SURFACE: 917 if (ring == &dev_priv->ring[RCS] && 918 mode != dev_priv->relative_constants_mode) { 919 if (INTEL_INFO(dev)->gen < 4) 920 return -EINVAL; 921 922 if (INTEL_INFO(dev)->gen > 5 && 923 mode == I915_EXEC_CONSTANTS_REL_SURFACE) 924 return -EINVAL; 925 926 /* The HW changed the meaning on this bit on gen6 */ 927 if (INTEL_INFO(dev)->gen >= 6) 928 mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE; 929 } 930 break; 931 default: 932 DRM_DEBUG("execbuf with unknown constants: %d\n", mode); 933 return -EINVAL; 934 } 935 936 if (args->buffer_count < 1) { 937 DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count); 938 return -EINVAL; 939 } 940 941 if (args->num_cliprects != 0) { 942 if (ring != &dev_priv->ring[RCS]) { 943 DRM_DEBUG("clip rectangles are only valid with the render ring\n"); 944 return -EINVAL; 945 } 946 947 if (INTEL_INFO(dev)->gen >= 5) { 948 DRM_DEBUG("clip rectangles are only valid on pre-gen5\n"); 949 return -EINVAL; 950 } 951 952 if (args->num_cliprects > UINT_MAX / sizeof(*cliprects)) { 953 DRM_DEBUG("execbuf with %u cliprects\n", 954 args->num_cliprects); 955 return -EINVAL; 956 } 957 958 cliprects = kmalloc(args->num_cliprects * sizeof(*cliprects), 959 M_DRM, M_WAITOK); 960 if (cliprects == NULL) { 961 ret = -ENOMEM; 962 goto pre_mutex_err; 963 } 964 965 if (copy_from_user(cliprects, 966 to_user_ptr(args->cliprects_ptr), 967 sizeof(*cliprects)*args->num_cliprects)) { 968 ret = -EFAULT; 969 goto pre_mutex_err; 970 } 971 } 972 973 ret = i915_mutex_lock_interruptible(dev); 974 if (ret) 975 goto pre_mutex_err; 976 977 if (dev_priv->mm.suspended) { 978 mutex_unlock(&dev->struct_mutex); 979 ret = -EBUSY; 980 goto pre_mutex_err; 981 } 982 983 eb = eb_create(args); 984 if (eb == NULL) { 985 mutex_unlock(&dev->struct_mutex); 986 ret = -ENOMEM; 987 goto pre_mutex_err; 988 } 989 990 /* Look up object handles */ 991 ret = eb_lookup_objects(eb, exec, args, file); 992 if (ret) 993 goto err; 994 995 /* take note of the batch buffer before we might reorder the lists */ 996 batch_obj = list_entry(eb->objects.prev, 997 struct drm_i915_gem_object, 998 exec_list); 999 1000 /* Move the objects en-masse into the GTT, evicting if necessary. */ 1001 need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0; 1002 ret = i915_gem_execbuffer_reserve(ring, &eb->objects, &need_relocs); 1003 if (ret) 1004 goto err; 1005 1006 /* The objects are in their final locations, apply the relocations. */ 1007 if (need_relocs) 1008 ret = i915_gem_execbuffer_relocate(eb); 1009 if (ret) { 1010 if (ret == -EFAULT) { 1011 ret = i915_gem_execbuffer_relocate_slow(dev, args, file, ring, 1012 eb, exec); 1013 DRM_LOCK_ASSERT(dev); 1014 } 1015 if (ret) 1016 goto err; 1017 } 1018 1019 /* Set the pending read domains for the batch buffer to COMMAND */ 1020 if (batch_obj->base.pending_write_domain) { 1021 DRM_DEBUG("Attempting to use self-modifying batch buffer\n"); 1022 ret = -EINVAL; 1023 goto err; 1024 } 1025 batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND; 1026 1027 /* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure 1028 * batch" bit. Hence we need to pin secure batches into the global gtt. 1029 * hsw should have this fixed, but let's be paranoid and do it 1030 * unconditionally for now. */ 1031 if (flags & I915_DISPATCH_SECURE && !batch_obj->has_global_gtt_mapping) 1032 i915_gem_gtt_bind_object(batch_obj, batch_obj->cache_level); 1033 1034 ret = i915_gem_execbuffer_move_to_gpu(ring, &eb->objects); 1035 if (ret) 1036 goto err; 1037 1038 ret = i915_switch_context(ring, file, ctx_id); 1039 if (ret) 1040 goto err; 1041 1042 if (ring == &dev_priv->ring[RCS] && 1043 mode != dev_priv->relative_constants_mode) { 1044 ret = intel_ring_begin(ring, 4); 1045 if (ret) 1046 goto err; 1047 1048 intel_ring_emit(ring, MI_NOOP); 1049 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); 1050 intel_ring_emit(ring, INSTPM); 1051 intel_ring_emit(ring, mask << 16 | mode); 1052 intel_ring_advance(ring); 1053 1054 dev_priv->relative_constants_mode = mode; 1055 } 1056 1057 if (args->flags & I915_EXEC_GEN7_SOL_RESET) { 1058 ret = i915_reset_gen7_sol_offsets(dev, ring); 1059 if (ret) 1060 goto err; 1061 } 1062 1063 exec_start = batch_obj->gtt_offset + args->batch_start_offset; 1064 exec_len = args->batch_len; 1065 if (cliprects) { 1066 for (i = 0; i < args->num_cliprects; i++) { 1067 ret = i915_emit_box(dev, &cliprects[i], 1068 args->DR1, args->DR4); 1069 if (ret) 1070 goto err; 1071 1072 ret = ring->dispatch_execbuffer(ring, 1073 exec_start, exec_len, 1074 flags); 1075 if (ret) 1076 goto err; 1077 } 1078 } else { 1079 ret = ring->dispatch_execbuffer(ring, 1080 exec_start, exec_len, 1081 flags); 1082 if (ret) 1083 goto err; 1084 } 1085 1086 trace_i915_gem_ring_dispatch(ring, intel_ring_get_seqno(ring), flags); 1087 1088 i915_gem_execbuffer_move_to_active(&eb->objects, ring); 1089 i915_gem_execbuffer_retire_commands(dev, file, ring, batch_obj); 1090 1091 err: 1092 eb_destroy(eb); 1093 1094 mutex_unlock(&dev->struct_mutex); 1095 1096 pre_mutex_err: 1097 drm_free(cliprects, M_DRM); 1098 return ret; 1099 } 1100 1101 /* 1102 * Legacy execbuffer just creates an exec2 list from the original exec object 1103 * list array and passes it to the real function. 1104 */ 1105 int 1106 i915_gem_execbuffer(struct drm_device *dev, void *data, 1107 struct drm_file *file) 1108 { 1109 struct drm_i915_gem_execbuffer *args = data; 1110 struct drm_i915_gem_execbuffer2 exec2; 1111 struct drm_i915_gem_exec_object *exec_list = NULL; 1112 struct drm_i915_gem_exec_object2 *exec2_list = NULL; 1113 int ret, i; 1114 1115 if (args->buffer_count < 1) { 1116 DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count); 1117 return -EINVAL; 1118 } 1119 1120 /* Copy in the exec list from userland */ 1121 exec_list = drm_malloc_ab(sizeof(*exec_list), args->buffer_count); 1122 exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count); 1123 if (exec_list == NULL || exec2_list == NULL) { 1124 DRM_DEBUG("Failed to allocate exec list for %d buffers\n", 1125 args->buffer_count); 1126 drm_free_large(exec_list); 1127 drm_free_large(exec2_list); 1128 return -ENOMEM; 1129 } 1130 ret = copy_from_user(exec_list, 1131 to_user_ptr(args->buffers_ptr), 1132 sizeof(*exec_list) * args->buffer_count); 1133 if (ret != 0) { 1134 DRM_DEBUG("copy %d exec entries failed %d\n", 1135 args->buffer_count, ret); 1136 drm_free_large(exec_list); 1137 drm_free_large(exec2_list); 1138 return -EFAULT; 1139 } 1140 1141 for (i = 0; i < args->buffer_count; i++) { 1142 exec2_list[i].handle = exec_list[i].handle; 1143 exec2_list[i].relocation_count = exec_list[i].relocation_count; 1144 exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr; 1145 exec2_list[i].alignment = exec_list[i].alignment; 1146 exec2_list[i].offset = exec_list[i].offset; 1147 if (INTEL_INFO(dev)->gen < 4) 1148 exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE; 1149 else 1150 exec2_list[i].flags = 0; 1151 } 1152 1153 exec2.buffers_ptr = args->buffers_ptr; 1154 exec2.buffer_count = args->buffer_count; 1155 exec2.batch_start_offset = args->batch_start_offset; 1156 exec2.batch_len = args->batch_len; 1157 exec2.DR1 = args->DR1; 1158 exec2.DR4 = args->DR4; 1159 exec2.num_cliprects = args->num_cliprects; 1160 exec2.cliprects_ptr = args->cliprects_ptr; 1161 exec2.flags = I915_EXEC_RENDER; 1162 i915_execbuffer2_set_context_id(exec2, 0); 1163 1164 ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list); 1165 if (!ret) { 1166 /* Copy the new buffer offsets back to the user's exec list. */ 1167 for (i = 0; i < args->buffer_count; i++) 1168 exec_list[i].offset = exec2_list[i].offset; 1169 /* ... and back out to userspace */ 1170 ret = copy_to_user(to_user_ptr(args->buffers_ptr), 1171 exec_list, 1172 sizeof(*exec_list) * args->buffer_count); 1173 if (ret) { 1174 ret = -EFAULT; 1175 DRM_DEBUG("failed to copy %d exec entries " 1176 "back to user (%d)\n", 1177 args->buffer_count, ret); 1178 } 1179 } 1180 1181 drm_free_large(exec_list); 1182 drm_free_large(exec2_list); 1183 return ret; 1184 } 1185 1186 int 1187 i915_gem_execbuffer2(struct drm_device *dev, void *data, 1188 struct drm_file *file) 1189 { 1190 struct drm_i915_gem_execbuffer2 *args = data; 1191 struct drm_i915_gem_exec_object2 *exec2_list = NULL; 1192 int ret; 1193 1194 if (args->buffer_count < 1 || 1195 args->buffer_count > UINT_MAX / sizeof(*exec2_list)) { 1196 DRM_DEBUG("execbuf2 with %d buffers\n", args->buffer_count); 1197 return -EINVAL; 1198 } 1199 1200 exec2_list = kmalloc(sizeof(*exec2_list)*args->buffer_count, 1201 M_DRM, M_WAITOK); 1202 if (exec2_list == NULL) 1203 exec2_list = drm_malloc_ab(sizeof(*exec2_list), 1204 args->buffer_count); 1205 if (exec2_list == NULL) { 1206 DRM_DEBUG("Failed to allocate exec list for %d buffers\n", 1207 args->buffer_count); 1208 return -ENOMEM; 1209 } 1210 ret = copy_from_user(exec2_list, 1211 to_user_ptr(args->buffers_ptr), 1212 sizeof(*exec2_list) * args->buffer_count); 1213 if (ret != 0) { 1214 DRM_DEBUG("copy %d exec entries failed %d\n", 1215 args->buffer_count, ret); 1216 drm_free_large(exec2_list); 1217 return -EFAULT; 1218 } 1219 1220 ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list); 1221 if (!ret) { 1222 /* Copy the new buffer offsets back to the user's exec list. */ 1223 ret = copy_to_user(to_user_ptr(args->buffers_ptr), 1224 exec2_list, 1225 sizeof(*exec2_list) * args->buffer_count); 1226 if (ret) { 1227 ret = -EFAULT; 1228 DRM_DEBUG("failed to copy %d exec entries " 1229 "back to user (%d)\n", 1230 args->buffer_count, ret); 1231 } 1232 } 1233 1234 drm_free_large(exec2_list); 1235 return ret; 1236 } 1237