/*
 * Copyright © 2008,2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *    Chris Wilson <chris@chris-wilson.co.uk>
 *
 */

#include <linux/dma_remapping.h>
#include <linux/reservation.h>
#include <linux/sync_file.h>
#include <linux/uaccess.h>

#include <drm/drmP.h>
#include <drm/i915_drm.h>

#include "i915_drv.h"
#include "i915_gem_clflush.h"
#include "i915_trace.h"
#include "intel_drv.h"
#include "intel_frontbuffer.h"

#define DBG_USE_CPU_RELOC 0 /* -1 force GTT relocs; 1 force CPU relocs */

#define  __EXEC_OBJECT_HAS_PIN		(1<<31)
#define  __EXEC_OBJECT_HAS_FENCE	(1<<30)
#define  __EXEC_OBJECT_NEEDS_MAP	(1<<29)
#define  __EXEC_OBJECT_NEEDS_BIAS	(1<<28)
#define  __EXEC_OBJECT_INTERNAL_FLAGS	(0xf<<28) /* all of the above */

#define BATCH_OFFSET_BIAS (256*1024)

struct i915_execbuffer_params {
	struct drm_device		*dev;
	struct drm_file			*file;
	struct i915_vma			*batch;
	u32				dispatch_flags;
	u32				args_batch_start_offset;
	struct intel_engine_cs		*engine;
	struct i915_gem_context		*ctx;
	struct drm_i915_gem_request	*request;
};

struct eb_vmas {
	struct drm_i915_private *i915;
	struct list_head vmas;
	int and;
	union {
		struct i915_vma *lut[0];
		struct hlist_head buckets[0];
	};
};

static struct eb_vmas *
eb_create(struct drm_i915_private *i915,
	  struct drm_i915_gem_execbuffer2 *args)
{
	struct eb_vmas *eb = NULL;

	if (args->flags & I915_EXEC_HANDLE_LUT) {
		unsigned size = args->buffer_count;
		size *= sizeof(struct i915_vma *);
		size += sizeof(struct eb_vmas);
		eb = kmalloc(size, M_DRM,
			     GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
	}

	if (eb == NULL) {
		unsigned size = args->buffer_count;
		unsigned count = PAGE_SIZE / sizeof(struct hlist_head) / 2;
		BUILD_BUG_ON_NOT_POWER_OF_2(PAGE_SIZE / sizeof(struct hlist_head));
		while (count > 2*size)
			count >>= 1;
		eb = kzalloc(count*sizeof(struct hlist_head) +
			     sizeof(struct eb_vmas),
			     GFP_TEMPORARY);
		if (eb == NULL)
			return eb;

		eb->and = count - 1;
	} else
		eb->and = -args->buffer_count;

	eb->i915 = i915;
	INIT_LIST_HEAD(&eb->vmas);
	return eb;
}

static void
eb_reset(struct eb_vmas *eb)
{
	if (eb->and >= 0)
		memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head));
}
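
/*
 * eb->and encodes which of the two lookup schemes eb_create() picked:
 * with I915_EXEC_HANDLE_LUT userspace passes indices rather than handles,
 * so we use the flat lut[] and store the negated buffer count
 * (eb->and = -args->buffer_count) for bounds checking in eb_get_vma();
 * otherwise eb->and is a power-of-two-minus-one mask for the hash
 * buckets, e.g. with 64 buckets eb->and == 63 and handle 0x1234 hashes
 * to bucket 0x1234 & 63.
 */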

static struct i915_vma *
eb_get_batch(struct eb_vmas *eb)
{
	struct i915_vma *vma = list_entry(eb->vmas.prev, typeof(*vma), exec_list);

	/*
	 * SNA is doing fancy tricks with compressing batch buffers, which leads
	 * to negative relocation deltas. Usually that works out ok since the
	 * relocate address is still positive, except when the batch is placed
	 * very low in the GTT. Ensure this doesn't happen.
	 *
	 * Note that actual hangs have only been observed on gen7, but for
	 * paranoia do it everywhere.
	 */
	if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0)
		vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;

	return vma;
}

static int
eb_lookup_vmas(struct eb_vmas *eb,
	       struct drm_i915_gem_exec_object2 *exec,
	       const struct drm_i915_gem_execbuffer2 *args,
	       struct i915_address_space *vm,
	       struct drm_file *file)
{
	struct drm_i915_gem_object *obj;
	struct list_head objects;
	int i, ret;

	INIT_LIST_HEAD(&objects);
	lockmgr(&file->table_lock, LK_EXCLUSIVE);
	/* Grab a reference to the object and release the lock so we can lookup
	 * or create the VMA without using GFP_ATOMIC */
	for (i = 0; i < args->buffer_count; i++) {
		obj = to_intel_bo(idr_find(&file->object_idr, exec[i].handle));
		if (obj == NULL) {
			lockmgr(&file->table_lock, LK_RELEASE);
			DRM_DEBUG("Invalid object handle %d at index %d\n",
				  exec[i].handle, i);
			ret = -ENOENT;
			goto err;
		}

		if (!list_empty(&obj->obj_exec_link)) {
			lockmgr(&file->table_lock, LK_RELEASE);
			DRM_DEBUG("Object %p [handle %d, index %d] appears more than once in object list\n",
				  obj, exec[i].handle, i);
			ret = -EINVAL;
			goto err;
		}

		i915_gem_object_get(obj);
		list_add_tail(&obj->obj_exec_link, &objects);
	}
	lockmgr(&file->table_lock, LK_RELEASE);

	i = 0;
	while (!list_empty(&objects)) {
		struct i915_vma *vma;

		obj = list_first_entry(&objects,
				       struct drm_i915_gem_object,
				       obj_exec_link);

		/*
		 * NOTE: We can leak any vmas created here when something fails
		 * later on. But that's no issue since vma_unbind can deal with
		 * vmas which are not actually bound. And since only
		 * lookup_or_create exists as an interface to get at the vma
		 * from the (obj, vm) we don't run the risk of creating
		 * duplicated vmas for the same vm.
		 */
		vma = i915_vma_instance(obj, vm, NULL);
		if (unlikely(IS_ERR(vma))) {
			DRM_DEBUG("Failed to lookup VMA\n");
			ret = PTR_ERR(vma);
			goto err;
		}

		/* Transfer ownership from the objects list to the vmas list. */
		list_add_tail(&vma->exec_list, &eb->vmas);
		list_del_init(&obj->obj_exec_link);

		vma->exec_entry = &exec[i];
		if (eb->and < 0) {
			eb->lut[i] = vma;
		} else {
			uint32_t handle = args->flags & I915_EXEC_HANDLE_LUT ? i : exec[i].handle;
			vma->exec_handle = handle;
			hlist_add_head(&vma->exec_node,
				       &eb->buckets[handle & eb->and]);
		}
		++i;
	}

	return 0;


err:
	while (!list_empty(&objects)) {
		obj = list_first_entry(&objects,
				       struct drm_i915_gem_object,
				       obj_exec_link);
		list_del_init(&obj->obj_exec_link);
		i915_gem_object_put(obj);
	}
	/*
	 * Objects already transferred to the vmas list will be unreferenced by
	 * eb_destroy.
	 */

	return ret;
}

static struct i915_vma *eb_get_vma(struct eb_vmas *eb, unsigned long handle)
{
	if (eb->and < 0) {
		if (handle >= -eb->and)
			return NULL;
		return eb->lut[handle];
	} else {
		struct hlist_head *head;
		struct i915_vma *vma;

		head = &eb->buckets[handle & eb->and];
		hlist_for_each_entry(vma, head, exec_node) {
			if (vma->exec_handle == handle)
				return vma;
		}
		return NULL;
	}
}

static void
i915_gem_execbuffer_unreserve_vma(struct i915_vma *vma)
{
	struct drm_i915_gem_exec_object2 *entry;

	if (!drm_mm_node_allocated(&vma->node))
		return;

	entry = vma->exec_entry;

	if (entry->flags & __EXEC_OBJECT_HAS_FENCE)
		i915_vma_unpin_fence(vma);

	if (entry->flags & __EXEC_OBJECT_HAS_PIN)
		__i915_vma_unpin(vma);

	entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN);
}

static void eb_destroy(struct eb_vmas *eb)
{
	while (!list_empty(&eb->vmas)) {
		struct i915_vma *vma;

		vma = list_first_entry(&eb->vmas,
				       struct i915_vma,
				       exec_list);
		list_del_init(&vma->exec_list);
		i915_gem_execbuffer_unreserve_vma(vma);
		vma->exec_entry = NULL;
		i915_vma_put(vma);
	}
	kfree(eb);
}

static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
{
	if (!i915_gem_object_has_struct_page(obj))
		return false;

	if (DBG_USE_CPU_RELOC)
		return DBG_USE_CPU_RELOC > 0;

	return (HAS_LLC(to_i915(obj->base.dev)) ||
		obj->base.write_domain == I915_GEM_DOMAIN_CPU ||
		obj->cache_level != I915_CACHE_NONE);
}

/* Used to convert any address to canonical form.
 * Starting from gen8, some commands (e.g. STATE_BASE_ADDRESS,
 * MI_LOAD_REGISTER_MEM and others, see Broadwell PRM Vol2a) require the
 * addresses to be in a canonical form:
 * "GraphicsAddress[63:48] are ignored by the HW and assumed to be in correct
 * canonical form [63:48] == [47]."
 */
#define GEN8_HIGH_ADDRESS_BIT 47
static inline uint64_t gen8_canonical_addr(uint64_t address)
{
	return sign_extend64(address, GEN8_HIGH_ADDRESS_BIT);
}

static inline uint64_t gen8_noncanonical_addr(uint64_t address)
{
	return address & ((1ULL << (GEN8_HIGH_ADDRESS_BIT + 1)) - 1);
}
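
/*
 * For example, 0x0000800000001000 has bit 47 set, so its canonical form
 * replicates that bit upwards: gen8_canonical_addr() yields
 * 0xffff800000001000, and gen8_noncanonical_addr() masks it back down to
 * bits [47:0], recovering 0x0000800000001000.
 */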

static inline uint64_t
relocation_target(const struct drm_i915_gem_relocation_entry *reloc,
		  uint64_t target_offset)
{
	return gen8_canonical_addr((int)reloc->delta + target_offset);
}

struct reloc_cache {
	struct drm_i915_private *i915;
	struct drm_mm_node node;
	unsigned long vaddr;
	unsigned int page;
	bool use_64bit_reloc;
};

static void reloc_cache_init(struct reloc_cache *cache,
			     struct drm_i915_private *i915)
{
	cache->page = -1;
	cache->vaddr = 0;
	cache->i915 = i915;
	/* Must be a variable in the struct to allow GCC to unroll. */
	cache->use_64bit_reloc = HAS_64BIT_RELOC(i915);
	cache->node.allocated = false;
}

static inline void *unmask_page(unsigned long p)
{
	return (void *)(uintptr_t)(p & LINUX_PAGE_MASK);
}

static inline unsigned int unmask_flags(unsigned long p)
{
	return p & ~LINUX_PAGE_MASK;
}

#define KMAP 0x4 /* after CLFLUSH_FLAGS */

static void reloc_cache_fini(struct reloc_cache *cache)
{
	void *vaddr;

	if (!cache->vaddr)
		return;

	vaddr = unmask_page(cache->vaddr);
	if (cache->vaddr & KMAP) {
		if (cache->vaddr & CLFLUSH_AFTER)
			mb();

		kunmap_atomic(vaddr);
		i915_gem_obj_finish_shmem_access((struct drm_i915_gem_object *)cache->node.mm);
	} else {
		wmb();
		io_mapping_unmap_atomic((void __iomem *)vaddr);
		if (cache->node.allocated) {
			struct i915_ggtt *ggtt = &cache->i915->ggtt;

			ggtt->base.clear_range(&ggtt->base,
					       cache->node.start,
					       cache->node.size);
			drm_mm_remove_node(&cache->node);
		} else {
			i915_vma_unpin((struct i915_vma *)cache->node.mm);
		}
	}
}

static void *reloc_kmap(struct drm_i915_gem_object *obj,
			struct reloc_cache *cache,
			int page)
{
	void *vaddr;

	if (cache->vaddr) {
		kunmap_atomic(unmask_page(cache->vaddr));
	} else {
		unsigned int flushes;
		int ret;

		ret = i915_gem_obj_prepare_shmem_write(obj, &flushes);
		if (ret)
			return ERR_PTR(ret);

		BUILD_BUG_ON(KMAP & CLFLUSH_FLAGS);
		BUILD_BUG_ON((KMAP | CLFLUSH_FLAGS) & LINUX_PAGE_MASK);

		cache->vaddr = flushes | KMAP;
		cache->node.mm = (void *)obj;
		if (flushes)
			mb();
	}

	vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj, page));
	cache->vaddr = unmask_flags(cache->vaddr) | (unsigned long)vaddr;
	cache->page = page;

	return vaddr;
}

static void *reloc_iomap(struct drm_i915_gem_object *obj,
			 struct reloc_cache *cache,
			 int page)
{
	struct i915_ggtt *ggtt = &cache->i915->ggtt;
	unsigned long offset;
	void *vaddr;

	if (cache->vaddr) {
		io_mapping_unmap_atomic((void __force __iomem *) unmask_page(cache->vaddr));
	} else {
		struct i915_vma *vma;
		int ret;

		if (use_cpu_reloc(obj))
			return NULL;

		ret = i915_gem_object_set_to_gtt_domain(obj, true);
		if (ret)
			return ERR_PTR(ret);

		vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
					       PIN_MAPPABLE | PIN_NONBLOCK);
		if (IS_ERR(vma)) {
			memset(&cache->node, 0, sizeof(cache->node));
			ret = drm_mm_insert_node_in_range
				(&ggtt->base.mm, &cache->node,
				 PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE,
				 0, ggtt->mappable_end,
				 DRM_MM_INSERT_LOW);
			if (ret) /* no inactive aperture space, use cpu reloc */
				return NULL;
		} else {
			ret = i915_vma_put_fence(vma);
			if (ret) {
				i915_vma_unpin(vma);
				return ERR_PTR(ret);
			}

			cache->node.start = vma->node.start;
			cache->node.mm = (void *)vma;
		}
	}

	offset = cache->node.start;
	if (cache->node.allocated) {
		wmb();
		ggtt->base.insert_page(&ggtt->base,
				       i915_gem_object_get_dma_address(obj, page),
				       offset, I915_CACHE_NONE, 0);
	} else {
		offset += page << PAGE_SHIFT;
	}

	vaddr = (void __force *) io_mapping_map_atomic_wc(&cache->i915->ggtt.mappable, offset);
	cache->page = page;
	cache->vaddr = (unsigned long)vaddr;

	return vaddr;
}
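
/*
 * reloc_vaddr() hands back a kernel mapping of the object page holding
 * the next relocation, reusing the mapping cached above when consecutive
 * relocations land on the same page.  The low bits of cache->vaddr double
 * as flags (KMAP, CLFLUSH_*), which is why unmask_page()/unmask_flags()
 * exist.  We try an uncached write-combined iomap through the GGTT
 * aperture first and fall back to a CPU kmap when the object is
 * unsuitable or aperture space is exhausted.
 */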

static void *reloc_vaddr(struct drm_i915_gem_object *obj,
			 struct reloc_cache *cache,
			 int page)
{
	void *vaddr;

	if (cache->page == page) {
		vaddr = unmask_page(cache->vaddr);
	} else {
		vaddr = NULL;
		if ((cache->vaddr & KMAP) == 0)
			vaddr = reloc_iomap(obj, cache, page);
		if (!vaddr)
			vaddr = reloc_kmap(obj, cache, page);
	}

	return vaddr;
}

static void clflush_write32(u32 *addr, u32 value, unsigned int flushes)
{
	if (unlikely(flushes & (CLFLUSH_BEFORE | CLFLUSH_AFTER))) {
		if (flushes & CLFLUSH_BEFORE) {
			clflushopt(addr);
			mb();
		}

		*addr = value;

		/* Writes to the same cacheline are serialised by the CPU
		 * (including clflush). On the write path, we only require
		 * that it hits memory in an orderly fashion and place
		 * mb barriers at the start and end of the relocation phase
		 * to ensure ordering of clflush wrt to the system.
		 */
		if (flushes & CLFLUSH_AFTER)
			clflushopt(addr);
	} else
		*addr = value;
}
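
/*
 * Write the presumed target address into the batch.  On generations with
 * 64-bit relocations the value is written as two 32-bit stores: the low
 * dword at reloc->offset and the high dword at reloc->offset + 4, going
 * back through reloc_vaddr() in between so a pair that straddles a page
 * boundary is handled correctly.
 */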

static int
relocate_entry(struct drm_i915_gem_object *obj,
	       const struct drm_i915_gem_relocation_entry *reloc,
	       struct reloc_cache *cache,
	       u64 target_offset)
{
	u64 offset = reloc->offset;
	bool wide = cache->use_64bit_reloc;
	void *vaddr;

	target_offset = relocation_target(reloc, target_offset);
repeat:
	vaddr = reloc_vaddr(obj, cache, offset >> PAGE_SHIFT);
	if (IS_ERR(vaddr))
		return PTR_ERR(vaddr);

	clflush_write32(vaddr + offset_in_page(offset),
			lower_32_bits(target_offset),
			cache->vaddr);

	if (wide) {
		offset += sizeof(u32);
		target_offset >>= 32;
		wide = false;
		goto repeat;
	}

	return 0;
}

static int
i915_gem_execbuffer_relocate_entry(struct i915_vma *vma,
				   struct eb_vmas *eb,
				   struct drm_i915_gem_relocation_entry *reloc,
				   struct reloc_cache *cache)
{
	struct drm_i915_gem_object *obj = vma->obj;
	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
	struct drm_gem_object *target_obj;
	struct drm_i915_gem_object *target_i915_obj;
	struct i915_vma *target_vma;
	uint64_t target_offset;
	int ret;

	/* we already hold a reference to all valid objects */
	target_vma = eb_get_vma(eb, reloc->target_handle);
	if (unlikely(target_vma == NULL))
		return -ENOENT;
	target_i915_obj = target_vma->obj;
	target_obj = &target_vma->obj->base;

	target_offset = gen8_canonical_addr(target_vma->node.start);

	/* Sandybridge PPGTT errata: We need a global gtt mapping for MI and
	 * pipe_control writes because the gpu doesn't properly redirect them
	 * through the ppgtt for non_secure batchbuffers.
	 */
	if (unlikely(IS_GEN6(dev_priv) &&
		     reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION)) {
		ret = i915_vma_bind(target_vma, target_i915_obj->cache_level,
				    PIN_GLOBAL);
		if (WARN_ONCE(ret, "Unexpected failure to bind target VMA!"))
			return ret;
	}

	/* Validate that the target is in a valid r/w GPU domain */
	if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
		DRM_DEBUG("reloc with multiple write domains: "
			  "obj %p target %d offset %d "
			  "read %08x write %08x",
			  obj, reloc->target_handle,
			  (int) reloc->offset,
			  reloc->read_domains,
			  reloc->write_domain);
		return -EINVAL;
	}
	if (unlikely((reloc->write_domain | reloc->read_domains)
		     & ~I915_GEM_GPU_DOMAINS)) {
		DRM_DEBUG("reloc with read/write non-GPU domains: "
			  "obj %p target %d offset %d "
			  "read %08x write %08x",
			  obj, reloc->target_handle,
			  (int) reloc->offset,
			  reloc->read_domains,
			  reloc->write_domain);
		return -EINVAL;
	}

	target_obj->pending_read_domains |= reloc->read_domains;
	target_obj->pending_write_domain |= reloc->write_domain;

	/* If the relocation already has the right value in it, no
	 * more work needs to be done.
	 */
	if (target_offset == reloc->presumed_offset)
		return 0;

	/* Check that the relocation address is valid... */
	if (unlikely(reloc->offset >
		     obj->base.size - (cache->use_64bit_reloc ? 8 : 4))) {
		DRM_DEBUG("Relocation beyond object bounds: "
			  "obj %p target %d offset %d size %d.\n",
			  obj, reloc->target_handle,
			  (int) reloc->offset,
			  (int) obj->base.size);
		return -EINVAL;
	}
	if (unlikely(reloc->offset & 3)) {
		DRM_DEBUG("Relocation not 4-byte aligned: "
			  "obj %p target %d offset %d.\n",
			  obj, reloc->target_handle,
			  (int) reloc->offset);
		return -EINVAL;
	}

	/*
	 * If we write into the object, we need to force the synchronisation
	 * barrier, either with an asynchronous clflush or if we executed the
	 * patching using the GPU (though that should be serialised by the
	 * timeline). To be completely sure, and since we are required to
	 * do relocations we are already stalling, disable the user's opt-out
	 * of our synchronisation.
	 */
	vma->exec_entry->flags &= ~EXEC_OBJECT_ASYNC;

	ret = relocate_entry(obj, reloc, cache, target_offset);
	if (ret)
		return ret;

	/* and update the user's relocation entry */
	reloc->presumed_offset = target_offset;
	return 0;
}
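
/*
 * Fast relocation path: copy the user's relocation entries onto the
 * stack in chunks of up to N_RELOC(512) entries with pagefaults disabled,
 * process each entry, and write back presumed_offset only when the entry
 * actually changed.
 */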

static int
i915_gem_execbuffer_relocate_vma(struct i915_vma *vma,
				 struct eb_vmas *eb)
{
#define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
	struct drm_i915_gem_relocation_entry stack_reloc[N_RELOC(512)];
	struct drm_i915_gem_relocation_entry __user *user_relocs;
	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
	struct reloc_cache cache;
	int remain, ret = 0;

	user_relocs = u64_to_user_ptr(entry->relocs_ptr);
	reloc_cache_init(&cache, eb->i915);

	remain = entry->relocation_count;
	while (remain) {
		struct drm_i915_gem_relocation_entry *r = stack_reloc;
		unsigned long unwritten;
		unsigned int count;

		count = min_t(unsigned int, remain, ARRAY_SIZE(stack_reloc));
		remain -= count;

		/* This is the fast path and we cannot handle a pagefault
		 * whilst holding the struct mutex lest the user pass in the
		 * relocations contained within a mmaped bo. In such a case
		 * the page fault handler would call i915_gem_fault() and
		 * we would try to acquire the struct mutex again. Obviously
		 * this is bad and so lockdep complains vehemently.
		 */
		pagefault_disable();
		unwritten = __copy_from_user_inatomic(r, user_relocs, count*sizeof(r[0]));
		pagefault_enable();
		if (unlikely(unwritten)) {
			ret = -EFAULT;
			goto out;
		}

		do {
			u64 offset = r->presumed_offset;

			ret = i915_gem_execbuffer_relocate_entry(vma, eb, r, &cache);
			if (ret)
				goto out;

			if (r->presumed_offset != offset) {
				pagefault_disable();
				unwritten = __put_user(r->presumed_offset,
						       &user_relocs->presumed_offset);
				pagefault_enable();
				if (unlikely(unwritten)) {
					/* Note that reporting an error now
					 * leaves everything in an inconsistent
					 * state as we have *already* changed
					 * the relocation value inside the
					 * object. As we have not changed
					 * reloc.presumed_offset and will not
					 * change the execobject.offset, on a
					 * subsequent call we would not rewrite
					 * the value inside the object, leaving
					 * it dangling and causing a GPU hang.
					 */
					ret = -EFAULT;
					goto out;
				}
			}

			user_relocs++;
			r++;
		} while (--count);
	}

out:
	reloc_cache_fini(&cache);
	return ret;
#undef N_RELOC
}

static int
i915_gem_execbuffer_relocate_vma_slow(struct i915_vma *vma,
				      struct eb_vmas *eb,
				      struct drm_i915_gem_relocation_entry *relocs)
{
	const struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
	struct reloc_cache cache;
	int i, ret = 0;

	reloc_cache_init(&cache, eb->i915);
	for (i = 0; i < entry->relocation_count; i++) {
		ret = i915_gem_execbuffer_relocate_entry(vma, eb, &relocs[i], &cache);
		if (ret)
			break;
	}
	reloc_cache_fini(&cache);

	return ret;
}

static int
i915_gem_execbuffer_relocate(struct eb_vmas *eb)
{
	struct i915_vma *vma;
	int ret = 0;

	list_for_each_entry(vma, &eb->vmas, exec_list) {
		ret = i915_gem_execbuffer_relocate_vma(vma, eb);
		if (ret)
			break;
	}

	return ret;
}

static bool only_mappable_for_reloc(unsigned int flags)
{
	return (flags & (EXEC_OBJECT_NEEDS_FENCE | __EXEC_OBJECT_NEEDS_MAP)) ==
		__EXEC_OBJECT_NEEDS_MAP;
}
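
/*
 * Pin one vma for execbuf, translating the exec_object2 flags into pin
 * flags: unflagged objects are kept below 4GiB for the Wa32bit* errata,
 * EXEC_OBJECT_PINNED requests a fixed offset, and objects that need a
 * fence or GTT relocation are pinned into the mappable aperture.  If the
 * mappable pin was only wanted for relocation convenience, we retry
 * without PIN_MAPPABLE before giving up.
 */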

static int
i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
				struct intel_engine_cs *engine,
				bool *need_reloc)
{
	struct drm_i915_gem_object *obj = vma->obj;
	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
	uint64_t flags;
	int ret;

	flags = PIN_USER;
	if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
		flags |= PIN_GLOBAL;

	if (!drm_mm_node_allocated(&vma->node)) {
		/* Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset,
		 * limit address to the first 4GBs for unflagged objects.
		 */
		if ((entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) == 0)
			flags |= PIN_ZONE_4G;
		if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
			flags |= PIN_GLOBAL | PIN_MAPPABLE;
		if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS)
			flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
		if (entry->flags & EXEC_OBJECT_PINNED)
			flags |= entry->offset | PIN_OFFSET_FIXED;
		if ((flags & PIN_MAPPABLE) == 0)
			flags |= PIN_HIGH;
	}

	ret = i915_vma_pin(vma,
			   entry->pad_to_size,
			   entry->alignment,
			   flags);
	if ((ret == -ENOSPC || ret == -E2BIG) &&
	    only_mappable_for_reloc(entry->flags))
		ret = i915_vma_pin(vma,
				   entry->pad_to_size,
				   entry->alignment,
				   flags & ~PIN_MAPPABLE);
	if (ret)
		return ret;

	entry->flags |= __EXEC_OBJECT_HAS_PIN;

	if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
		ret = i915_vma_get_fence(vma);
		if (ret)
			return ret;

		if (i915_vma_pin_fence(vma))
			entry->flags |= __EXEC_OBJECT_HAS_FENCE;
	}

	if (entry->offset != vma->node.start) {
		entry->offset = vma->node.start;
		*need_reloc = true;
	}

	if (entry->flags & EXEC_OBJECT_WRITE) {
		obj->base.pending_read_domains = I915_GEM_DOMAIN_RENDER;
		obj->base.pending_write_domain = I915_GEM_DOMAIN_RENDER;
	}

	return 0;
}

static bool
need_reloc_mappable(struct i915_vma *vma)
{
	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;

	if (entry->relocation_count == 0)
		return false;

	if (!i915_vma_is_ggtt(vma))
		return false;

	/* See also use_cpu_reloc() */
	if (HAS_LLC(to_i915(vma->obj->base.dev)))
		return false;

	if (vma->obj->base.write_domain == I915_GEM_DOMAIN_CPU)
		return false;

	return true;
}

static bool
eb_vma_misplaced(struct i915_vma *vma)
{
	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;

	WARN_ON(entry->flags & __EXEC_OBJECT_NEEDS_MAP &&
		!i915_vma_is_ggtt(vma));

	if (entry->alignment && !IS_ALIGNED(vma->node.start, entry->alignment))
		return true;

	if (vma->node.size < entry->pad_to_size)
		return true;

	if (entry->flags & EXEC_OBJECT_PINNED &&
	    vma->node.start != entry->offset)
		return true;

	if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
	    vma->node.start < BATCH_OFFSET_BIAS)
		return true;

	/* avoid costly ping-pong once a batch bo ended up non-mappable */
	if (entry->flags & __EXEC_OBJECT_NEEDS_MAP &&
	    !i915_vma_is_map_and_fenceable(vma))
		return !only_mappable_for_reloc(entry->flags);

	if ((entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) == 0 &&
	    (vma->node.start + vma->node.size - 1) >> 32)
		return true;

	return false;
}
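
/*
 * Reserve GTT space for every buffer in the execbuf.  The list is first
 * reordered so that vmas needing the mappable aperture come first, the
 * rest follow, and softpinned (EXEC_OBJECT_PINNED) vmas go last so that
 * their fixed offsets cannot be stolen by the earlier, more flexible
 * allocations.
 */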

static int
i915_gem_execbuffer_reserve(struct intel_engine_cs *engine,
			    struct list_head *vmas,
			    struct i915_gem_context *ctx,
			    bool *need_relocs)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	struct i915_address_space *vm;
	struct list_head ordered_vmas;
	struct list_head pinned_vmas;
	bool has_fenced_gpu_access = INTEL_GEN(engine->i915) < 4;
	bool needs_unfenced_map = INTEL_INFO(engine->i915)->unfenced_needs_alignment;
	int retry;

	vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm;

	INIT_LIST_HEAD(&ordered_vmas);
	INIT_LIST_HEAD(&pinned_vmas);
	while (!list_empty(vmas)) {
		struct drm_i915_gem_exec_object2 *entry;
		bool need_fence, need_mappable;

		vma = list_first_entry(vmas, struct i915_vma, exec_list);
		obj = vma->obj;
		entry = vma->exec_entry;

		if (ctx->flags & CONTEXT_NO_ZEROMAP)
			entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;

		if (!has_fenced_gpu_access)
			entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE;
		need_fence =
			(entry->flags & EXEC_OBJECT_NEEDS_FENCE ||
			 needs_unfenced_map) &&
			i915_gem_object_is_tiled(obj);
		need_mappable = need_fence || need_reloc_mappable(vma);

		if (entry->flags & EXEC_OBJECT_PINNED)
			list_move_tail(&vma->exec_list, &pinned_vmas);
		else if (need_mappable) {
			entry->flags |= __EXEC_OBJECT_NEEDS_MAP;
			list_move(&vma->exec_list, &ordered_vmas);
		} else
			list_move_tail(&vma->exec_list, &ordered_vmas);

		obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND;
		obj->base.pending_write_domain = 0;
	}
	list_splice(&ordered_vmas, vmas);
	list_splice(&pinned_vmas, vmas);

	/* Attempt to pin all of the buffers into the GTT.
	 * This is done in 3 phases:
	 *
	 * 1a. Unbind all objects that do not match the GTT constraints for
	 *     the execbuffer (fenceable, mappable, alignment etc).
	 * 1b. Increment pin count for already bound objects.
	 * 2.  Bind new objects.
	 * 3.  Decrement pin count.
	 *
	 * This avoids unnecessary unbinding of later objects in order to make
	 * room for the earlier objects *unless* we need to defragment.
	 */
	retry = 0;
	do {
		int ret = 0;

		/* Unbind any ill-fitting objects or pin. */
		list_for_each_entry(vma, vmas, exec_list) {
			if (!drm_mm_node_allocated(&vma->node))
				continue;

			if (eb_vma_misplaced(vma))
				ret = i915_vma_unbind(vma);
			else
				ret = i915_gem_execbuffer_reserve_vma(vma,
								      engine,
								      need_relocs);
			if (ret)
				goto err;
		}

		/* Bind fresh objects */
		list_for_each_entry(vma, vmas, exec_list) {
			if (drm_mm_node_allocated(&vma->node))
				continue;

			ret = i915_gem_execbuffer_reserve_vma(vma, engine,
							      need_relocs);
			if (ret)
				goto err;
		}

err:
		if (ret != -ENOSPC || retry++)
			return ret;

		/* Decrement pin count for bound objects */
		list_for_each_entry(vma, vmas, exec_list)
			i915_gem_execbuffer_unreserve_vma(vma);

		ret = i915_gem_evict_vm(vm, true);
		if (ret)
			return ret;
	} while (1);
}

static int
i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
				  struct drm_i915_gem_execbuffer2 *args,
				  struct drm_file *file,
				  struct intel_engine_cs *engine,
				  struct eb_vmas *eb,
				  struct drm_i915_gem_exec_object2 *exec,
				  struct i915_gem_context *ctx)
{
	struct drm_i915_gem_relocation_entry *reloc;
	struct i915_address_space *vm;
	struct i915_vma *vma;
	bool need_relocs;
	int *reloc_offset;
	int i, total, ret;
	unsigned count = args->buffer_count;

	vm = list_first_entry(&eb->vmas, struct i915_vma, exec_list)->vm;

	/* We may process another execbuffer during the unlock... */
	while (!list_empty(&eb->vmas)) {
		vma = list_first_entry(&eb->vmas, struct i915_vma, exec_list);
		list_del_init(&vma->exec_list);
		i915_gem_execbuffer_unreserve_vma(vma);
		i915_vma_put(vma);
	}

	mutex_unlock(&dev->struct_mutex);

	total = 0;
	for (i = 0; i < count; i++)
		total += exec[i].relocation_count;

	reloc_offset = drm_malloc_ab(count, sizeof(*reloc_offset));
	reloc = drm_malloc_ab(total, sizeof(*reloc));
	if (reloc == NULL || reloc_offset == NULL) {
		drm_free_large(reloc);
		drm_free_large(reloc_offset);
		mutex_lock(&dev->struct_mutex);
		return -ENOMEM;
	}

	total = 0;
	for (i = 0; i < count; i++) {
		struct drm_i915_gem_relocation_entry __user *user_relocs;
		u64 invalid_offset = (u64)-1;
		int j;

		user_relocs = u64_to_user_ptr(exec[i].relocs_ptr);

		if (copy_from_user(reloc+total, user_relocs,
				   exec[i].relocation_count * sizeof(*reloc))) {
			ret = -EFAULT;
			mutex_lock(&dev->struct_mutex);
			goto err;
		}

		/* As we do not update the known relocation offsets after
		 * relocating (due to the complexities in lock handling),
		 * we need to mark them as invalid now so that we force the
		 * relocation processing next time. Just in case the target
		 * object is evicted and then rebound into its old
		 * presumed_offset before the next execbuffer - if that
		 * happened we would make the mistake of assuming that the
		 * relocations were valid.
		 */
		for (j = 0; j < exec[i].relocation_count; j++) {
			if (__copy_to_user(&user_relocs[j].presumed_offset,
					   &invalid_offset,
					   sizeof(invalid_offset))) {
				ret = -EFAULT;
				mutex_lock(&dev->struct_mutex);
				goto err;
			}
		}

		reloc_offset[i] = total;
		total += exec[i].relocation_count;
	}

	ret = i915_mutex_lock_interruptible(dev);
	if (ret) {
		mutex_lock(&dev->struct_mutex);
		goto err;
	}

	/* reacquire the objects */
	eb_reset(eb);
	ret = eb_lookup_vmas(eb, exec, args, vm, file);
	if (ret)
		goto err;

	need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
	ret = i915_gem_execbuffer_reserve(engine, &eb->vmas, ctx,
					  &need_relocs);
	if (ret)
		goto err;

	list_for_each_entry(vma, &eb->vmas, exec_list) {
		int offset = vma->exec_entry - exec;
		ret = i915_gem_execbuffer_relocate_vma_slow(vma, eb,
							    reloc + reloc_offset[offset]);
		if (ret)
			goto err;
	}

	/* Leave the user relocations as they are: this is the painfully slow
	 * path, and we want to avoid the complication of dropping the lock
	 * whilst having buffers reserved in the aperture and so causing
	 * spurious ENOSPC for random operations.
	 */

err:
	drm_free_large(reloc);
	drm_free_large(reloc_offset);
	return ret;
}

static int
i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
				struct list_head *vmas)
{
	struct i915_vma *vma;
	int ret;

	list_for_each_entry(vma, vmas, exec_list) {
		struct drm_i915_gem_object *obj = vma->obj;

		if (vma->exec_entry->flags & EXEC_OBJECT_ASYNC)
			continue;

		if (obj->base.write_domain & I915_GEM_DOMAIN_CPU) {
			i915_gem_clflush_object(obj, 0);
			obj->base.write_domain = 0;
		}

		ret = i915_gem_request_await_object
			(req, obj, obj->base.pending_write_domain);
		if (ret)
			return ret;
	}

	/* Unconditionally flush any chipset caches (for streaming writes). */
	i915_gem_chipset_flush(req->engine->i915);

	/* Unconditionally invalidate GPU caches and TLBs. */
	return req->engine->emit_flush(req, EMIT_INVALIDATE);
}

static bool
i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
{
	if (exec->flags & __I915_EXEC_UNKNOWN_FLAGS)
		return false;

	/* Kernel clipping was a DRI1 misfeature */
	if (exec->num_cliprects || exec->cliprects_ptr)
		return false;

	if (exec->DR4 == 0xffffffff) {
		DRM_DEBUG("UXA submitting garbage DR4, fixing up\n");
		exec->DR4 = 0;
	}
	if (exec->DR1 || exec->DR4)
		return false;

	if ((exec->batch_start_offset | exec->batch_len) & 0x7)
		return false;

	return true;
}

static int
validate_exec_list(struct drm_device *dev,
		   struct drm_i915_gem_exec_object2 *exec,
		   int count)
{
	unsigned relocs_total = 0;
	unsigned relocs_max = UINT_MAX / sizeof(struct drm_i915_gem_relocation_entry);
	unsigned invalid_flags;
	int i;

	/* INTERNAL flags must not overlap with external ones */
	BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS & ~__EXEC_OBJECT_UNKNOWN_FLAGS);

	invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS;
	if (USES_FULL_PPGTT(dev))
		invalid_flags |= EXEC_OBJECT_NEEDS_GTT;

	for (i = 0; i < count; i++) {
		char __user *ptr = u64_to_user_ptr(exec[i].relocs_ptr);
		int length; /* limited by fault_in_pages_readable() */

		if (exec[i].flags & invalid_flags)
			return -EINVAL;

		/* Offset can be used as input (EXEC_OBJECT_PINNED), reject
		 * any non-page-aligned or non-canonical addresses.
		 */
		if (exec[i].flags & EXEC_OBJECT_PINNED) {
			if (exec[i].offset !=
			    gen8_canonical_addr(exec[i].offset & LINUX_PAGE_MASK))
				return -EINVAL;
		}

		/* From drm_mm perspective address space is contiguous,
		 * so from this point we're always using non-canonical
		 * form internally.
		 */
		exec[i].offset = gen8_noncanonical_addr(exec[i].offset);

		if (exec[i].alignment && !is_power_of_2(exec[i].alignment))
			return -EINVAL;

		/* pad_to_size was once a reserved field, so sanitize it */
		if (exec[i].flags & EXEC_OBJECT_PAD_TO_SIZE) {
			if (offset_in_page(exec[i].pad_to_size))
				return -EINVAL;
		} else {
			exec[i].pad_to_size = 0;
		}

		/* First check for malicious input causing overflow in
		 * the worst case where we need to allocate the entire
		 * relocation tree as a single array.
		 */
		if (exec[i].relocation_count > relocs_max - relocs_total)
			return -EINVAL;
		relocs_total += exec[i].relocation_count;

		length = exec[i].relocation_count *
			sizeof(struct drm_i915_gem_relocation_entry);
		/*
		 * We must check that the entire relocation array is safe
		 * to read, but since we may need to update the presumed
		 * offsets during execution, check for full write access.
		 */
#if 0
		if (!access_ok(VERIFY_WRITE, ptr, length))
			return -EFAULT;
#endif

		if (likely(!i915.prefault_disable)) {
			if (fault_in_pages_readable(ptr, length))
				return -EFAULT;
		}
	}

	return 0;
}

static struct i915_gem_context *
i915_gem_validate_context(struct drm_device *dev, struct drm_file *file,
			  struct intel_engine_cs *engine, const u32 ctx_id)
{
	struct i915_gem_context *ctx;

	ctx = i915_gem_context_lookup(file->driver_priv, ctx_id);
	if (IS_ERR(ctx))
		return ctx;

	if (i915_gem_context_is_banned(ctx)) {
		DRM_DEBUG("Context %u tried to submit while banned\n", ctx_id);
		return ERR_PTR(-EIO);
	}

	return ctx;
}

static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
	return !(obj->cache_level == I915_CACHE_NONE ||
		 obj->cache_level == I915_CACHE_WT);
}

void i915_vma_move_to_active(struct i915_vma *vma,
			     struct drm_i915_gem_request *req,
			     unsigned int flags)
{
	struct drm_i915_gem_object *obj = vma->obj;
	const unsigned int idx = req->engine->id;

	lockdep_assert_held(&req->i915->drm.struct_mutex);
	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));

	/* Add a reference if we're newly entering the active list.
	 * The order in which we add operations to the retirement queue is
	 * vital here: mark_active adds to the start of the callback list,
	 * such that subsequent callbacks are called first. Therefore we
	 * add the active reference first and queue for it to be dropped
	 * *last*.
	 */
	if (!i915_vma_is_active(vma))
		obj->active_count++;
	i915_vma_set_active(vma, idx);
	i915_gem_active_set(&vma->last_read[idx], req);
	list_move_tail(&vma->vm_link, &vma->vm->active_list);

	if (flags & EXEC_OBJECT_WRITE) {
		if (intel_fb_obj_invalidate(obj, ORIGIN_CS))
			i915_gem_active_set(&obj->frontbuffer_write, req);

		/* update for the implicit flush after a batch */
		obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
		if (!obj->cache_dirty && gpu_write_needs_clflush(obj))
			obj->cache_dirty = true;
	}

	if (flags & EXEC_OBJECT_NEEDS_FENCE)
		i915_gem_active_set(&vma->last_fence, req);
}

static void eb_export_fence(struct drm_i915_gem_object *obj,
			    struct drm_i915_gem_request *req,
			    unsigned int flags)
{
	struct reservation_object *resv = obj->resv;

	/* Ignore errors from failing to allocate the new fence, we can't
	 * handle an error right now. Worst case should be missed
	 * synchronisation leading to rendering corruption.
	 */
1322 */ 1323 reservation_object_lock(resv, NULL); 1324 if (flags & EXEC_OBJECT_WRITE) 1325 reservation_object_add_excl_fence(resv, &req->fence); 1326 else if (reservation_object_reserve_shared(resv) == 0) 1327 reservation_object_add_shared_fence(resv, &req->fence); 1328 reservation_object_unlock(resv); 1329 } 1330 1331 static void 1332 i915_gem_execbuffer_move_to_active(struct list_head *vmas, 1333 struct drm_i915_gem_request *req) 1334 { 1335 struct i915_vma *vma; 1336 1337 list_for_each_entry(vma, vmas, exec_list) { 1338 struct drm_i915_gem_object *obj = vma->obj; 1339 1340 obj->base.write_domain = obj->base.pending_write_domain; 1341 if (obj->base.write_domain) 1342 vma->exec_entry->flags |= EXEC_OBJECT_WRITE; 1343 else 1344 obj->base.pending_read_domains |= obj->base.read_domains; 1345 obj->base.read_domains = obj->base.pending_read_domains; 1346 1347 i915_vma_move_to_active(vma, req, vma->exec_entry->flags); 1348 eb_export_fence(obj, req, vma->exec_entry->flags); 1349 } 1350 } 1351 1352 static int 1353 i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req) 1354 { 1355 u32 *cs; 1356 int i; 1357 1358 if (!IS_GEN7(req->i915) || req->engine->id != RCS) { 1359 DRM_DEBUG("sol reset is gen7/rcs only\n"); 1360 return -EINVAL; 1361 } 1362 1363 cs = intel_ring_begin(req, 4 * 3); 1364 if (IS_ERR(cs)) 1365 return PTR_ERR(cs); 1366 1367 for (i = 0; i < 4; i++) { 1368 *cs++ = MI_LOAD_REGISTER_IMM(1); 1369 *cs++ = i915_mmio_reg_offset(GEN7_SO_WRITE_OFFSET(i)); 1370 *cs++ = 0; 1371 } 1372 1373 intel_ring_advance(req, cs); 1374 1375 return 0; 1376 } 1377 1378 static struct i915_vma * 1379 i915_gem_execbuffer_parse(struct intel_engine_cs *engine, 1380 struct drm_i915_gem_exec_object2 *shadow_exec_entry, 1381 struct drm_i915_gem_object *batch_obj, 1382 struct eb_vmas *eb, 1383 u32 batch_start_offset, 1384 u32 batch_len, 1385 bool is_master) 1386 { 1387 struct drm_i915_gem_object *shadow_batch_obj; 1388 struct i915_vma *vma; 1389 int ret; 1390 1391 shadow_batch_obj = i915_gem_batch_pool_get(&engine->batch_pool, 1392 PAGE_ALIGN(batch_len)); 1393 if (IS_ERR(shadow_batch_obj)) 1394 return ERR_CAST(shadow_batch_obj); 1395 1396 ret = intel_engine_cmd_parser(engine, 1397 batch_obj, 1398 shadow_batch_obj, 1399 batch_start_offset, 1400 batch_len, 1401 is_master); 1402 if (ret) { 1403 if (ret == -EACCES) /* unhandled chained batch */ 1404 vma = NULL; 1405 else 1406 vma = ERR_PTR(ret); 1407 goto out; 1408 } 1409 1410 vma = i915_gem_object_ggtt_pin(shadow_batch_obj, NULL, 0, 0, 0); 1411 if (IS_ERR(vma)) 1412 goto out; 1413 1414 memset(shadow_exec_entry, 0, sizeof(*shadow_exec_entry)); 1415 1416 vma->exec_entry = shadow_exec_entry; 1417 vma->exec_entry->flags = __EXEC_OBJECT_HAS_PIN; 1418 i915_gem_object_get(shadow_batch_obj); 1419 list_add_tail(&vma->exec_list, &eb->vmas); 1420 1421 out: 1422 i915_gem_object_unpin_pages(shadow_batch_obj); 1423 return vma; 1424 } 1425 1426 static void 1427 add_to_client(struct drm_i915_gem_request *req, 1428 struct drm_file *file) 1429 { 1430 req->file_priv = file->driver_priv; 1431 list_add_tail(&req->client_link, &req->file_priv->mm.request_list); 1432 } 1433 1434 static int 1435 execbuf_submit(struct i915_execbuffer_params *params, 1436 struct drm_i915_gem_execbuffer2 *args, 1437 struct list_head *vmas) 1438 { 1439 u64 exec_start, exec_len; 1440 int ret; 1441 1442 ret = i915_gem_execbuffer_move_to_gpu(params->request, vmas); 1443 if (ret) 1444 return ret; 1445 1446 ret = i915_switch_context(params->request); 1447 if (ret) 1448 return ret; 1449 1450 if (args->flags & 

static struct i915_vma *
i915_gem_execbuffer_parse(struct intel_engine_cs *engine,
			  struct drm_i915_gem_exec_object2 *shadow_exec_entry,
			  struct drm_i915_gem_object *batch_obj,
			  struct eb_vmas *eb,
			  u32 batch_start_offset,
			  u32 batch_len,
			  bool is_master)
{
	struct drm_i915_gem_object *shadow_batch_obj;
	struct i915_vma *vma;
	int ret;

	shadow_batch_obj = i915_gem_batch_pool_get(&engine->batch_pool,
						   PAGE_ALIGN(batch_len));
	if (IS_ERR(shadow_batch_obj))
		return ERR_CAST(shadow_batch_obj);

	ret = intel_engine_cmd_parser(engine,
				      batch_obj,
				      shadow_batch_obj,
				      batch_start_offset,
				      batch_len,
				      is_master);
	if (ret) {
		if (ret == -EACCES) /* unhandled chained batch */
			vma = NULL;
		else
			vma = ERR_PTR(ret);
		goto out;
	}

	vma = i915_gem_object_ggtt_pin(shadow_batch_obj, NULL, 0, 0, 0);
	if (IS_ERR(vma))
		goto out;

	memset(shadow_exec_entry, 0, sizeof(*shadow_exec_entry));

	vma->exec_entry = shadow_exec_entry;
	vma->exec_entry->flags = __EXEC_OBJECT_HAS_PIN;
	i915_gem_object_get(shadow_batch_obj);
	list_add_tail(&vma->exec_list, &eb->vmas);

out:
	i915_gem_object_unpin_pages(shadow_batch_obj);
	return vma;
}

static void
add_to_client(struct drm_i915_gem_request *req,
	      struct drm_file *file)
{
	req->file_priv = file->driver_priv;
	list_add_tail(&req->client_link, &req->file_priv->mm.request_list);
}

static int
execbuf_submit(struct i915_execbuffer_params *params,
	       struct drm_i915_gem_execbuffer2 *args,
	       struct list_head *vmas)
{
	u64 exec_start, exec_len;
	int ret;

	ret = i915_gem_execbuffer_move_to_gpu(params->request, vmas);
	if (ret)
		return ret;

	ret = i915_switch_context(params->request);
	if (ret)
		return ret;

	if (args->flags & I915_EXEC_CONSTANTS_MASK) {
		DRM_DEBUG("I915_EXEC_CONSTANTS_* unsupported\n");
		return -EINVAL;
	}

	if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
		ret = i915_reset_gen7_sol_offsets(params->request);
		if (ret)
			return ret;
	}

	exec_len = args->batch_len;
	exec_start = params->batch->node.start +
		     params->args_batch_start_offset;

	if (exec_len == 0)
		exec_len = params->batch->size - params->args_batch_start_offset;

	ret = params->engine->emit_bb_start(params->request,
					    exec_start, exec_len,
					    params->dispatch_flags);
	if (ret)
		return ret;

	i915_gem_execbuffer_move_to_active(vmas, params->request);

	return 0;
}

/**
 * Find one BSD ring to dispatch the corresponding BSD command.
 * The engine index is returned.
 */
static unsigned int
gen8_dispatch_bsd_engine(struct drm_i915_private *dev_priv,
			 struct drm_file *file)
{
	struct drm_i915_file_private *file_priv = file->driver_priv;

	/* Check whether the file_priv has already selected one ring. */
	if ((int)file_priv->bsd_engine < 0)
		file_priv->bsd_engine = atomic_fetch_xor(1,
			 &dev_priv->mm.bsd_engine_dispatch_index);

	return file_priv->bsd_engine;
}

#define I915_USER_RINGS (4)

static const enum intel_engine_id user_ring_map[I915_USER_RINGS + 1] = {
	[I915_EXEC_DEFAULT]	= RCS,
	[I915_EXEC_RENDER]	= RCS,
	[I915_EXEC_BLT]		= BCS,
	[I915_EXEC_BSD]		= VCS,
	[I915_EXEC_VEBOX]	= VECS
};

static struct intel_engine_cs *
eb_select_engine(struct drm_i915_private *dev_priv,
		 struct drm_file *file,
		 struct drm_i915_gem_execbuffer2 *args)
{
	unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK;
	struct intel_engine_cs *engine;

	if (user_ring_id > I915_USER_RINGS) {
		DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id);
		return NULL;
	}

	if ((user_ring_id != I915_EXEC_BSD) &&
	    ((args->flags & I915_EXEC_BSD_MASK) != 0)) {
		DRM_DEBUG("execbuf with non bsd ring but with invalid "
			  "bsd dispatch flags: %d\n", (int)(args->flags));
		return NULL;
	}

	if (user_ring_id == I915_EXEC_BSD && HAS_BSD2(dev_priv)) {
		unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK;

		if (bsd_idx == I915_EXEC_BSD_DEFAULT) {
			bsd_idx = gen8_dispatch_bsd_engine(dev_priv, file);
		} else if (bsd_idx >= I915_EXEC_BSD_RING1 &&
			   bsd_idx <= I915_EXEC_BSD_RING2) {
			bsd_idx >>= I915_EXEC_BSD_SHIFT;
			bsd_idx--;
		} else {
			DRM_DEBUG("execbuf with unknown bsd ring: %u\n",
				  bsd_idx);
			return NULL;
		}

		engine = dev_priv->engine[_VCS(bsd_idx)];
	} else {
		engine = dev_priv->engine[user_ring_map[user_ring_id]];
	}

	if (!engine) {
		DRM_DEBUG("execbuf with invalid ring: %u\n", user_ring_id);
		return NULL;
	}

	return engine;
}
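
/*
 * The heart of execbuf: validate the arguments, look up every buffer,
 * reserve GTT space for them all, apply relocations if the presumed
 * offsets went stale, optionally substitute a parsed shadow batch, then
 * build a request and submit it to the selected engine.  Called for both
 * the legacy and the execbuffer2 ioctls.
 */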

static int
i915_gem_do_execbuffer(struct drm_device *dev, void *data,
		       struct drm_file *file,
		       struct drm_i915_gem_execbuffer2 *args,
		       struct drm_i915_gem_exec_object2 *exec)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct i915_ggtt *ggtt = &dev_priv->ggtt;
	struct eb_vmas *eb;
	struct drm_i915_gem_exec_object2 shadow_exec_entry;
	struct intel_engine_cs *engine;
	struct i915_gem_context *ctx;
	struct i915_address_space *vm;
	struct i915_execbuffer_params params_master; /* XXX: will be removed later */
	struct i915_execbuffer_params *params = &params_master;
	const u32 ctx_id = i915_execbuffer2_get_context_id(*args);
	u32 dispatch_flags;
	struct dma_fence *in_fence = NULL;
	struct sync_file *out_fence = NULL;
	int out_fence_fd = -1;
	int ret;
	bool need_relocs;

	if (!i915_gem_check_execbuffer(args))
		return -EINVAL;

	ret = validate_exec_list(dev, exec, args->buffer_count);
	if (ret)
		return ret;

	dispatch_flags = 0;
	if (args->flags & I915_EXEC_SECURE) {
#if 0
		if (!drm_is_current_master(file) || !capable(CAP_SYS_ADMIN))
			return -EPERM;
#endif

		dispatch_flags |= I915_DISPATCH_SECURE;
	}
	if (args->flags & I915_EXEC_IS_PINNED)
		dispatch_flags |= I915_DISPATCH_PINNED;

	engine = eb_select_engine(dev_priv, file, args);
	if (!engine)
		return -EINVAL;

	if (args->buffer_count < 1) {
		DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
		return -EINVAL;
	}

	if (args->flags & I915_EXEC_RESOURCE_STREAMER) {
		if (!HAS_RESOURCE_STREAMER(dev_priv)) {
			DRM_DEBUG("RS is only allowed for Haswell, Gen8 and above\n");
			return -EINVAL;
		}
		if (engine->id != RCS) {
			DRM_DEBUG("RS is not available on %s\n",
				  engine->name);
			return -EINVAL;
		}

		dispatch_flags |= I915_DISPATCH_RS;
	}

	if (args->flags & I915_EXEC_FENCE_IN) {
		in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
		if (!in_fence)
			return -EINVAL;
	}

	if (args->flags & I915_EXEC_FENCE_OUT) {
		out_fence_fd = get_unused_fd_flags(O_CLOEXEC);
		if (out_fence_fd < 0) {
			ret = out_fence_fd;
			goto err_in_fence;
		}
	}

	/* Take a local wakeref for preparing to dispatch the execbuf as
	 * we expect to access the hardware fairly frequently in the
	 * process. Upon first dispatch, we acquire another prolonged
	 * wakeref that we hold until the GPU has been idle for at least
	 * 100ms.
	 */
	intel_runtime_pm_get(dev_priv);

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		goto pre_mutex_err;

	ctx = i915_gem_validate_context(dev, file, engine, ctx_id);
	if (IS_ERR(ctx)) {
		mutex_unlock(&dev->struct_mutex);
		ret = PTR_ERR(ctx);
		goto pre_mutex_err;
	}

	i915_gem_context_get(ctx);

	if (ctx->ppgtt)
		vm = &ctx->ppgtt->base;
	else
		vm = &ggtt->base;

	memset(&params_master, 0x00, sizeof(params_master));

	eb = eb_create(dev_priv, args);
	if (eb == NULL) {
		i915_gem_context_put(ctx);
		mutex_unlock(&dev->struct_mutex);
		ret = -ENOMEM;
		goto pre_mutex_err;
	}

	/* Look up object handles */
	ret = eb_lookup_vmas(eb, exec, args, vm, file);
	if (ret)
		goto err;

	/* take note of the batch buffer before we might reorder the lists */
	params->batch = eb_get_batch(eb);

	/* Move the objects en-masse into the GTT, evicting if necessary. */
	need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
	ret = i915_gem_execbuffer_reserve(engine, &eb->vmas, ctx,
					  &need_relocs);
	if (ret)
		goto err;

	/* The objects are in their final locations, apply the relocations. */
	if (need_relocs)
		ret = i915_gem_execbuffer_relocate(eb);
	if (ret) {
		if (ret == -EFAULT) {
			ret = i915_gem_execbuffer_relocate_slow(dev, args, file,
								engine,
								eb, exec, ctx);
			BUG_ON(!mutex_is_locked(&dev->struct_mutex));
		}
		if (ret)
			goto err;
	}

	/* Set the pending read domains for the batch buffer to COMMAND */
	if (params->batch->obj->base.pending_write_domain) {
		DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
		ret = -EINVAL;
		goto err;
	}
	if (args->batch_start_offset > params->batch->size ||
	    args->batch_len > params->batch->size - args->batch_start_offset) {
		DRM_DEBUG("Attempting to use out-of-bounds batch\n");
		ret = -EINVAL;
		goto err;
	}

	params->args_batch_start_offset = args->batch_start_offset;
	if (engine->needs_cmd_parser && args->batch_len) {
		struct i915_vma *vma;

		vma = i915_gem_execbuffer_parse(engine, &shadow_exec_entry,
						params->batch->obj,
						eb,
						args->batch_start_offset,
						args->batch_len,
						drm_is_current_master(file));
		if (IS_ERR(vma)) {
			ret = PTR_ERR(vma);
			goto err;
		}

		if (vma) {
			/*
			 * Batch parsed and accepted:
			 *
			 * Set the DISPATCH_SECURE bit to remove the NON_SECURE
			 * bit from MI_BATCH_BUFFER_START commands issued in
			 * the dispatch_execbuffer implementations. We
			 * specifically don't want that set on batches the
			 * command parser has accepted.
			 */
			dispatch_flags |= I915_DISPATCH_SECURE;
			params->args_batch_start_offset = 0;
			params->batch = vma;
		}
	}

	params->batch->obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;

	/* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
	 * batch" bit. Hence we need to pin secure batches into the global gtt.
	 * hsw should have this fixed, but bdw mucks it up again. */
	if (dispatch_flags & I915_DISPATCH_SECURE) {
		struct drm_i915_gem_object *obj = params->batch->obj;
		struct i915_vma *vma;

		/*
		 * So on first glance it looks freaky that we pin the batch here
		 * outside of the reservation loop. But:
		 * - The batch is already pinned into the relevant ppgtt, so we
		 *   already have the backing storage fully allocated.
		 * - No other BO uses the global gtt (well contexts, but meh),
		 *   so we don't really have issues with multiple objects not
		 *   fitting due to fragmentation.
		 * So this is actually safe.
		 */
		vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0);
		if (IS_ERR(vma)) {
			ret = PTR_ERR(vma);
			goto err;
		}

		params->batch = vma;
	}

	/* Allocate a request for this batch buffer nice and early. */
	params->request = i915_gem_request_alloc(engine, ctx);
	if (IS_ERR(params->request)) {
		ret = PTR_ERR(params->request);
		goto err_batch_unpin;
	}

	if (in_fence) {
		ret = i915_gem_request_await_dma_fence(params->request,
						       in_fence);
		if (ret < 0)
			goto err_request;
	}

	if (out_fence_fd != -1) {
		out_fence = sync_file_create(&params->request->fence);
		if (!out_fence) {
			ret = -ENOMEM;
			goto err_request;
		}
	}

	/* Whilst this request exists, batch_obj will be on the
	 * active_list, and so will hold the active reference. Only when this
	 * request is retired will the batch_obj be moved onto the
	 * inactive_list and lose its active reference.
	 * Hence we do not need to explicitly hold another reference here.
	 */
	params->request->batch = params->batch;

	/*
	 * Save assorted stuff away to pass through to *_submission().
	 * NB: This data should be 'persistent' and not local as it will be
	 * kept around beyond the duration of the IOCTL once the GPU
	 * scheduler arrives.
	 */
	params->dev = dev;
	params->file = file;
	params->engine = engine;
	params->dispatch_flags = dispatch_flags;
	params->ctx = ctx;

	trace_i915_gem_request_queue(params->request, dispatch_flags);

	ret = execbuf_submit(params, args, &eb->vmas);
err_request:
	__i915_add_request(params->request, ret == 0);
	add_to_client(params->request, file);

	if (out_fence) {
		if (ret == 0) {
			fd_install(out_fence_fd, out_fence->file);
			args->rsvd2 &= GENMASK_ULL(31, 0); /* keep in-fence */
			args->rsvd2 |= (u64)out_fence_fd << 32;
			out_fence_fd = -1;
		} else {
			fput(out_fence->file);
		}
	}

err_batch_unpin:
	/*
	 * FIXME: We crucially rely upon the active tracking for the (ppgtt)
	 * batch vma for correctness. For less ugly and less fragility this
	 * needs to be adjusted to also track the ggtt batch vma properly as
	 * active.
	 */
	if (dispatch_flags & I915_DISPATCH_SECURE)
		i915_vma_unpin(params->batch);
err:
	/* the request owns the ref now */
	i915_gem_context_put(ctx);
	eb_destroy(eb);

	mutex_unlock(&dev->struct_mutex);

pre_mutex_err:
	/* intel_gpu_busy should also get a ref, so it will free when the device
	 * is really idle. */
	intel_runtime_pm_put(dev_priv);
	if (out_fence_fd != -1)
		put_unused_fd(out_fence_fd);
err_in_fence:
	dma_fence_put(in_fence);
	return ret;
}

/*
 * Legacy execbuffer just creates an exec2 list from the original exec object
 * list array and passes it to the real function.
 */
int
i915_gem_execbuffer(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_gem_execbuffer *args = data;
	struct drm_i915_gem_execbuffer2 exec2;
	struct drm_i915_gem_exec_object *exec_list = NULL;
	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
	int ret, i;

	if (args->buffer_count < 1) {
		DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
		return -EINVAL;
	}

	/* Copy in the exec list from userland */
	exec_list = drm_malloc_ab(sizeof(*exec_list), args->buffer_count);
	exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count);
	if (exec_list == NULL || exec2_list == NULL) {
		DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
			  args->buffer_count);
		drm_free_large(exec_list);
		drm_free_large(exec2_list);
		return -ENOMEM;
	}
	ret = copy_from_user(exec_list,
			     u64_to_user_ptr(args->buffers_ptr),
			     sizeof(*exec_list) * args->buffer_count);
	if (ret != 0) {
		DRM_DEBUG("copy %d exec entries failed %d\n",
			  args->buffer_count, ret);
		drm_free_large(exec_list);
		drm_free_large(exec2_list);
		return -EFAULT;
	}

	for (i = 0; i < args->buffer_count; i++) {
		exec2_list[i].handle = exec_list[i].handle;
		exec2_list[i].relocation_count = exec_list[i].relocation_count;
		exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
		exec2_list[i].alignment = exec_list[i].alignment;
		exec2_list[i].offset = exec_list[i].offset;
		if (INTEL_GEN(to_i915(dev)) < 4)
			exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE;
		else
			exec2_list[i].flags = 0;
	}

	exec2.buffers_ptr = args->buffers_ptr;
	exec2.buffer_count = args->buffer_count;
	exec2.batch_start_offset = args->batch_start_offset;
	exec2.batch_len = args->batch_len;
	exec2.DR1 = args->DR1;
	exec2.DR4 = args->DR4;
	exec2.num_cliprects = args->num_cliprects;
	exec2.cliprects_ptr = args->cliprects_ptr;
	exec2.flags = I915_EXEC_RENDER;
	i915_execbuffer2_set_context_id(exec2, 0);

	ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list);
	if (!ret) {
		struct drm_i915_gem_exec_object __user *user_exec_list =
			u64_to_user_ptr(args->buffers_ptr);

		/* Copy the new buffer offsets back to the user's exec list. */
		for (i = 0; i < args->buffer_count; i++) {
			exec2_list[i].offset =
				gen8_canonical_addr(exec2_list[i].offset);
			ret = __copy_to_user(&user_exec_list[i].offset,
					     &exec2_list[i].offset,
					     sizeof(user_exec_list[i].offset));
			if (ret) {
				ret = -EFAULT;
				DRM_DEBUG("failed to copy %d exec entries "
					  "back to user (%d)\n",
					  args->buffer_count, ret);
				break;
			}
		}
	}

	drm_free_large(exec_list);
	drm_free_large(exec2_list);
	return ret;
}

int
i915_gem_execbuffer2(struct drm_device *dev, void *data,
		     struct drm_file *file)
{
	struct drm_i915_gem_execbuffer2 *args = data;
	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
	int ret;

	if (args->buffer_count < 1 ||
	    args->buffer_count > UINT_MAX / sizeof(*exec2_list)) {
		DRM_DEBUG("execbuf2 with %d buffers\n", args->buffer_count);
		return -EINVAL;
	}

	exec2_list = drm_malloc_gfp(args->buffer_count,
				    sizeof(*exec2_list),
				    GFP_TEMPORARY);
	if (exec2_list == NULL) {
		DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
			  args->buffer_count);
		return -ENOMEM;
	}
	ret = copy_from_user(exec2_list,
			     u64_to_user_ptr(args->buffers_ptr),
			     sizeof(*exec2_list) * args->buffer_count);
	if (ret != 0) {
		DRM_DEBUG("copy %d exec entries failed %d\n",
			  args->buffer_count, ret);
		drm_free_large(exec2_list);
		return -EFAULT;
	}

	ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list);
	if (!ret) {
		/* Copy the new buffer offsets back to the user's exec list. */
		struct drm_i915_gem_exec_object2 __user *user_exec_list =
			u64_to_user_ptr(args->buffers_ptr);
		int i;

		for (i = 0; i < args->buffer_count; i++) {
			exec2_list[i].offset =
				gen8_canonical_addr(exec2_list[i].offset);
			ret = __copy_to_user(&user_exec_list[i].offset,
					     &exec2_list[i].offset,
					     sizeof(user_exec_list[i].offset));
			if (ret) {
				ret = -EFAULT;
				DRM_DEBUG("failed to copy %d exec entries "
					  "back to user\n",
					  args->buffer_count);
				break;
			}
		}
	}

	drm_free_large(exec2_list);
	return ret;
}