1c349dbc7Sjsg /* 2c349dbc7Sjsg * SPDX-License-Identifier: MIT 3c349dbc7Sjsg * 4c349dbc7Sjsg * Copyright © 2008,2010 Intel Corporation 5c349dbc7Sjsg */ 6c349dbc7Sjsg 7c349dbc7Sjsg #include <linux/intel-iommu.h> 8c349dbc7Sjsg #include <linux/dma-resv.h> 9c349dbc7Sjsg #include <linux/sync_file.h> 10c349dbc7Sjsg #include <linux/uaccess.h> 11c349dbc7Sjsg 12c349dbc7Sjsg #include <drm/drm_syncobj.h> 13c349dbc7Sjsg 14c349dbc7Sjsg #include <dev/pci/pcivar.h> 15c349dbc7Sjsg #include <dev/pci/agpvar.h> 16c349dbc7Sjsg 17c349dbc7Sjsg #include "display/intel_frontbuffer.h" 18c349dbc7Sjsg 19c349dbc7Sjsg #include "gem/i915_gem_ioctls.h" 20c349dbc7Sjsg #include "gt/intel_context.h" 21*5ca02815Sjsg #include "gt/intel_gpu_commands.h" 22c349dbc7Sjsg #include "gt/intel_gt.h" 23ad8b1aafSjsg #include "gt/intel_gt_buffer_pool.h" 24c349dbc7Sjsg #include "gt/intel_gt_pm.h" 25c349dbc7Sjsg #include "gt/intel_ring.h" 26c349dbc7Sjsg 27c349dbc7Sjsg #include "i915_drv.h" 28c349dbc7Sjsg #include "i915_gem_clflush.h" 29c349dbc7Sjsg #include "i915_gem_context.h" 30c349dbc7Sjsg #include "i915_gem_ioctls.h" 31c349dbc7Sjsg #include "i915_trace.h" 32ad8b1aafSjsg #include "i915_user_extensions.h" 33c349dbc7Sjsg 34c349dbc7Sjsg struct eb_vma { 35c349dbc7Sjsg struct i915_vma *vma; 36c349dbc7Sjsg unsigned int flags; 37c349dbc7Sjsg 38c349dbc7Sjsg /** This vma's place in the execbuf reservation list */ 39c349dbc7Sjsg struct drm_i915_gem_exec_object2 *exec; 40c349dbc7Sjsg struct list_head bind_link; 41c349dbc7Sjsg struct list_head reloc_link; 42c349dbc7Sjsg 43c349dbc7Sjsg struct hlist_node node; 44c349dbc7Sjsg u32 handle; 45c349dbc7Sjsg }; 46c349dbc7Sjsg 47c349dbc7Sjsg enum { 48c349dbc7Sjsg FORCE_CPU_RELOC = 1, 49c349dbc7Sjsg FORCE_GTT_RELOC, 50c349dbc7Sjsg FORCE_GPU_RELOC, 51c349dbc7Sjsg #define DBG_FORCE_RELOC 0 /* choose one of the above! */ 52c349dbc7Sjsg }; 53c349dbc7Sjsg 54*5ca02815Sjsg /* __EXEC_OBJECT_NO_RESERVE is BIT(31), defined in i915_vma.h */ 55*5ca02815Sjsg #define __EXEC_OBJECT_HAS_PIN BIT(30) 56*5ca02815Sjsg #define __EXEC_OBJECT_HAS_FENCE BIT(29) 57*5ca02815Sjsg #define __EXEC_OBJECT_USERPTR_INIT BIT(28) 58*5ca02815Sjsg #define __EXEC_OBJECT_NEEDS_MAP BIT(27) 59*5ca02815Sjsg #define __EXEC_OBJECT_NEEDS_BIAS BIT(26) 60*5ca02815Sjsg #define __EXEC_OBJECT_INTERNAL_FLAGS (~0u << 26) /* all of the above + */ 61c349dbc7Sjsg #define __EXEC_OBJECT_RESERVED (__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_FENCE) 62c349dbc7Sjsg 63c349dbc7Sjsg #define __EXEC_HAS_RELOC BIT(31) 64ad8b1aafSjsg #define __EXEC_ENGINE_PINNED BIT(30) 65*5ca02815Sjsg #define __EXEC_USERPTR_USED BIT(29) 66*5ca02815Sjsg #define __EXEC_INTERNAL_FLAGS (~0u << 29) 67c349dbc7Sjsg #define UPDATE PIN_OFFSET_FIXED 68c349dbc7Sjsg 69c349dbc7Sjsg #define BATCH_OFFSET_BIAS (256*1024) 70c349dbc7Sjsg 71c349dbc7Sjsg #define __I915_EXEC_ILLEGAL_FLAGS \ 72c349dbc7Sjsg (__I915_EXEC_UNKNOWN_FLAGS | \ 73c349dbc7Sjsg I915_EXEC_CONSTANTS_MASK | \ 74c349dbc7Sjsg I915_EXEC_RESOURCE_STREAMER) 75c349dbc7Sjsg 76c349dbc7Sjsg /* Catch emission of unexpected errors for CI! */ 77c349dbc7Sjsg #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) 78c349dbc7Sjsg #undef EINVAL 79c349dbc7Sjsg #define EINVAL ({ \ 80c349dbc7Sjsg DRM_DEBUG_DRIVER("EINVAL at %s:%d\n", __func__, __LINE__); \ 81c349dbc7Sjsg 22; \ 82c349dbc7Sjsg }) 83c349dbc7Sjsg #endif 84c349dbc7Sjsg 85c349dbc7Sjsg /** 86c349dbc7Sjsg * DOC: User command execution 87c349dbc7Sjsg * 88c349dbc7Sjsg * Userspace submits commands to be executed on the GPU as an instruction 89c349dbc7Sjsg * stream within a GEM object we call a batchbuffer. 
These instructions may
90c349dbc7Sjsg  * refer to other GEM objects containing auxiliary state such as kernels,
91c349dbc7Sjsg  * samplers, render targets and even secondary batchbuffers. Userspace does
92c349dbc7Sjsg  * not know where in the GPU memory these objects reside and so before the
93c349dbc7Sjsg  * batchbuffer is passed to the GPU for execution, those addresses in the
94c349dbc7Sjsg  * batchbuffer and auxiliary objects are updated. This is known as relocation,
95c349dbc7Sjsg  * or patching. To try and avoid having to relocate each object on the next
96c349dbc7Sjsg  * execution, userspace is told the location of those objects in this pass,
97c349dbc7Sjsg  * but this remains just a hint as the kernel may choose a new location for
98c349dbc7Sjsg  * any object in the future.
99c349dbc7Sjsg  *
100c349dbc7Sjsg  * At the level of talking to the hardware, submitting a batchbuffer for the
101c349dbc7Sjsg  * GPU to execute is to add content to a buffer from which the HW
102c349dbc7Sjsg  * command streamer is reading.
103c349dbc7Sjsg  *
104c349dbc7Sjsg  * 1. Add a command to load the HW context. For Logical Ring Contexts, i.e.
105c349dbc7Sjsg  *    Execlists, this command is not placed on the same buffer as the
106c349dbc7Sjsg  *    remaining items.
107c349dbc7Sjsg  *
108c349dbc7Sjsg  * 2. Add a command to invalidate caches to the buffer.
109c349dbc7Sjsg  *
110c349dbc7Sjsg  * 3. Add a batchbuffer start command to the buffer; the start command is
111c349dbc7Sjsg  *    essentially a token together with the GPU address of the batchbuffer
112c349dbc7Sjsg  *    to be executed.
113c349dbc7Sjsg  *
114c349dbc7Sjsg  * 4. Add a pipeline flush to the buffer.
115c349dbc7Sjsg  *
116c349dbc7Sjsg  * 5. Add a memory write command to the buffer to record when the GPU
117c349dbc7Sjsg  *    is done executing the batchbuffer. The memory write writes the
118c349dbc7Sjsg  *    global sequence number of the request, ``i915_request::global_seqno``;
119c349dbc7Sjsg  *    the i915 driver uses the current value in the register to determine
120c349dbc7Sjsg  *    if the GPU has completed the batchbuffer.
121c349dbc7Sjsg  *
122c349dbc7Sjsg  * 6. Add a user interrupt command to the buffer. This command instructs
123c349dbc7Sjsg  *    the GPU to issue an interrupt when the command, pipeline flush and
124c349dbc7Sjsg  *    memory write are completed.
125c349dbc7Sjsg  *
126c349dbc7Sjsg  * 7. Inform the hardware of the additional commands added to the buffer
127c349dbc7Sjsg  *    (by updating the tail pointer).
128c349dbc7Sjsg  *
129c349dbc7Sjsg  * Processing an execbuf ioctl is conceptually split up into a few phases.
130c349dbc7Sjsg  *
131c349dbc7Sjsg  * 1. Validation - Ensure all the pointers, handles and flags are valid.
132c349dbc7Sjsg  * 2. Reservation - Assign GPU address space for every object
133c349dbc7Sjsg  * 3. Relocation - Update any addresses to point to the final locations
134c349dbc7Sjsg  * 4. Serialisation - Order the request with respect to its dependencies
135c349dbc7Sjsg  * 5. Construction - Construct a request to execute the batchbuffer
136c349dbc7Sjsg  * 6. Submission (at some point in the future execution)
137c349dbc7Sjsg  *
138c349dbc7Sjsg  * Reserving resources for the execbuf is the most complicated phase. We
139c349dbc7Sjsg  * neither want to have to migrate the object in the address space, nor do
140c349dbc7Sjsg  * we want to have to update any relocations pointing to this object. Ideally,
141c349dbc7Sjsg  * we want to leave the object where it is and for all the existing relocations
142c349dbc7Sjsg  * to match.
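 *
 * As a rough sketch (with hypothetical handles and offsets), the
 * userspace-visible bookkeeping behind a single pointer in a batch looks
 * something like:
 *
 *	struct drm_i915_gem_relocation_entry reloc = {
 *		.target_handle   = 2,        /* object the pointer refers to */
 *		.offset          = 0x40,     /* where in the batch the pointer lives */
 *		.delta           = 0,        /* byte offset added to the target address */
 *		.presumed_offset = 0x100000, /* where userspace believes the target is */
 *	};
 *	struct drm_i915_gem_exec_object2 target = {
 *		.handle = 2,
 *		.offset = 0x100000,          /* the kernel's last reported location */
 *	};
 *
 * If the target really is (or stays) at 0x100000, the dword at offset 0x40
 * of the batch already holds the right address and nothing needs patching.
 *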
 * If the object is given a new address, or if userspace thinks the
143c349dbc7Sjsg  * object is elsewhere, we have to parse all the relocation entries and update
144c349dbc7Sjsg  * the addresses. Userspace can set the I915_EXEC_NO_RELOC flag to hint that
145c349dbc7Sjsg  * all the target addresses in all of its objects match the value in the
146c349dbc7Sjsg  * relocation entries and that they all match the presumed offsets given by the
147c349dbc7Sjsg  * list of execbuffer objects. Using this knowledge, we know that if we haven't
148c349dbc7Sjsg  * moved any buffers, all the relocation entries are valid and we can skip
149c349dbc7Sjsg  * the update. (If userspace is wrong, the likely outcome is an impromptu GPU
150c349dbc7Sjsg  * hang.) The requirements for using I915_EXEC_NO_RELOC are:
151c349dbc7Sjsg  *
152c349dbc7Sjsg  *	The addresses written in the objects must match the corresponding
153c349dbc7Sjsg  *	reloc.presumed_offset which in turn must match the corresponding
154c349dbc7Sjsg  *	execobject.offset.
155c349dbc7Sjsg  *
156c349dbc7Sjsg  *	Any render targets written to in the batch must be flagged with
157c349dbc7Sjsg  *	EXEC_OBJECT_WRITE.
158c349dbc7Sjsg  *
159c349dbc7Sjsg  *	To avoid stalling, execobject.offset should match the current
160c349dbc7Sjsg  *	address of that object within the active context.
161c349dbc7Sjsg  *
162c349dbc7Sjsg  * The reservation is done in multiple phases. First we try to keep any
163c349dbc7Sjsg  * object already bound in its current location - so long as it meets the
164c349dbc7Sjsg  * constraints imposed by the new execbuffer. Any object left unbound after the
165c349dbc7Sjsg  * first pass is then fitted into any available idle space. If an object does
166c349dbc7Sjsg  * not fit, all objects are removed from the reservation and the process rerun
167c349dbc7Sjsg  * after sorting the objects into a priority order (more difficult to fit
168c349dbc7Sjsg  * objects are tried first). Failing that, the entire VM is cleared and we try
169c349dbc7Sjsg  * to fit the execbuf one last time before concluding that it simply will not
170c349dbc7Sjsg  * fit.
171c349dbc7Sjsg  *
172c349dbc7Sjsg  * A small complication to all of this is that we allow userspace not only to
173c349dbc7Sjsg  * specify an alignment and a size for the object in the address space, but
174c349dbc7Sjsg  * we also allow userspace to specify the exact offset. Such objects are
175c349dbc7Sjsg  * simpler to place (the location is known a priori); all we have to do is make
176c349dbc7Sjsg  * sure the space is available.
177c349dbc7Sjsg  *
178c349dbc7Sjsg  * Once all the objects are in place, patching up the buried pointers to point
179c349dbc7Sjsg  * to the final locations is a fairly simple job of walking over the relocation
180c349dbc7Sjsg  * entry arrays, looking up the right address and rewriting the value into
181c349dbc7Sjsg  * the object. Simple! ... The relocation entries are stored in user memory
182c349dbc7Sjsg  * and so to access them we have to copy them into a local buffer. That copy
183c349dbc7Sjsg  * has to avoid taking any pagefaults as they may lead back to a GEM object
184c349dbc7Sjsg  * requiring the struct_mutex (i.e. recursive deadlock). So once again we split
185c349dbc7Sjsg  * the relocation into multiple passes. First we try to do everything within an
186c349dbc7Sjsg  * atomic context (avoid the pagefaults) which requires that we never wait. If
187c349dbc7Sjsg  * we detect that we may wait, or if we need to fault, then we have to fall back
188c349dbc7Sjsg  * to a slower path. The slowpath has to drop the mutex. (Can you hear alarm
189c349dbc7Sjsg  * bells yet?) Dropping the mutex means that we lose all the state we have
190c349dbc7Sjsg  * built up so far for the execbuf and we must reset any global data. However,
191c349dbc7Sjsg  * we do leave the objects pinned in their final locations - which is a
192c349dbc7Sjsg  * potential issue for concurrent execbufs. Once we have left the mutex, we can
193c349dbc7Sjsg  * allocate and copy all the relocation entries into a large array at our
194c349dbc7Sjsg  * leisure, reacquire the mutex, reclaim all the objects and other state and
195c349dbc7Sjsg  * then proceed to update any incorrect addresses with the objects.
196c349dbc7Sjsg  *
197c349dbc7Sjsg  * As we process the relocation entries, we maintain a record of whether the
198c349dbc7Sjsg  * object is being written to. Using NORELOC, we expect userspace to provide
199c349dbc7Sjsg  * this information instead. We also check whether we can skip the relocation
200c349dbc7Sjsg  * by comparing the expected value inside the relocation entry with the target's
201c349dbc7Sjsg  * final address. If they differ, we have to map the current object and rewrite
202c349dbc7Sjsg  * the 4 or 8 byte pointer within.
203c349dbc7Sjsg  *
204c349dbc7Sjsg  * Serialising an execbuf is quite simple according to the rules of the GEM
205c349dbc7Sjsg  * ABI. Execution within each context is ordered by the order of submission.
206c349dbc7Sjsg  * Writes to any GEM object are in order of submission and are exclusive. Reads
207c349dbc7Sjsg  * from a GEM object are unordered with respect to other reads, but ordered by
208c349dbc7Sjsg  * writes. A write submitted after a read cannot occur before the read, and
209c349dbc7Sjsg  * similarly any read submitted after a write cannot occur before the write.
210c349dbc7Sjsg  * Writes are ordered between engines such that only one write occurs at any
211c349dbc7Sjsg  * time (completing any reads beforehand) - using semaphores where available
212c349dbc7Sjsg  * and CPU serialisation otherwise. Other GEM accesses obey the same rules: any
213c349dbc7Sjsg  * write (either via mmaps using set-domain, or via pwrite) must flush all GPU
214c349dbc7Sjsg  * reads before starting, and any read (either using set-domain or pread) must
215c349dbc7Sjsg  * flush all GPU writes before starting. (Note we only employ a barrier before,
216c349dbc7Sjsg  * we currently rely on userspace not concurrently starting a new execution
217c349dbc7Sjsg  * whilst reading or writing to an object. This may be an advantage or not
218c349dbc7Sjsg  * depending on how much you trust userspace not to shoot themselves in the
219c349dbc7Sjsg  * foot.) Serialisation may just result in the request being inserted into
220c349dbc7Sjsg  * a DAG awaiting its turn, but the simplest is to wait on the CPU until
221c349dbc7Sjsg  * all dependencies are resolved.
222c349dbc7Sjsg  *
223c349dbc7Sjsg  * After all of that, it is just a matter of closing the request and handing it to
224c349dbc7Sjsg  * the hardware (well, leaving it in a queue to be executed). However, we also
225c349dbc7Sjsg  * offer the ability for batchbuffers to be run with elevated privileges so
226c349dbc7Sjsg  * that they access otherwise hidden registers. (Used to adjust L3 cache etc.)
227c349dbc7Sjsg  * Before any batch is given extra privileges we first must check that it
228c349dbc7Sjsg  * contains no nefarious instructions: we check that each instruction is from
229c349dbc7Sjsg  * our whitelist and all registers are also from an allowed list.
We first 230c349dbc7Sjsg * copy the user's batchbuffer to a shadow (so that the user doesn't have 231c349dbc7Sjsg * access to it, either by the CPU or GPU as we scan it) and then parse each 232c349dbc7Sjsg * instruction. If everything is ok, we set a flag telling the hardware to run 233c349dbc7Sjsg * the batchbuffer in trusted mode, otherwise the ioctl is rejected. 234c349dbc7Sjsg */ 235c349dbc7Sjsg 236ad8b1aafSjsg struct eb_fence { 237ad8b1aafSjsg struct drm_syncobj *syncobj; /* Use with ptr_mask_bits() */ 238ad8b1aafSjsg struct dma_fence *dma_fence; 239ad8b1aafSjsg u64 value; 240ad8b1aafSjsg struct dma_fence_chain *chain_fence; 241ad8b1aafSjsg }; 242ad8b1aafSjsg 243c349dbc7Sjsg struct i915_execbuffer { 244c349dbc7Sjsg struct drm_i915_private *i915; /** i915 backpointer */ 245c349dbc7Sjsg struct drm_file *file; /** per-file lookup tables and limits */ 246c349dbc7Sjsg struct drm_i915_gem_execbuffer2 *args; /** ioctl parameters */ 247c349dbc7Sjsg struct drm_i915_gem_exec_object2 *exec; /** ioctl execobj[] */ 248c349dbc7Sjsg struct eb_vma *vma; 249c349dbc7Sjsg 250c349dbc7Sjsg struct intel_engine_cs *engine; /** engine to queue the request to */ 251c349dbc7Sjsg struct intel_context *context; /* logical state for the request */ 252c349dbc7Sjsg struct i915_gem_context *gem_context; /** caller's context */ 253c349dbc7Sjsg 254c349dbc7Sjsg struct i915_request *request; /** our request to build */ 255c349dbc7Sjsg struct eb_vma *batch; /** identity of the batch obj/vma */ 256c349dbc7Sjsg struct i915_vma *trampoline; /** trampoline used for chaining */ 257c349dbc7Sjsg 258c349dbc7Sjsg /** actual size of execobj[] as we may extend it for the cmdparser */ 259c349dbc7Sjsg unsigned int buffer_count; 260c349dbc7Sjsg 261c349dbc7Sjsg /** list of vma not yet bound during reservation phase */ 262c349dbc7Sjsg struct list_head unbound; 263c349dbc7Sjsg 264c349dbc7Sjsg /** list of vma that have execobj.relocation_count */ 265c349dbc7Sjsg struct list_head relocs; 266c349dbc7Sjsg 267ad8b1aafSjsg struct i915_gem_ww_ctx ww; 268ad8b1aafSjsg 269c349dbc7Sjsg /** 270c349dbc7Sjsg * Track the most recently used object for relocations, as we 271c349dbc7Sjsg * frequently have to perform multiple relocations within the same 272c349dbc7Sjsg * obj/page 273c349dbc7Sjsg */ 274c349dbc7Sjsg struct reloc_cache { 275c349dbc7Sjsg struct drm_mm_node node; /** temporary GTT binding */ 276c349dbc7Sjsg unsigned long vaddr; /** Current kmap address */ 277c349dbc7Sjsg unsigned long page; /** Currently mapped page index */ 278*5ca02815Sjsg unsigned int graphics_ver; /** Cached value of GRAPHICS_VER */ 279c349dbc7Sjsg bool use_64bit_reloc : 1; 280c349dbc7Sjsg bool has_llc : 1; 281c349dbc7Sjsg bool has_fence : 1; 282c349dbc7Sjsg bool needs_unfenced : 1; 283c349dbc7Sjsg 284c349dbc7Sjsg struct agp_map *map; 285c349dbc7Sjsg bus_space_tag_t iot; 286c349dbc7Sjsg bus_space_handle_t ioh; 287c349dbc7Sjsg } reloc_cache; 288c349dbc7Sjsg 289c349dbc7Sjsg u64 invalid_flags; /** Set of execobj.flags that are invalid */ 290c349dbc7Sjsg 291ad8b1aafSjsg u64 batch_len; /** Length of batch within object */ 292c349dbc7Sjsg u32 batch_start_offset; /** Location within object of batch */ 293c349dbc7Sjsg u32 batch_flags; /** Flags composed for emit_bb_start() */ 294ad8b1aafSjsg struct intel_gt_buffer_pool_node *batch_pool; /** pool node for batch buffer */ 295c349dbc7Sjsg 296c349dbc7Sjsg /** 297c349dbc7Sjsg * Indicate either the size of the hastable used to resolve 298c349dbc7Sjsg * relocation handles, or if negative that we are using a direct 299c349dbc7Sjsg * index 
into the execobj[]. 300c349dbc7Sjsg */ 301c349dbc7Sjsg int lut_size; 302c349dbc7Sjsg struct hlist_head *buckets; /** ht for relocation handles */ 303ad8b1aafSjsg 304ad8b1aafSjsg struct eb_fence *fences; 305ad8b1aafSjsg unsigned long num_fences; 306c349dbc7Sjsg }; 307c349dbc7Sjsg 308ad8b1aafSjsg static int eb_parse(struct i915_execbuffer *eb); 309ad8b1aafSjsg static struct i915_request *eb_pin_engine(struct i915_execbuffer *eb, 310ad8b1aafSjsg bool throttle); 311ad8b1aafSjsg static void eb_unpin_engine(struct i915_execbuffer *eb); 312ad8b1aafSjsg 313c349dbc7Sjsg static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb) 314c349dbc7Sjsg { 315c349dbc7Sjsg return intel_engine_requires_cmd_parser(eb->engine) || 316c349dbc7Sjsg (intel_engine_using_cmd_parser(eb->engine) && 317c349dbc7Sjsg eb->args->batch_len); 318c349dbc7Sjsg } 319c349dbc7Sjsg 320c349dbc7Sjsg static int eb_create(struct i915_execbuffer *eb) 321c349dbc7Sjsg { 322c349dbc7Sjsg if (!(eb->args->flags & I915_EXEC_HANDLE_LUT)) { 323c349dbc7Sjsg unsigned int size = 1 + ilog2(eb->buffer_count); 324c349dbc7Sjsg 325c349dbc7Sjsg /* 326c349dbc7Sjsg * Without a 1:1 association between relocation handles and 327c349dbc7Sjsg * the execobject[] index, we instead create a hashtable. 328c349dbc7Sjsg * We size it dynamically based on available memory, starting 329c349dbc7Sjsg * first with 1:1 assocative hash and scaling back until 330c349dbc7Sjsg * the allocation succeeds. 331c349dbc7Sjsg * 332c349dbc7Sjsg * Later on we use a positive lut_size to indicate we are 333c349dbc7Sjsg * using this hashtable, and a negative value to indicate a 334c349dbc7Sjsg * direct lookup. 335c349dbc7Sjsg */ 336c349dbc7Sjsg do { 337c349dbc7Sjsg gfp_t flags; 338c349dbc7Sjsg 339c349dbc7Sjsg /* While we can still reduce the allocation size, don't 340c349dbc7Sjsg * raise a warning and allow the allocation to fail. 341c349dbc7Sjsg * On the last pass though, we want to try as hard 342c349dbc7Sjsg * as possible to perform the allocation and warn 343c349dbc7Sjsg * if it fails. 
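 * For example (hypothetical object count): with 40 objects the first
 * attempt is made at size = 1 + ilog2(40) = 6, i.e. a 64-bucket table,
 * using __GFP_NORETRY | __GFP_NOWARN; the table is halved on each
 * failure until the final 2-bucket attempt at size 1, which uses plain
 * GFP_KERNEL and may warn.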
344c349dbc7Sjsg */ 345c349dbc7Sjsg flags = GFP_KERNEL; 346c349dbc7Sjsg if (size > 1) 347c349dbc7Sjsg flags |= __GFP_NORETRY | __GFP_NOWARN; 348c349dbc7Sjsg 349c349dbc7Sjsg eb->buckets = kzalloc(sizeof(struct hlist_head) << size, 350c349dbc7Sjsg flags); 351c349dbc7Sjsg if (eb->buckets) 352c349dbc7Sjsg break; 353c349dbc7Sjsg } while (--size); 354c349dbc7Sjsg 355c349dbc7Sjsg if (unlikely(!size)) 356c349dbc7Sjsg return -ENOMEM; 357c349dbc7Sjsg 358c349dbc7Sjsg eb->lut_size = size; 359c349dbc7Sjsg } else { 360c349dbc7Sjsg eb->lut_size = -eb->buffer_count; 361c349dbc7Sjsg } 362c349dbc7Sjsg 363c349dbc7Sjsg return 0; 364c349dbc7Sjsg } 365c349dbc7Sjsg 366c349dbc7Sjsg static bool 367c349dbc7Sjsg eb_vma_misplaced(const struct drm_i915_gem_exec_object2 *entry, 368c349dbc7Sjsg const struct i915_vma *vma, 369c349dbc7Sjsg unsigned int flags) 370c349dbc7Sjsg { 371c349dbc7Sjsg if (vma->node.size < entry->pad_to_size) 372c349dbc7Sjsg return true; 373c349dbc7Sjsg 374c349dbc7Sjsg if (entry->alignment && !IS_ALIGNED(vma->node.start, entry->alignment)) 375c349dbc7Sjsg return true; 376c349dbc7Sjsg 377c349dbc7Sjsg if (flags & EXEC_OBJECT_PINNED && 378c349dbc7Sjsg vma->node.start != entry->offset) 379c349dbc7Sjsg return true; 380c349dbc7Sjsg 381c349dbc7Sjsg if (flags & __EXEC_OBJECT_NEEDS_BIAS && 382c349dbc7Sjsg vma->node.start < BATCH_OFFSET_BIAS) 383c349dbc7Sjsg return true; 384c349dbc7Sjsg 385c349dbc7Sjsg if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) && 386ad8b1aafSjsg (vma->node.start + vma->node.size + 4095) >> 32) 387c349dbc7Sjsg return true; 388c349dbc7Sjsg 389c349dbc7Sjsg if (flags & __EXEC_OBJECT_NEEDS_MAP && 390c349dbc7Sjsg !i915_vma_is_map_and_fenceable(vma)) 391c349dbc7Sjsg return true; 392c349dbc7Sjsg 393c349dbc7Sjsg return false; 394c349dbc7Sjsg } 395c349dbc7Sjsg 396ad8b1aafSjsg static u64 eb_pin_flags(const struct drm_i915_gem_exec_object2 *entry, 397ad8b1aafSjsg unsigned int exec_flags) 398ad8b1aafSjsg { 399ad8b1aafSjsg u64 pin_flags = 0; 400ad8b1aafSjsg 401ad8b1aafSjsg if (exec_flags & EXEC_OBJECT_NEEDS_GTT) 402ad8b1aafSjsg pin_flags |= PIN_GLOBAL; 403ad8b1aafSjsg 404ad8b1aafSjsg /* 405ad8b1aafSjsg * Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset, 406ad8b1aafSjsg * limit address to the first 4GBs for unflagged objects. 
407ad8b1aafSjsg */ 408ad8b1aafSjsg if (!(exec_flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS)) 409ad8b1aafSjsg pin_flags |= PIN_ZONE_4G; 410ad8b1aafSjsg 411ad8b1aafSjsg if (exec_flags & __EXEC_OBJECT_NEEDS_MAP) 412ad8b1aafSjsg pin_flags |= PIN_MAPPABLE; 413ad8b1aafSjsg 414ad8b1aafSjsg if (exec_flags & EXEC_OBJECT_PINNED) 415ad8b1aafSjsg pin_flags |= entry->offset | PIN_OFFSET_FIXED; 416ad8b1aafSjsg else if (exec_flags & __EXEC_OBJECT_NEEDS_BIAS) 417ad8b1aafSjsg pin_flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS; 418ad8b1aafSjsg 419ad8b1aafSjsg return pin_flags; 420ad8b1aafSjsg } 421ad8b1aafSjsg 422*5ca02815Sjsg static inline int 423c349dbc7Sjsg eb_pin_vma(struct i915_execbuffer *eb, 424c349dbc7Sjsg const struct drm_i915_gem_exec_object2 *entry, 425c349dbc7Sjsg struct eb_vma *ev) 426c349dbc7Sjsg { 427c349dbc7Sjsg struct i915_vma *vma = ev->vma; 428c349dbc7Sjsg u64 pin_flags; 429*5ca02815Sjsg int err; 430c349dbc7Sjsg 431c349dbc7Sjsg if (vma->node.size) 432c349dbc7Sjsg pin_flags = vma->node.start; 433c349dbc7Sjsg else 434c349dbc7Sjsg pin_flags = entry->offset & PIN_OFFSET_MASK; 435c349dbc7Sjsg 436c349dbc7Sjsg pin_flags |= PIN_USER | PIN_NOEVICT | PIN_OFFSET_FIXED; 437c349dbc7Sjsg if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_GTT)) 438c349dbc7Sjsg pin_flags |= PIN_GLOBAL; 439c349dbc7Sjsg 440ad8b1aafSjsg /* Attempt to reuse the current location if available */ 441*5ca02815Sjsg err = i915_vma_pin_ww(vma, &eb->ww, 0, 0, pin_flags); 442*5ca02815Sjsg if (err == -EDEADLK) 443*5ca02815Sjsg return err; 444*5ca02815Sjsg 445*5ca02815Sjsg if (unlikely(err)) { 446ad8b1aafSjsg if (entry->flags & EXEC_OBJECT_PINNED) 447*5ca02815Sjsg return err; 448c349dbc7Sjsg 449ad8b1aafSjsg /* Failing that pick any _free_ space if suitable */ 450*5ca02815Sjsg err = i915_vma_pin_ww(vma, &eb->ww, 451ad8b1aafSjsg entry->pad_to_size, 452ad8b1aafSjsg entry->alignment, 453ad8b1aafSjsg eb_pin_flags(entry, ev->flags) | 454*5ca02815Sjsg PIN_USER | PIN_NOEVICT); 455*5ca02815Sjsg if (unlikely(err)) 456*5ca02815Sjsg return err; 457ad8b1aafSjsg } 458ad8b1aafSjsg 459c349dbc7Sjsg if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_FENCE)) { 460*5ca02815Sjsg err = i915_vma_pin_fence(vma); 461*5ca02815Sjsg if (unlikely(err)) { 462c349dbc7Sjsg i915_vma_unpin(vma); 463*5ca02815Sjsg return err; 464c349dbc7Sjsg } 465c349dbc7Sjsg 466c349dbc7Sjsg if (vma->fence) 467c349dbc7Sjsg ev->flags |= __EXEC_OBJECT_HAS_FENCE; 468c349dbc7Sjsg } 469c349dbc7Sjsg 470c349dbc7Sjsg ev->flags |= __EXEC_OBJECT_HAS_PIN; 471*5ca02815Sjsg if (eb_vma_misplaced(entry, vma, ev->flags)) 472*5ca02815Sjsg return -EBADSLT; 473*5ca02815Sjsg 474*5ca02815Sjsg return 0; 475c349dbc7Sjsg } 476c349dbc7Sjsg 477c349dbc7Sjsg static inline void 478c349dbc7Sjsg eb_unreserve_vma(struct eb_vma *ev) 479c349dbc7Sjsg { 480c349dbc7Sjsg if (!(ev->flags & __EXEC_OBJECT_HAS_PIN)) 481c349dbc7Sjsg return; 482c349dbc7Sjsg 483ad8b1aafSjsg if (unlikely(ev->flags & __EXEC_OBJECT_HAS_FENCE)) 484ad8b1aafSjsg __i915_vma_unpin_fence(ev->vma); 485ad8b1aafSjsg 486ad8b1aafSjsg __i915_vma_unpin(ev->vma); 487c349dbc7Sjsg ev->flags &= ~__EXEC_OBJECT_RESERVED; 488c349dbc7Sjsg } 489c349dbc7Sjsg 490c349dbc7Sjsg static int 491c349dbc7Sjsg eb_validate_vma(struct i915_execbuffer *eb, 492c349dbc7Sjsg struct drm_i915_gem_exec_object2 *entry, 493c349dbc7Sjsg struct i915_vma *vma) 494c349dbc7Sjsg { 495*5ca02815Sjsg /* Relocations are disallowed for all platforms after TGL-LP. This 496*5ca02815Sjsg * also covers all platforms with local memory. 
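 * Userspace on those platforms is instead expected to manage GPU
 * addresses itself, i.e. softpin every object with EXEC_OBJECT_PINNED
 * and a valid offset, and pass relocation_count == 0; anything else is
 * rejected with -EINVAL below.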
497*5ca02815Sjsg */ 498*5ca02815Sjsg if (entry->relocation_count && 499*5ca02815Sjsg GRAPHICS_VER(eb->i915) >= 12 && !IS_TIGERLAKE(eb->i915)) 500*5ca02815Sjsg return -EINVAL; 501*5ca02815Sjsg 502c349dbc7Sjsg if (unlikely(entry->flags & eb->invalid_flags)) 503c349dbc7Sjsg return -EINVAL; 504c349dbc7Sjsg 505c349dbc7Sjsg if (unlikely(entry->alignment && 506c349dbc7Sjsg !is_power_of_2_u64(entry->alignment))) 507c349dbc7Sjsg return -EINVAL; 508c349dbc7Sjsg 509c349dbc7Sjsg /* 510c349dbc7Sjsg * Offset can be used as input (EXEC_OBJECT_PINNED), reject 511c349dbc7Sjsg * any non-page-aligned or non-canonical addresses. 512c349dbc7Sjsg */ 513c349dbc7Sjsg if (unlikely(entry->flags & EXEC_OBJECT_PINNED && 514c349dbc7Sjsg entry->offset != gen8_canonical_addr(entry->offset & I915_GTT_PAGE_MASK))) 515c349dbc7Sjsg return -EINVAL; 516c349dbc7Sjsg 517c349dbc7Sjsg /* pad_to_size was once a reserved field, so sanitize it */ 518c349dbc7Sjsg if (entry->flags & EXEC_OBJECT_PAD_TO_SIZE) { 519c349dbc7Sjsg if (unlikely(offset_in_page(entry->pad_to_size))) 520c349dbc7Sjsg return -EINVAL; 521c349dbc7Sjsg } else { 522c349dbc7Sjsg entry->pad_to_size = 0; 523c349dbc7Sjsg } 524c349dbc7Sjsg /* 525c349dbc7Sjsg * From drm_mm perspective address space is continuous, 526c349dbc7Sjsg * so from this point we're always using non-canonical 527c349dbc7Sjsg * form internally. 528c349dbc7Sjsg */ 529c349dbc7Sjsg entry->offset = gen8_noncanonical_addr(entry->offset); 530c349dbc7Sjsg 531c349dbc7Sjsg if (!eb->reloc_cache.has_fence) { 532c349dbc7Sjsg entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE; 533c349dbc7Sjsg } else { 534c349dbc7Sjsg if ((entry->flags & EXEC_OBJECT_NEEDS_FENCE || 535c349dbc7Sjsg eb->reloc_cache.needs_unfenced) && 536c349dbc7Sjsg i915_gem_object_is_tiled(vma->obj)) 537c349dbc7Sjsg entry->flags |= EXEC_OBJECT_NEEDS_GTT | __EXEC_OBJECT_NEEDS_MAP; 538c349dbc7Sjsg } 539c349dbc7Sjsg 540c349dbc7Sjsg return 0; 541c349dbc7Sjsg } 542c349dbc7Sjsg 543c349dbc7Sjsg static void 544c349dbc7Sjsg eb_add_vma(struct i915_execbuffer *eb, 545c349dbc7Sjsg unsigned int i, unsigned batch_idx, 546c349dbc7Sjsg struct i915_vma *vma) 547c349dbc7Sjsg { 548c349dbc7Sjsg struct drm_i915_gem_exec_object2 *entry = &eb->exec[i]; 549c349dbc7Sjsg struct eb_vma *ev = &eb->vma[i]; 550c349dbc7Sjsg 551ad8b1aafSjsg ev->vma = vma; 552c349dbc7Sjsg ev->exec = entry; 553c349dbc7Sjsg ev->flags = entry->flags; 554c349dbc7Sjsg 555c349dbc7Sjsg if (eb->lut_size > 0) { 556c349dbc7Sjsg ev->handle = entry->handle; 557c349dbc7Sjsg hlist_add_head(&ev->node, 558c349dbc7Sjsg &eb->buckets[hash_32(entry->handle, 559c349dbc7Sjsg eb->lut_size)]); 560c349dbc7Sjsg } 561c349dbc7Sjsg 562c349dbc7Sjsg if (entry->relocation_count) 563c349dbc7Sjsg list_add_tail(&ev->reloc_link, &eb->relocs); 564c349dbc7Sjsg 565c349dbc7Sjsg /* 566c349dbc7Sjsg * SNA is doing fancy tricks with compressing batch buffers, which leads 567c349dbc7Sjsg * to negative relocation deltas. Usually that works out ok since the 568c349dbc7Sjsg * relocate address is still positive, except when the batch is placed 569c349dbc7Sjsg * very low in the GTT. Ensure this doesn't happen. 570c349dbc7Sjsg * 571c349dbc7Sjsg * Note that actual hangs have only been observed on gen7, but for 572c349dbc7Sjsg * paranoia do it everywhere. 
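 * (The effect below is that a batch with relocations, unless explicitly
 * pinned by userspace, is given __EXEC_OBJECT_NEEDS_BIAS and so ends up
 * placed at or above BATCH_OFFSET_BIAS, i.e. 256KiB.)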
573c349dbc7Sjsg */ 574c349dbc7Sjsg if (i == batch_idx) { 575c349dbc7Sjsg if (entry->relocation_count && 576c349dbc7Sjsg !(ev->flags & EXEC_OBJECT_PINNED)) 577c349dbc7Sjsg ev->flags |= __EXEC_OBJECT_NEEDS_BIAS; 578c349dbc7Sjsg if (eb->reloc_cache.has_fence) 579c349dbc7Sjsg ev->flags |= EXEC_OBJECT_NEEDS_FENCE; 580c349dbc7Sjsg 581c349dbc7Sjsg eb->batch = ev; 582c349dbc7Sjsg } 583c349dbc7Sjsg } 584c349dbc7Sjsg 585c349dbc7Sjsg static inline int use_cpu_reloc(const struct reloc_cache *cache, 586c349dbc7Sjsg const struct drm_i915_gem_object *obj) 587c349dbc7Sjsg { 588c349dbc7Sjsg if (!i915_gem_object_has_struct_page(obj)) 589c349dbc7Sjsg return false; 590c349dbc7Sjsg 591c349dbc7Sjsg if (DBG_FORCE_RELOC == FORCE_CPU_RELOC) 592c349dbc7Sjsg return true; 593c349dbc7Sjsg 594c349dbc7Sjsg if (DBG_FORCE_RELOC == FORCE_GTT_RELOC) 595c349dbc7Sjsg return false; 596c349dbc7Sjsg 597c349dbc7Sjsg return (cache->has_llc || 598c349dbc7Sjsg obj->cache_dirty || 599c349dbc7Sjsg obj->cache_level != I915_CACHE_NONE); 600c349dbc7Sjsg } 601c349dbc7Sjsg 602ad8b1aafSjsg static int eb_reserve_vma(struct i915_execbuffer *eb, 603c349dbc7Sjsg struct eb_vma *ev, 604c349dbc7Sjsg u64 pin_flags) 605c349dbc7Sjsg { 606c349dbc7Sjsg struct drm_i915_gem_exec_object2 *entry = ev->exec; 607c349dbc7Sjsg struct i915_vma *vma = ev->vma; 608c349dbc7Sjsg int err; 609c349dbc7Sjsg 610c349dbc7Sjsg if (drm_mm_node_allocated(&vma->node) && 611c349dbc7Sjsg eb_vma_misplaced(entry, vma, ev->flags)) { 612c349dbc7Sjsg err = i915_vma_unbind(vma); 613c349dbc7Sjsg if (err) 614c349dbc7Sjsg return err; 615c349dbc7Sjsg } 616c349dbc7Sjsg 617ad8b1aafSjsg err = i915_vma_pin_ww(vma, &eb->ww, 618c349dbc7Sjsg entry->pad_to_size, entry->alignment, 619ad8b1aafSjsg eb_pin_flags(entry, ev->flags) | pin_flags); 620c349dbc7Sjsg if (err) 621c349dbc7Sjsg return err; 622c349dbc7Sjsg 623c349dbc7Sjsg if (entry->offset != vma->node.start) { 624c349dbc7Sjsg entry->offset = vma->node.start | UPDATE; 625c349dbc7Sjsg eb->args->flags |= __EXEC_HAS_RELOC; 626c349dbc7Sjsg } 627c349dbc7Sjsg 628ad8b1aafSjsg if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_FENCE)) { 629c349dbc7Sjsg err = i915_vma_pin_fence(vma); 630c349dbc7Sjsg if (unlikely(err)) { 631c349dbc7Sjsg i915_vma_unpin(vma); 632c349dbc7Sjsg return err; 633c349dbc7Sjsg } 634c349dbc7Sjsg 635c349dbc7Sjsg if (vma->fence) 636ad8b1aafSjsg ev->flags |= __EXEC_OBJECT_HAS_FENCE; 637c349dbc7Sjsg } 638c349dbc7Sjsg 639ad8b1aafSjsg ev->flags |= __EXEC_OBJECT_HAS_PIN; 640c349dbc7Sjsg GEM_BUG_ON(eb_vma_misplaced(entry, vma, ev->flags)); 641c349dbc7Sjsg 642c349dbc7Sjsg return 0; 643c349dbc7Sjsg } 644c349dbc7Sjsg 645c349dbc7Sjsg static int eb_reserve(struct i915_execbuffer *eb) 646c349dbc7Sjsg { 647c349dbc7Sjsg const unsigned int count = eb->buffer_count; 648c349dbc7Sjsg unsigned int pin_flags = PIN_USER | PIN_NONBLOCK; 649c349dbc7Sjsg struct list_head last; 650c349dbc7Sjsg struct eb_vma *ev; 651c349dbc7Sjsg unsigned int i, pass; 652c349dbc7Sjsg int err = 0; 653c349dbc7Sjsg 654c349dbc7Sjsg /* 655c349dbc7Sjsg * Attempt to pin all of the buffers into the GTT. 656c349dbc7Sjsg * This is done in 3 phases: 657c349dbc7Sjsg * 658c349dbc7Sjsg * 1a. Unbind all objects that do not match the GTT constraints for 659c349dbc7Sjsg * the execbuffer (fenceable, mappable, alignment etc). 660c349dbc7Sjsg * 1b. Increment pin count for already bound objects. 661c349dbc7Sjsg * 2. Bind new objects. 662c349dbc7Sjsg * 3. Decrement pin count. 
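 * (In terms of the passes below: pass 0 keeps whatever already fits,
 * pass 1 evicts the entire VM with i915_gem_evict_vm() and retries, and
 * any further failure gives up with -ENOSPC.)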
663c349dbc7Sjsg * 664c349dbc7Sjsg * This avoid unnecessary unbinding of later objects in order to make 665c349dbc7Sjsg * room for the earlier objects *unless* we need to defragment. 666c349dbc7Sjsg */ 667c349dbc7Sjsg pass = 0; 668c349dbc7Sjsg do { 669c349dbc7Sjsg list_for_each_entry(ev, &eb->unbound, bind_link) { 670c349dbc7Sjsg err = eb_reserve_vma(eb, ev, pin_flags); 671c349dbc7Sjsg if (err) 672c349dbc7Sjsg break; 673c349dbc7Sjsg } 674ad8b1aafSjsg if (err != -ENOSPC) 675ad8b1aafSjsg return err; 676c349dbc7Sjsg 677c349dbc7Sjsg /* Resort *all* the objects into priority order */ 678c349dbc7Sjsg INIT_LIST_HEAD(&eb->unbound); 679c349dbc7Sjsg INIT_LIST_HEAD(&last); 680c349dbc7Sjsg for (i = 0; i < count; i++) { 681c349dbc7Sjsg unsigned int flags; 682c349dbc7Sjsg 683c349dbc7Sjsg ev = &eb->vma[i]; 684c349dbc7Sjsg flags = ev->flags; 685c349dbc7Sjsg if (flags & EXEC_OBJECT_PINNED && 686c349dbc7Sjsg flags & __EXEC_OBJECT_HAS_PIN) 687c349dbc7Sjsg continue; 688c349dbc7Sjsg 689c349dbc7Sjsg eb_unreserve_vma(ev); 690c349dbc7Sjsg 691c349dbc7Sjsg if (flags & EXEC_OBJECT_PINNED) 692c349dbc7Sjsg /* Pinned must have their slot */ 693c349dbc7Sjsg list_add(&ev->bind_link, &eb->unbound); 694c349dbc7Sjsg else if (flags & __EXEC_OBJECT_NEEDS_MAP) 695c349dbc7Sjsg /* Map require the lowest 256MiB (aperture) */ 696c349dbc7Sjsg list_add_tail(&ev->bind_link, &eb->unbound); 697c349dbc7Sjsg else if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS)) 698c349dbc7Sjsg /* Prioritise 4GiB region for restricted bo */ 699c349dbc7Sjsg list_add(&ev->bind_link, &last); 700c349dbc7Sjsg else 701c349dbc7Sjsg list_add_tail(&ev->bind_link, &last); 702c349dbc7Sjsg } 703c349dbc7Sjsg list_splice_tail(&last, &eb->unbound); 704c349dbc7Sjsg 705c349dbc7Sjsg switch (pass++) { 706c349dbc7Sjsg case 0: 707c349dbc7Sjsg break; 708c349dbc7Sjsg 709c349dbc7Sjsg case 1: 710c349dbc7Sjsg /* Too fragmented, unbind everything and retry */ 711c349dbc7Sjsg mutex_lock(&eb->context->vm->mutex); 712c349dbc7Sjsg err = i915_gem_evict_vm(eb->context->vm); 713c349dbc7Sjsg mutex_unlock(&eb->context->vm->mutex); 714c349dbc7Sjsg if (err) 715ad8b1aafSjsg return err; 716c349dbc7Sjsg break; 717c349dbc7Sjsg 718c349dbc7Sjsg default: 719ad8b1aafSjsg return -ENOSPC; 720c349dbc7Sjsg } 721c349dbc7Sjsg 722c349dbc7Sjsg pin_flags = PIN_USER; 723c349dbc7Sjsg } while (1); 724c349dbc7Sjsg } 725c349dbc7Sjsg 726c349dbc7Sjsg static unsigned int eb_batch_index(const struct i915_execbuffer *eb) 727c349dbc7Sjsg { 728c349dbc7Sjsg if (eb->args->flags & I915_EXEC_BATCH_FIRST) 729c349dbc7Sjsg return 0; 730c349dbc7Sjsg else 731c349dbc7Sjsg return eb->buffer_count - 1; 732c349dbc7Sjsg } 733c349dbc7Sjsg 734c349dbc7Sjsg static int eb_select_context(struct i915_execbuffer *eb) 735c349dbc7Sjsg { 736c349dbc7Sjsg struct i915_gem_context *ctx; 737c349dbc7Sjsg 738c349dbc7Sjsg ctx = i915_gem_context_lookup(eb->file->driver_priv, eb->args->rsvd1); 739*5ca02815Sjsg if (unlikely(IS_ERR(ctx))) 740*5ca02815Sjsg return PTR_ERR(ctx); 741c349dbc7Sjsg 742c349dbc7Sjsg eb->gem_context = ctx; 743c349dbc7Sjsg if (rcu_access_pointer(ctx->vm)) 744c349dbc7Sjsg eb->invalid_flags |= EXEC_OBJECT_NEEDS_GTT; 745c349dbc7Sjsg 746c349dbc7Sjsg return 0; 747c349dbc7Sjsg } 748c349dbc7Sjsg 749ad8b1aafSjsg static int __eb_add_lut(struct i915_execbuffer *eb, 750ad8b1aafSjsg u32 handle, struct i915_vma *vma) 751c349dbc7Sjsg { 752ad8b1aafSjsg struct i915_gem_context *ctx = eb->gem_context; 753ad8b1aafSjsg struct i915_lut_handle *lut; 754c349dbc7Sjsg int err; 755c349dbc7Sjsg 756c349dbc7Sjsg lut = i915_lut_handle_alloc(); 757ad8b1aafSjsg if 
(unlikely(!lut)) 758ad8b1aafSjsg return -ENOMEM; 759c349dbc7Sjsg 760ad8b1aafSjsg i915_vma_get(vma); 761c349dbc7Sjsg if (!atomic_fetch_inc(&vma->open_count)) 762c349dbc7Sjsg i915_vma_reopen(vma); 763c349dbc7Sjsg lut->handle = handle; 764ad8b1aafSjsg lut->ctx = ctx; 765c349dbc7Sjsg 766ad8b1aafSjsg /* Check that the context hasn't been closed in the meantime */ 767ad8b1aafSjsg err = -EINTR; 768ad8b1aafSjsg if (!mutex_lock_interruptible(&ctx->lut_mutex)) { 769ad8b1aafSjsg struct i915_address_space *vm = rcu_access_pointer(ctx->vm); 770ad8b1aafSjsg 771ad8b1aafSjsg if (unlikely(vm && vma->vm != vm)) 772ad8b1aafSjsg err = -EAGAIN; /* user racing with ctx set-vm */ 773ad8b1aafSjsg else if (likely(!i915_gem_context_is_closed(ctx))) 774ad8b1aafSjsg err = radix_tree_insert(&ctx->handles_vma, handle, vma); 775ad8b1aafSjsg else 776ad8b1aafSjsg err = -ENOENT; 777ad8b1aafSjsg if (err == 0) { /* And nor has this handle */ 778ad8b1aafSjsg struct drm_i915_gem_object *obj = vma->obj; 779ad8b1aafSjsg 780ad8b1aafSjsg spin_lock(&obj->lut_lock); 781ad8b1aafSjsg if (idr_find(&eb->file->object_idr, handle) == obj) { 782c349dbc7Sjsg list_add(&lut->obj_link, &obj->lut_list); 783ad8b1aafSjsg } else { 784ad8b1aafSjsg radix_tree_delete(&ctx->handles_vma, handle); 785ad8b1aafSjsg err = -ENOENT; 786ad8b1aafSjsg } 787ad8b1aafSjsg spin_unlock(&obj->lut_lock); 788ad8b1aafSjsg } 789ad8b1aafSjsg mutex_unlock(&ctx->lut_mutex); 790ad8b1aafSjsg } 791c349dbc7Sjsg if (unlikely(err)) 792ad8b1aafSjsg goto err; 793ad8b1aafSjsg 794ad8b1aafSjsg return 0; 795ad8b1aafSjsg 796ad8b1aafSjsg err: 797ad8b1aafSjsg i915_vma_close(vma); 798ad8b1aafSjsg i915_vma_put(vma); 799ad8b1aafSjsg i915_lut_handle_free(lut); 800ad8b1aafSjsg return err; 801ad8b1aafSjsg } 802ad8b1aafSjsg 803ad8b1aafSjsg static struct i915_vma *eb_lookup_vma(struct i915_execbuffer *eb, u32 handle) 804ad8b1aafSjsg { 805ad8b1aafSjsg struct i915_address_space *vm = eb->context->vm; 806ad8b1aafSjsg 807ad8b1aafSjsg do { 808ad8b1aafSjsg struct drm_i915_gem_object *obj; 809ad8b1aafSjsg struct i915_vma *vma; 810ad8b1aafSjsg int err; 811ad8b1aafSjsg 812ad8b1aafSjsg rcu_read_lock(); 813ad8b1aafSjsg vma = radix_tree_lookup(&eb->gem_context->handles_vma, handle); 814ad8b1aafSjsg if (likely(vma && vma->vm == vm)) 815ad8b1aafSjsg vma = i915_vma_tryget(vma); 816ad8b1aafSjsg rcu_read_unlock(); 817ad8b1aafSjsg if (likely(vma)) 818ad8b1aafSjsg return vma; 819ad8b1aafSjsg 820ad8b1aafSjsg obj = i915_gem_object_lookup(eb->file, handle); 821ad8b1aafSjsg if (unlikely(!obj)) 822ad8b1aafSjsg return ERR_PTR(-ENOENT); 823ad8b1aafSjsg 824ad8b1aafSjsg vma = i915_vma_instance(obj, vm, NULL); 825ad8b1aafSjsg if (IS_ERR(vma)) { 826ad8b1aafSjsg i915_gem_object_put(obj); 827ad8b1aafSjsg return vma; 828ad8b1aafSjsg } 829ad8b1aafSjsg 830ad8b1aafSjsg err = __eb_add_lut(eb, handle, vma); 831ad8b1aafSjsg if (likely(!err)) 832ad8b1aafSjsg return vma; 833ad8b1aafSjsg 834ad8b1aafSjsg i915_gem_object_put(obj); 835ad8b1aafSjsg if (err != -EEXIST) 836ad8b1aafSjsg return ERR_PTR(err); 837ad8b1aafSjsg } while (1); 838ad8b1aafSjsg } 839ad8b1aafSjsg 840ad8b1aafSjsg static int eb_lookup_vmas(struct i915_execbuffer *eb) 841ad8b1aafSjsg { 842ad8b1aafSjsg struct drm_i915_private *i915 = eb->i915; 843ad8b1aafSjsg unsigned int batch = eb_batch_index(eb); 844ad8b1aafSjsg unsigned int i; 845ad8b1aafSjsg int err = 0; 846ad8b1aafSjsg 847ad8b1aafSjsg INIT_LIST_HEAD(&eb->relocs); 848ad8b1aafSjsg 849ad8b1aafSjsg for (i = 0; i < eb->buffer_count; i++) { 850ad8b1aafSjsg struct i915_vma *vma; 851ad8b1aafSjsg 852ad8b1aafSjsg vma = 
eb_lookup_vma(eb, eb->exec[i].handle); 853ad8b1aafSjsg if (IS_ERR(vma)) { 854ad8b1aafSjsg err = PTR_ERR(vma); 855ad8b1aafSjsg goto err; 856ad8b1aafSjsg } 857ad8b1aafSjsg 858ad8b1aafSjsg err = eb_validate_vma(eb, &eb->exec[i], vma); 859ad8b1aafSjsg if (unlikely(err)) { 860ad8b1aafSjsg i915_vma_put(vma); 861ad8b1aafSjsg goto err; 862ad8b1aafSjsg } 863c349dbc7Sjsg 864c349dbc7Sjsg eb_add_vma(eb, i, batch, vma); 865*5ca02815Sjsg 866*5ca02815Sjsg if (i915_gem_object_is_userptr(vma->obj)) { 867*5ca02815Sjsg err = i915_gem_object_userptr_submit_init(vma->obj); 868*5ca02815Sjsg if (err) { 869*5ca02815Sjsg if (i + 1 < eb->buffer_count) { 870*5ca02815Sjsg /* 871*5ca02815Sjsg * Execbuffer code expects last vma entry to be NULL, 872*5ca02815Sjsg * since we already initialized this entry, 873*5ca02815Sjsg * set the next value to NULL or we mess up 874*5ca02815Sjsg * cleanup handling. 875*5ca02815Sjsg */ 876*5ca02815Sjsg eb->vma[i + 1].vma = NULL; 877*5ca02815Sjsg } 878*5ca02815Sjsg 879*5ca02815Sjsg return err; 880*5ca02815Sjsg } 881*5ca02815Sjsg 882*5ca02815Sjsg eb->vma[i].flags |= __EXEC_OBJECT_USERPTR_INIT; 883*5ca02815Sjsg eb->args->flags |= __EXEC_USERPTR_USED; 884*5ca02815Sjsg } 885c349dbc7Sjsg } 886c349dbc7Sjsg 887ad8b1aafSjsg if (unlikely(eb->batch->flags & EXEC_OBJECT_WRITE)) { 888ad8b1aafSjsg drm_dbg(&i915->drm, 889ad8b1aafSjsg "Attempting to use self-modifying batch buffer\n"); 890ad8b1aafSjsg return -EINVAL; 891ad8b1aafSjsg } 892ad8b1aafSjsg 893ad8b1aafSjsg if (range_overflows_t(u64, 894ad8b1aafSjsg eb->batch_start_offset, eb->batch_len, 895ad8b1aafSjsg eb->batch->vma->size)) { 896ad8b1aafSjsg drm_dbg(&i915->drm, "Attempting to use out-of-bounds batch\n"); 897ad8b1aafSjsg return -EINVAL; 898ad8b1aafSjsg } 899ad8b1aafSjsg 900ad8b1aafSjsg if (eb->batch_len == 0) 901ad8b1aafSjsg eb->batch_len = eb->batch->vma->size - eb->batch_start_offset; 902ad8b1aafSjsg if (unlikely(eb->batch_len == 0)) { /* impossible! 
*/ 903ad8b1aafSjsg drm_dbg(&i915->drm, "Invalid batch length\n"); 904ad8b1aafSjsg return -EINVAL; 905ad8b1aafSjsg } 906ad8b1aafSjsg 907c349dbc7Sjsg return 0; 908c349dbc7Sjsg 909ad8b1aafSjsg err: 910c349dbc7Sjsg eb->vma[i].vma = NULL; 911c349dbc7Sjsg return err; 912c349dbc7Sjsg } 913c349dbc7Sjsg 914*5ca02815Sjsg static int eb_lock_vmas(struct i915_execbuffer *eb) 915*5ca02815Sjsg { 916*5ca02815Sjsg unsigned int i; 917*5ca02815Sjsg int err; 918*5ca02815Sjsg 919*5ca02815Sjsg for (i = 0; i < eb->buffer_count; i++) { 920*5ca02815Sjsg struct eb_vma *ev = &eb->vma[i]; 921*5ca02815Sjsg struct i915_vma *vma = ev->vma; 922*5ca02815Sjsg 923*5ca02815Sjsg err = i915_gem_object_lock(vma->obj, &eb->ww); 924*5ca02815Sjsg if (err) 925*5ca02815Sjsg return err; 926*5ca02815Sjsg } 927*5ca02815Sjsg 928*5ca02815Sjsg return 0; 929*5ca02815Sjsg } 930*5ca02815Sjsg 931ad8b1aafSjsg static int eb_validate_vmas(struct i915_execbuffer *eb) 932ad8b1aafSjsg { 933ad8b1aafSjsg unsigned int i; 934ad8b1aafSjsg int err; 935ad8b1aafSjsg 936ad8b1aafSjsg INIT_LIST_HEAD(&eb->unbound); 937ad8b1aafSjsg 938*5ca02815Sjsg err = eb_lock_vmas(eb); 939*5ca02815Sjsg if (err) 940*5ca02815Sjsg return err; 941*5ca02815Sjsg 942ad8b1aafSjsg for (i = 0; i < eb->buffer_count; i++) { 943ad8b1aafSjsg struct drm_i915_gem_exec_object2 *entry = &eb->exec[i]; 944ad8b1aafSjsg struct eb_vma *ev = &eb->vma[i]; 945ad8b1aafSjsg struct i915_vma *vma = ev->vma; 946ad8b1aafSjsg 947*5ca02815Sjsg err = eb_pin_vma(eb, entry, ev); 948*5ca02815Sjsg if (err == -EDEADLK) 949ad8b1aafSjsg return err; 950ad8b1aafSjsg 951*5ca02815Sjsg if (!err) { 952ad8b1aafSjsg if (entry->offset != vma->node.start) { 953ad8b1aafSjsg entry->offset = vma->node.start | UPDATE; 954ad8b1aafSjsg eb->args->flags |= __EXEC_HAS_RELOC; 955ad8b1aafSjsg } 956ad8b1aafSjsg } else { 957ad8b1aafSjsg eb_unreserve_vma(ev); 958ad8b1aafSjsg 959ad8b1aafSjsg list_add_tail(&ev->bind_link, &eb->unbound); 960ad8b1aafSjsg if (drm_mm_node_allocated(&vma->node)) { 961ad8b1aafSjsg err = i915_vma_unbind(vma); 962ad8b1aafSjsg if (err) 963ad8b1aafSjsg return err; 964ad8b1aafSjsg } 965ad8b1aafSjsg } 966ad8b1aafSjsg 967*5ca02815Sjsg if (!(ev->flags & EXEC_OBJECT_WRITE)) { 968*5ca02815Sjsg err = dma_resv_reserve_shared(vma->resv, 1); 969*5ca02815Sjsg if (err) 970*5ca02815Sjsg return err; 971*5ca02815Sjsg } 972*5ca02815Sjsg 973ad8b1aafSjsg GEM_BUG_ON(drm_mm_node_allocated(&vma->node) && 974ad8b1aafSjsg eb_vma_misplaced(&eb->exec[i], vma, ev->flags)); 975ad8b1aafSjsg } 976ad8b1aafSjsg 977ad8b1aafSjsg if (!list_empty(&eb->unbound)) 978ad8b1aafSjsg return eb_reserve(eb); 979ad8b1aafSjsg 980ad8b1aafSjsg return 0; 981ad8b1aafSjsg } 982ad8b1aafSjsg 983c349dbc7Sjsg static struct eb_vma * 984c349dbc7Sjsg eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle) 985c349dbc7Sjsg { 986c349dbc7Sjsg if (eb->lut_size < 0) { 987c349dbc7Sjsg if (handle >= -eb->lut_size) 988c349dbc7Sjsg return NULL; 989c349dbc7Sjsg return &eb->vma[handle]; 990c349dbc7Sjsg } else { 991c349dbc7Sjsg struct hlist_head *head; 992c349dbc7Sjsg struct eb_vma *ev; 993c349dbc7Sjsg 994c349dbc7Sjsg head = &eb->buckets[hash_32(handle, eb->lut_size)]; 995c349dbc7Sjsg hlist_for_each_entry(ev, head, node) { 996c349dbc7Sjsg if (ev->handle == handle) 997c349dbc7Sjsg return ev; 998c349dbc7Sjsg } 999c349dbc7Sjsg return NULL; 1000c349dbc7Sjsg } 1001c349dbc7Sjsg } 1002c349dbc7Sjsg 1003ad8b1aafSjsg static void eb_release_vmas(struct i915_execbuffer *eb, bool final) 1004c349dbc7Sjsg { 1005c349dbc7Sjsg const unsigned int count = eb->buffer_count; 1006c349dbc7Sjsg 
unsigned int i; 1007c349dbc7Sjsg 1008c349dbc7Sjsg for (i = 0; i < count; i++) { 1009c349dbc7Sjsg struct eb_vma *ev = &eb->vma[i]; 1010c349dbc7Sjsg struct i915_vma *vma = ev->vma; 1011c349dbc7Sjsg 1012c349dbc7Sjsg if (!vma) 1013c349dbc7Sjsg break; 1014c349dbc7Sjsg 1015ad8b1aafSjsg eb_unreserve_vma(ev); 1016c349dbc7Sjsg 1017ad8b1aafSjsg if (final) 1018c349dbc7Sjsg i915_vma_put(vma); 1019c349dbc7Sjsg } 1020ad8b1aafSjsg 1021ad8b1aafSjsg eb_unpin_engine(eb); 1022c349dbc7Sjsg } 1023c349dbc7Sjsg 1024c349dbc7Sjsg static void eb_destroy(const struct i915_execbuffer *eb) 1025c349dbc7Sjsg { 1026c349dbc7Sjsg if (eb->lut_size > 0) 1027c349dbc7Sjsg kfree(eb->buckets); 1028c349dbc7Sjsg } 1029c349dbc7Sjsg 1030c349dbc7Sjsg static inline u64 1031c349dbc7Sjsg relocation_target(const struct drm_i915_gem_relocation_entry *reloc, 1032c349dbc7Sjsg const struct i915_vma *target) 1033c349dbc7Sjsg { 1034c349dbc7Sjsg return gen8_canonical_addr((int)reloc->delta + target->node.start); 1035c349dbc7Sjsg } 1036c349dbc7Sjsg 1037c349dbc7Sjsg static void reloc_cache_init(struct reloc_cache *cache, 1038c349dbc7Sjsg struct drm_i915_private *i915) 1039c349dbc7Sjsg { 1040c349dbc7Sjsg cache->page = -1; 1041c349dbc7Sjsg cache->vaddr = 0; 1042c349dbc7Sjsg /* Must be a variable in the struct to allow GCC to unroll. */ 1043*5ca02815Sjsg cache->graphics_ver = GRAPHICS_VER(i915); 1044c349dbc7Sjsg cache->has_llc = HAS_LLC(i915); 1045c349dbc7Sjsg cache->use_64bit_reloc = HAS_64BIT_RELOC(i915); 1046*5ca02815Sjsg cache->has_fence = cache->graphics_ver < 4; 1047c349dbc7Sjsg cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment; 1048c349dbc7Sjsg cache->node.flags = 0; 1049c349dbc7Sjsg 1050c349dbc7Sjsg cache->map = i915->agph; 1051c349dbc7Sjsg cache->iot = i915->bst; 1052c349dbc7Sjsg } 1053c349dbc7Sjsg 1054c349dbc7Sjsg static inline void *unmask_page(unsigned long p) 1055c349dbc7Sjsg { 1056ad8b1aafSjsg return (void *)(uintptr_t)(p & LINUX_PAGE_MASK); 1057c349dbc7Sjsg } 1058c349dbc7Sjsg 1059c349dbc7Sjsg static inline unsigned int unmask_flags(unsigned long p) 1060c349dbc7Sjsg { 1061ad8b1aafSjsg return p & ~LINUX_PAGE_MASK; 1062c349dbc7Sjsg } 1063c349dbc7Sjsg 1064c349dbc7Sjsg #define KMAP 0x4 /* after CLFLUSH_FLAGS */ 1065c349dbc7Sjsg 1066c349dbc7Sjsg static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache) 1067c349dbc7Sjsg { 1068c349dbc7Sjsg struct drm_i915_private *i915 = 1069c349dbc7Sjsg container_of(cache, struct i915_execbuffer, reloc_cache)->i915; 1070c349dbc7Sjsg return &i915->ggtt; 1071c349dbc7Sjsg } 1072c349dbc7Sjsg 1073ad8b1aafSjsg static void reloc_cache_reset(struct reloc_cache *cache, struct i915_execbuffer *eb) 1074c349dbc7Sjsg { 1075c349dbc7Sjsg void *vaddr; 1076c349dbc7Sjsg 1077c349dbc7Sjsg if (!cache->vaddr) 1078c349dbc7Sjsg return; 1079c349dbc7Sjsg 1080c349dbc7Sjsg vaddr = unmask_page(cache->vaddr); 1081c349dbc7Sjsg if (cache->vaddr & KMAP) { 1082ad8b1aafSjsg struct drm_i915_gem_object *obj = 1083ad8b1aafSjsg (struct drm_i915_gem_object *)cache->node.mm; 1084c349dbc7Sjsg if (cache->vaddr & CLFLUSH_AFTER) 1085c349dbc7Sjsg mb(); 1086c349dbc7Sjsg 1087c349dbc7Sjsg kunmap_atomic(vaddr); 1088ad8b1aafSjsg i915_gem_object_finish_access(obj); 1089c349dbc7Sjsg } else { 1090c349dbc7Sjsg struct i915_ggtt *ggtt = cache_to_ggtt(cache); 1091c349dbc7Sjsg 1092c349dbc7Sjsg intel_gt_flush_ggtt_writes(ggtt->vm.gt); 1093c349dbc7Sjsg #ifdef __linux__ 1094c349dbc7Sjsg io_mapping_unmap_atomic((void __iomem *)vaddr); 1095c349dbc7Sjsg #else 1096c349dbc7Sjsg agp_unmap_atomic(cache->map, cache->ioh); 1097c349dbc7Sjsg #endif 
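		/* The iomap is gone; now release the GGTT space or vma pin that backed it. */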
1098c349dbc7Sjsg 1099c349dbc7Sjsg if (drm_mm_node_allocated(&cache->node)) { 1100c349dbc7Sjsg ggtt->vm.clear_range(&ggtt->vm, 1101c349dbc7Sjsg cache->node.start, 1102c349dbc7Sjsg cache->node.size); 1103c349dbc7Sjsg mutex_lock(&ggtt->vm.mutex); 1104c349dbc7Sjsg drm_mm_remove_node(&cache->node); 1105c349dbc7Sjsg mutex_unlock(&ggtt->vm.mutex); 1106c349dbc7Sjsg } else { 1107c349dbc7Sjsg i915_vma_unpin((struct i915_vma *)cache->node.mm); 1108c349dbc7Sjsg } 1109c349dbc7Sjsg } 1110c349dbc7Sjsg 1111c349dbc7Sjsg cache->vaddr = 0; 1112c349dbc7Sjsg cache->page = -1; 1113c349dbc7Sjsg } 1114c349dbc7Sjsg 1115c349dbc7Sjsg static void *reloc_kmap(struct drm_i915_gem_object *obj, 1116c349dbc7Sjsg struct reloc_cache *cache, 1117ad8b1aafSjsg unsigned long pageno) 1118c349dbc7Sjsg { 1119c349dbc7Sjsg void *vaddr; 1120ad8b1aafSjsg struct vm_page *page; 1121c349dbc7Sjsg 1122c349dbc7Sjsg if (cache->vaddr) { 1123c349dbc7Sjsg kunmap_atomic(unmask_page(cache->vaddr)); 1124c349dbc7Sjsg } else { 1125c349dbc7Sjsg unsigned int flushes; 1126c349dbc7Sjsg int err; 1127c349dbc7Sjsg 1128c349dbc7Sjsg err = i915_gem_object_prepare_write(obj, &flushes); 1129c349dbc7Sjsg if (err) 1130c349dbc7Sjsg return ERR_PTR(err); 1131c349dbc7Sjsg 1132c349dbc7Sjsg BUILD_BUG_ON(KMAP & CLFLUSH_FLAGS); 1133ad8b1aafSjsg BUILD_BUG_ON((KMAP | CLFLUSH_FLAGS) & LINUX_PAGE_MASK); 1134c349dbc7Sjsg 1135c349dbc7Sjsg cache->vaddr = flushes | KMAP; 1136c349dbc7Sjsg cache->node.mm = (void *)obj; 1137c349dbc7Sjsg if (flushes) 1138c349dbc7Sjsg mb(); 1139c349dbc7Sjsg } 1140c349dbc7Sjsg 1141ad8b1aafSjsg page = i915_gem_object_get_page(obj, pageno); 1142ad8b1aafSjsg if (!obj->mm.dirty) 1143ad8b1aafSjsg set_page_dirty(page); 1144ad8b1aafSjsg 1145ad8b1aafSjsg vaddr = kmap_atomic(page); 1146c349dbc7Sjsg cache->vaddr = unmask_flags(cache->vaddr) | (unsigned long)vaddr; 1147ad8b1aafSjsg cache->page = pageno; 1148c349dbc7Sjsg 1149c349dbc7Sjsg return vaddr; 1150c349dbc7Sjsg } 1151c349dbc7Sjsg 1152c349dbc7Sjsg static void *reloc_iomap(struct drm_i915_gem_object *obj, 1153ad8b1aafSjsg struct i915_execbuffer *eb, 1154c349dbc7Sjsg unsigned long page) 1155c349dbc7Sjsg { 1156ad8b1aafSjsg struct reloc_cache *cache = &eb->reloc_cache; 1157c349dbc7Sjsg struct i915_ggtt *ggtt = cache_to_ggtt(cache); 1158c349dbc7Sjsg unsigned long offset; 1159c349dbc7Sjsg void *vaddr; 1160c349dbc7Sjsg 1161c349dbc7Sjsg if (cache->vaddr) { 1162c349dbc7Sjsg intel_gt_flush_ggtt_writes(ggtt->vm.gt); 1163c349dbc7Sjsg #ifdef __linux__ 1164c349dbc7Sjsg io_mapping_unmap_atomic((void __force __iomem *) unmask_page(cache->vaddr)); 1165c349dbc7Sjsg #else 1166c349dbc7Sjsg agp_unmap_atomic(cache->map, cache->ioh); 1167c349dbc7Sjsg #endif 1168c349dbc7Sjsg } else { 1169c349dbc7Sjsg struct i915_vma *vma; 1170c349dbc7Sjsg int err; 1171c349dbc7Sjsg 1172c349dbc7Sjsg if (i915_gem_object_is_tiled(obj)) 1173c349dbc7Sjsg return ERR_PTR(-EINVAL); 1174c349dbc7Sjsg 1175c349dbc7Sjsg if (use_cpu_reloc(cache, obj)) 1176c349dbc7Sjsg return NULL; 1177c349dbc7Sjsg 1178c349dbc7Sjsg err = i915_gem_object_set_to_gtt_domain(obj, true); 1179c349dbc7Sjsg if (err) 1180c349dbc7Sjsg return ERR_PTR(err); 1181c349dbc7Sjsg 1182ad8b1aafSjsg vma = i915_gem_object_ggtt_pin_ww(obj, &eb->ww, NULL, 0, 0, 1183c349dbc7Sjsg PIN_MAPPABLE | 1184c349dbc7Sjsg PIN_NONBLOCK /* NOWARN */ | 1185c349dbc7Sjsg PIN_NOEVICT); 1186ad8b1aafSjsg if (vma == ERR_PTR(-EDEADLK)) 1187ad8b1aafSjsg return vma; 1188ad8b1aafSjsg 1189c349dbc7Sjsg if (IS_ERR(vma)) { 1190c349dbc7Sjsg memset(&cache->node, 0, sizeof(cache->node)); 1191c349dbc7Sjsg mutex_lock(&ggtt->vm.mutex); 
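			/*
			 * No pinnable aperture vma; instead reserve a single
			 * page of mappable GGTT here and insert PTEs for each
			 * page we need via insert_page() below.
			 */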
1192c349dbc7Sjsg err = drm_mm_insert_node_in_range 1193c349dbc7Sjsg (&ggtt->vm.mm, &cache->node, 1194c349dbc7Sjsg PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE, 1195c349dbc7Sjsg 0, ggtt->mappable_end, 1196c349dbc7Sjsg DRM_MM_INSERT_LOW); 1197c349dbc7Sjsg mutex_unlock(&ggtt->vm.mutex); 1198c349dbc7Sjsg if (err) /* no inactive aperture space, use cpu reloc */ 1199c349dbc7Sjsg return NULL; 1200c349dbc7Sjsg } else { 1201c349dbc7Sjsg cache->node.start = vma->node.start; 1202c349dbc7Sjsg cache->node.mm = (void *)vma; 1203c349dbc7Sjsg } 1204c349dbc7Sjsg } 1205c349dbc7Sjsg 1206c349dbc7Sjsg offset = cache->node.start; 1207c349dbc7Sjsg if (drm_mm_node_allocated(&cache->node)) { 1208c349dbc7Sjsg ggtt->vm.insert_page(&ggtt->vm, 1209c349dbc7Sjsg i915_gem_object_get_dma_address(obj, page), 1210c349dbc7Sjsg offset, I915_CACHE_NONE, 0); 1211c349dbc7Sjsg } else { 1212c349dbc7Sjsg offset += page << PAGE_SHIFT; 1213c349dbc7Sjsg } 1214c349dbc7Sjsg 1215c349dbc7Sjsg #ifdef __linux__ 1216c349dbc7Sjsg vaddr = (void __force *)io_mapping_map_atomic_wc(&ggtt->iomap, 1217c349dbc7Sjsg offset); 1218c349dbc7Sjsg #else 1219c349dbc7Sjsg agp_map_atomic(cache->map, offset, &cache->ioh); 1220c349dbc7Sjsg vaddr = bus_space_vaddr(cache->iot, cache->ioh); 1221c349dbc7Sjsg #endif 1222c349dbc7Sjsg cache->page = page; 1223c349dbc7Sjsg cache->vaddr = (unsigned long)vaddr; 1224c349dbc7Sjsg 1225c349dbc7Sjsg return vaddr; 1226c349dbc7Sjsg } 1227c349dbc7Sjsg 1228c349dbc7Sjsg static void *reloc_vaddr(struct drm_i915_gem_object *obj, 1229ad8b1aafSjsg struct i915_execbuffer *eb, 1230c349dbc7Sjsg unsigned long page) 1231c349dbc7Sjsg { 1232ad8b1aafSjsg struct reloc_cache *cache = &eb->reloc_cache; 1233c349dbc7Sjsg void *vaddr; 1234c349dbc7Sjsg 1235c349dbc7Sjsg if (cache->page == page) { 1236c349dbc7Sjsg vaddr = unmask_page(cache->vaddr); 1237c349dbc7Sjsg } else { 1238c349dbc7Sjsg vaddr = NULL; 1239c349dbc7Sjsg if ((cache->vaddr & KMAP) == 0) 1240ad8b1aafSjsg vaddr = reloc_iomap(obj, eb, page); 1241c349dbc7Sjsg if (!vaddr) 1242c349dbc7Sjsg vaddr = reloc_kmap(obj, cache, page); 1243c349dbc7Sjsg } 1244c349dbc7Sjsg 1245c349dbc7Sjsg return vaddr; 1246c349dbc7Sjsg } 1247c349dbc7Sjsg 1248c349dbc7Sjsg static void clflush_write32(u32 *addr, u32 value, unsigned int flushes) 1249c349dbc7Sjsg { 1250c349dbc7Sjsg if (unlikely(flushes & (CLFLUSH_BEFORE | CLFLUSH_AFTER))) { 1251c349dbc7Sjsg if (flushes & CLFLUSH_BEFORE) { 1252c349dbc7Sjsg clflushopt(addr); 1253c349dbc7Sjsg mb(); 1254c349dbc7Sjsg } 1255c349dbc7Sjsg 1256c349dbc7Sjsg *addr = value; 1257c349dbc7Sjsg 1258c349dbc7Sjsg /* 1259c349dbc7Sjsg * Writes to the same cacheline are serialised by the CPU 1260c349dbc7Sjsg * (including clflush). On the write path, we only require 1261c349dbc7Sjsg * that it hits memory in an orderly fashion and place 1262c349dbc7Sjsg * mb barriers at the start and end of the relocation phase 1263c349dbc7Sjsg * to ensure ordering of clflush wrt to the system. 
1264c349dbc7Sjsg */ 1265c349dbc7Sjsg if (flushes & CLFLUSH_AFTER) 1266c349dbc7Sjsg clflushopt(addr); 1267c349dbc7Sjsg } else 1268c349dbc7Sjsg *addr = value; 1269c349dbc7Sjsg } 1270c349dbc7Sjsg 1271ad8b1aafSjsg static u64 1272ad8b1aafSjsg relocate_entry(struct i915_vma *vma, 1273ad8b1aafSjsg const struct drm_i915_gem_relocation_entry *reloc, 1274ad8b1aafSjsg struct i915_execbuffer *eb, 1275ad8b1aafSjsg const struct i915_vma *target) 1276ad8b1aafSjsg { 1277ad8b1aafSjsg u64 target_addr = relocation_target(reloc, target); 1278ad8b1aafSjsg u64 offset = reloc->offset; 1279ad8b1aafSjsg bool wide = eb->reloc_cache.use_64bit_reloc; 1280ad8b1aafSjsg void *vaddr; 1281ad8b1aafSjsg 1282c349dbc7Sjsg repeat: 1283ad8b1aafSjsg vaddr = reloc_vaddr(vma->obj, eb, 1284ad8b1aafSjsg offset >> PAGE_SHIFT); 1285c349dbc7Sjsg if (IS_ERR(vaddr)) 1286c349dbc7Sjsg return PTR_ERR(vaddr); 1287c349dbc7Sjsg 1288ad8b1aafSjsg GEM_BUG_ON(!IS_ALIGNED(offset, sizeof(u32))); 1289c349dbc7Sjsg clflush_write32(vaddr + offset_in_page(offset), 1290ad8b1aafSjsg lower_32_bits(target_addr), 1291c349dbc7Sjsg eb->reloc_cache.vaddr); 1292c349dbc7Sjsg 1293c349dbc7Sjsg if (wide) { 1294c349dbc7Sjsg offset += sizeof(u32); 1295ad8b1aafSjsg target_addr >>= 32; 1296c349dbc7Sjsg wide = false; 1297c349dbc7Sjsg goto repeat; 1298c349dbc7Sjsg } 1299c349dbc7Sjsg 1300c349dbc7Sjsg return target->node.start | UPDATE; 1301c349dbc7Sjsg } 1302c349dbc7Sjsg 1303c349dbc7Sjsg static u64 1304c349dbc7Sjsg eb_relocate_entry(struct i915_execbuffer *eb, 1305c349dbc7Sjsg struct eb_vma *ev, 1306c349dbc7Sjsg const struct drm_i915_gem_relocation_entry *reloc) 1307c349dbc7Sjsg { 1308c349dbc7Sjsg struct drm_i915_private *i915 = eb->i915; 1309c349dbc7Sjsg struct eb_vma *target; 1310c349dbc7Sjsg int err; 1311c349dbc7Sjsg 1312c349dbc7Sjsg /* we've already hold a reference to all valid objects */ 1313c349dbc7Sjsg target = eb_get_vma(eb, reloc->target_handle); 1314c349dbc7Sjsg if (unlikely(!target)) 1315c349dbc7Sjsg return -ENOENT; 1316c349dbc7Sjsg 1317c349dbc7Sjsg /* Validate that the target is in a valid r/w GPU domain */ 1318c349dbc7Sjsg if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) { 1319c349dbc7Sjsg drm_dbg(&i915->drm, "reloc with multiple write domains: " 1320c349dbc7Sjsg "target %d offset %d " 1321c349dbc7Sjsg "read %08x write %08x", 1322c349dbc7Sjsg reloc->target_handle, 1323c349dbc7Sjsg (int) reloc->offset, 1324c349dbc7Sjsg reloc->read_domains, 1325c349dbc7Sjsg reloc->write_domain); 1326c349dbc7Sjsg return -EINVAL; 1327c349dbc7Sjsg } 1328c349dbc7Sjsg if (unlikely((reloc->write_domain | reloc->read_domains) 1329c349dbc7Sjsg & ~I915_GEM_GPU_DOMAINS)) { 1330c349dbc7Sjsg drm_dbg(&i915->drm, "reloc with read/write non-GPU domains: " 1331c349dbc7Sjsg "target %d offset %d " 1332c349dbc7Sjsg "read %08x write %08x", 1333c349dbc7Sjsg reloc->target_handle, 1334c349dbc7Sjsg (int) reloc->offset, 1335c349dbc7Sjsg reloc->read_domains, 1336c349dbc7Sjsg reloc->write_domain); 1337c349dbc7Sjsg return -EINVAL; 1338c349dbc7Sjsg } 1339c349dbc7Sjsg 1340c349dbc7Sjsg if (reloc->write_domain) { 1341c349dbc7Sjsg target->flags |= EXEC_OBJECT_WRITE; 1342c349dbc7Sjsg 1343c349dbc7Sjsg /* 1344c349dbc7Sjsg * Sandybridge PPGTT errata: We need a global gtt mapping 1345c349dbc7Sjsg * for MI and pipe_control writes because the gpu doesn't 1346c349dbc7Sjsg * properly redirect them through the ppgtt for non_secure 1347c349dbc7Sjsg * batchbuffers. 
1348c349dbc7Sjsg */ 1349c349dbc7Sjsg if (reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION && 1350*5ca02815Sjsg GRAPHICS_VER(eb->i915) == 6) { 1351c349dbc7Sjsg err = i915_vma_bind(target->vma, 1352c349dbc7Sjsg target->vma->obj->cache_level, 1353c349dbc7Sjsg PIN_GLOBAL, NULL); 1354ad8b1aafSjsg if (err) 1355c349dbc7Sjsg return err; 1356c349dbc7Sjsg } 1357c349dbc7Sjsg } 1358c349dbc7Sjsg 1359c349dbc7Sjsg /* 1360c349dbc7Sjsg * If the relocation already has the right value in it, no 1361c349dbc7Sjsg * more work needs to be done. 1362c349dbc7Sjsg */ 1363c349dbc7Sjsg if (!DBG_FORCE_RELOC && 1364c349dbc7Sjsg gen8_canonical_addr(target->vma->node.start) == reloc->presumed_offset) 1365c349dbc7Sjsg return 0; 1366c349dbc7Sjsg 1367c349dbc7Sjsg /* Check that the relocation address is valid... */ 1368c349dbc7Sjsg if (unlikely(reloc->offset > 1369c349dbc7Sjsg ev->vma->size - (eb->reloc_cache.use_64bit_reloc ? 8 : 4))) { 1370c349dbc7Sjsg drm_dbg(&i915->drm, "Relocation beyond object bounds: " 1371c349dbc7Sjsg "target %d offset %d size %d.\n", 1372c349dbc7Sjsg reloc->target_handle, 1373c349dbc7Sjsg (int)reloc->offset, 1374c349dbc7Sjsg (int)ev->vma->size); 1375c349dbc7Sjsg return -EINVAL; 1376c349dbc7Sjsg } 1377c349dbc7Sjsg if (unlikely(reloc->offset & 3)) { 1378c349dbc7Sjsg drm_dbg(&i915->drm, "Relocation not 4-byte aligned: " 1379c349dbc7Sjsg "target %d offset %d.\n", 1380c349dbc7Sjsg reloc->target_handle, 1381c349dbc7Sjsg (int)reloc->offset); 1382c349dbc7Sjsg return -EINVAL; 1383c349dbc7Sjsg } 1384c349dbc7Sjsg 1385c349dbc7Sjsg /* 1386c349dbc7Sjsg * If we write into the object, we need to force the synchronisation 1387c349dbc7Sjsg * barrier, either with an asynchronous clflush or if we executed the 1388c349dbc7Sjsg * patching using the GPU (though that should be serialised by the 1389c349dbc7Sjsg * timeline). To be completely sure, and since we are required to 1390c349dbc7Sjsg * do relocations we are already stalling, disable the user's opt 1391c349dbc7Sjsg * out of our synchronisation. 1392c349dbc7Sjsg */ 1393c349dbc7Sjsg ev->flags &= ~EXEC_OBJECT_ASYNC; 1394c349dbc7Sjsg 1395c349dbc7Sjsg /* and update the user's relocation entry */ 1396c349dbc7Sjsg return relocate_entry(ev->vma, reloc, eb, target->vma); 1397c349dbc7Sjsg } 1398c349dbc7Sjsg 1399c349dbc7Sjsg static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev) 1400c349dbc7Sjsg { 1401c349dbc7Sjsg #define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry)) 1402c349dbc7Sjsg struct drm_i915_gem_relocation_entry stack[N_RELOC(512)]; 1403c349dbc7Sjsg const struct drm_i915_gem_exec_object2 *entry = ev->exec; 1404ad8b1aafSjsg struct drm_i915_gem_relocation_entry __user *urelocs = 1405ad8b1aafSjsg u64_to_user_ptr(entry->relocs_ptr); 1406ad8b1aafSjsg unsigned long remain = entry->relocation_count; 1407c349dbc7Sjsg 1408c349dbc7Sjsg if (unlikely(remain > N_RELOC(ULONG_MAX))) 1409c349dbc7Sjsg return -EINVAL; 1410c349dbc7Sjsg 1411c349dbc7Sjsg /* 1412c349dbc7Sjsg * We must check that the entire relocation array is safe 1413c349dbc7Sjsg * to read. However, if the array is not writable the user loses 1414c349dbc7Sjsg * the updated relocation values. 
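 * Note that access_ok() below only validates the user range; the actual copies are done in stack-sized chunks with pagefaults disabled.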
1415c349dbc7Sjsg */ 1416c349dbc7Sjsg if (unlikely(!access_ok(urelocs, remain * sizeof(*urelocs)))) 1417c349dbc7Sjsg return -EFAULT; 1418c349dbc7Sjsg 1419c349dbc7Sjsg do { 1420c349dbc7Sjsg struct drm_i915_gem_relocation_entry *r = stack; 1421c349dbc7Sjsg unsigned int count = 1422ad8b1aafSjsg min_t(unsigned long, remain, ARRAY_SIZE(stack)); 1423c349dbc7Sjsg unsigned int copied; 1424c349dbc7Sjsg 1425c349dbc7Sjsg /* 1426c349dbc7Sjsg * This is the fast path and we cannot handle a pagefault 1427c349dbc7Sjsg * whilst holding the struct mutex lest the user pass in the 1428c349dbc7Sjsg * relocations contained within a mmaped bo. In such a case 1429c349dbc7Sjsg * the page fault handler would call i915_gem_fault() and 1430c349dbc7Sjsg * we would try to acquire the struct mutex again. Obviously 1431c349dbc7Sjsg * this is bad and so lockdep complains vehemently. 1432c349dbc7Sjsg */ 1433ad8b1aafSjsg pagefault_disable(); 1434ad8b1aafSjsg copied = __copy_from_user_inatomic(r, urelocs, count * sizeof(r[0])); 1435ad8b1aafSjsg pagefault_enable(); 1436c349dbc7Sjsg if (unlikely(copied)) { 1437c349dbc7Sjsg remain = -EFAULT; 1438c349dbc7Sjsg goto out; 1439c349dbc7Sjsg } 1440c349dbc7Sjsg 1441c349dbc7Sjsg remain -= count; 1442c349dbc7Sjsg do { 1443c349dbc7Sjsg u64 offset = eb_relocate_entry(eb, ev, r); 1444c349dbc7Sjsg 1445c349dbc7Sjsg if (likely(offset == 0)) { 1446c349dbc7Sjsg } else if ((s64)offset < 0) { 1447c349dbc7Sjsg remain = (int)offset; 1448c349dbc7Sjsg goto out; 1449c349dbc7Sjsg } else { 1450c349dbc7Sjsg /* 1451c349dbc7Sjsg * Note that reporting an error now 1452c349dbc7Sjsg * leaves everything in an inconsistent 1453c349dbc7Sjsg * state as we have *already* changed 1454c349dbc7Sjsg * the relocation value inside the 1455c349dbc7Sjsg * object. As we have not changed the 1456c349dbc7Sjsg * reloc.presumed_offset and will not 1457c349dbc7Sjsg * change the execobject.offset, on the 1458c349dbc7Sjsg * next call we may not rewrite the value 1459c349dbc7Sjsg * inside the object, leaving it 1460c349dbc7Sjsg * dangling and causing a GPU hang, unless 1461c349dbc7Sjsg * userspace dynamically rebuilds the 1462c349dbc7Sjsg * relocations on each execbuf rather than 1463c349dbc7Sjsg * presuming a static tree. 1464c349dbc7Sjsg * 1465c349dbc7Sjsg * We did previously check if the relocations 1466c349dbc7Sjsg * were writable (access_ok), an error now 1467c349dbc7Sjsg * would be a strange race with mprotect, 1468c349dbc7Sjsg * having already demonstrated that we 1469c349dbc7Sjsg * can read from this userspace address. 
1470c349dbc7Sjsg */ 1471c349dbc7Sjsg offset = gen8_canonical_addr(offset & ~UPDATE); 1472c349dbc7Sjsg __put_user(offset, 1473c349dbc7Sjsg &urelocs[r - stack].presumed_offset); 1474c349dbc7Sjsg } 1475c349dbc7Sjsg } while (r++, --count); 1476c349dbc7Sjsg urelocs += ARRAY_SIZE(stack); 1477c349dbc7Sjsg } while (remain); 1478c349dbc7Sjsg out: 1479ad8b1aafSjsg reloc_cache_reset(&eb->reloc_cache, eb); 1480c349dbc7Sjsg return remain; 1481c349dbc7Sjsg } 1482c349dbc7Sjsg 1483ad8b1aafSjsg static int 1484ad8b1aafSjsg eb_relocate_vma_slow(struct i915_execbuffer *eb, struct eb_vma *ev) 1485c349dbc7Sjsg { 1486ad8b1aafSjsg const struct drm_i915_gem_exec_object2 *entry = ev->exec; 1487ad8b1aafSjsg struct drm_i915_gem_relocation_entry *relocs = 1488ad8b1aafSjsg u64_to_ptr(typeof(*relocs), entry->relocs_ptr); 1489ad8b1aafSjsg unsigned int i; 1490c349dbc7Sjsg int err; 1491c349dbc7Sjsg 1492ad8b1aafSjsg for (i = 0; i < entry->relocation_count; i++) { 1493ad8b1aafSjsg u64 offset = eb_relocate_entry(eb, ev, &relocs[i]); 1494c349dbc7Sjsg 1495ad8b1aafSjsg if ((s64)offset < 0) { 1496ad8b1aafSjsg err = (int)offset; 1497ad8b1aafSjsg goto err; 1498ad8b1aafSjsg } 1499ad8b1aafSjsg } 1500ad8b1aafSjsg err = 0; 1501ad8b1aafSjsg err: 1502ad8b1aafSjsg reloc_cache_reset(&eb->reloc_cache, eb); 1503ad8b1aafSjsg return err; 1504ad8b1aafSjsg } 1505ad8b1aafSjsg 1506ad8b1aafSjsg static int check_relocations(const struct drm_i915_gem_exec_object2 *entry) 1507ad8b1aafSjsg { 1508ad8b1aafSjsg const char __user *addr, *end; 1509ad8b1aafSjsg unsigned long size; 1510ad8b1aafSjsg char __maybe_unused c; 1511ad8b1aafSjsg 1512ad8b1aafSjsg size = entry->relocation_count; 1513ad8b1aafSjsg if (size == 0) 1514ad8b1aafSjsg return 0; 1515ad8b1aafSjsg 1516ad8b1aafSjsg if (size > N_RELOC(ULONG_MAX)) 1517ad8b1aafSjsg return -EINVAL; 1518ad8b1aafSjsg 1519ad8b1aafSjsg addr = u64_to_user_ptr(entry->relocs_ptr); 1520ad8b1aafSjsg size *= sizeof(struct drm_i915_gem_relocation_entry); 1521ad8b1aafSjsg if (!access_ok(addr, size)) 1522ad8b1aafSjsg return -EFAULT; 1523ad8b1aafSjsg 1524ad8b1aafSjsg end = addr + size; 1525ad8b1aafSjsg for (; addr < end; addr += PAGE_SIZE) { 1526ad8b1aafSjsg int err = __get_user(c, addr); 1527c349dbc7Sjsg if (err) 1528c349dbc7Sjsg return err; 1529c349dbc7Sjsg } 1530ad8b1aafSjsg return __get_user(c, end - 1); 1531ad8b1aafSjsg } 1532ad8b1aafSjsg 1533ad8b1aafSjsg static int eb_copy_relocations(const struct i915_execbuffer *eb) 1534ad8b1aafSjsg { 1535ad8b1aafSjsg struct drm_i915_gem_relocation_entry *relocs; 1536ad8b1aafSjsg const unsigned int count = eb->buffer_count; 1537ad8b1aafSjsg unsigned int i; 1538ad8b1aafSjsg int err; 1539ad8b1aafSjsg 1540ad8b1aafSjsg for (i = 0; i < count; i++) { 1541ad8b1aafSjsg const unsigned int nreloc = eb->exec[i].relocation_count; 1542ad8b1aafSjsg struct drm_i915_gem_relocation_entry __user *urelocs; 1543ad8b1aafSjsg unsigned long size; 1544ad8b1aafSjsg unsigned long copied; 1545ad8b1aafSjsg 1546ad8b1aafSjsg if (nreloc == 0) 1547ad8b1aafSjsg continue; 1548ad8b1aafSjsg 1549ad8b1aafSjsg err = check_relocations(&eb->exec[i]); 1550ad8b1aafSjsg if (err) 1551ad8b1aafSjsg goto err; 1552ad8b1aafSjsg 1553ad8b1aafSjsg urelocs = u64_to_user_ptr(eb->exec[i].relocs_ptr); 1554ad8b1aafSjsg size = nreloc * sizeof(*relocs); 1555ad8b1aafSjsg 1556ad8b1aafSjsg relocs = kvmalloc_array(size, 1, GFP_KERNEL); 1557ad8b1aafSjsg if (!relocs) { 1558ad8b1aafSjsg err = -ENOMEM; 1559ad8b1aafSjsg goto err; 1560ad8b1aafSjsg } 1561ad8b1aafSjsg 1562ad8b1aafSjsg /* copy_from_user is limited to < 4GiB */ 1563ad8b1aafSjsg copied = 0; 
1564ad8b1aafSjsg do { 1565ad8b1aafSjsg unsigned int len = 1566ad8b1aafSjsg min_t(u64, BIT_ULL(31), size - copied); 1567ad8b1aafSjsg 1568ad8b1aafSjsg if (__copy_from_user((char *)relocs + copied, 1569ad8b1aafSjsg (char __user *)urelocs + copied, 1570ad8b1aafSjsg len)) 1571ad8b1aafSjsg goto end; 1572ad8b1aafSjsg 1573ad8b1aafSjsg copied += len; 1574ad8b1aafSjsg } while (copied < size); 1575ad8b1aafSjsg 1576ad8b1aafSjsg /* 1577ad8b1aafSjsg * As we do not update the known relocation offsets after 1578ad8b1aafSjsg * relocating (due to the complexities in lock handling), 1579ad8b1aafSjsg * we need to mark them as invalid now so that we force the 1580ad8b1aafSjsg * relocation processing next time. Just in case the target 1581ad8b1aafSjsg * object is evicted and then rebound into its old 1582ad8b1aafSjsg * presumed_offset before the next execbuffer - if that 1583ad8b1aafSjsg * happened we would make the mistake of assuming that the 1584ad8b1aafSjsg * relocations were valid. 1585ad8b1aafSjsg */ 1586ad8b1aafSjsg if (!user_access_begin(urelocs, size)) 1587ad8b1aafSjsg goto end; 1588ad8b1aafSjsg 1589ad8b1aafSjsg for (copied = 0; copied < nreloc; copied++) 1590ad8b1aafSjsg unsafe_put_user(-1, 1591ad8b1aafSjsg &urelocs[copied].presumed_offset, 1592ad8b1aafSjsg end_user); 1593ad8b1aafSjsg user_access_end(); 1594ad8b1aafSjsg 1595ad8b1aafSjsg eb->exec[i].relocs_ptr = (uintptr_t)relocs; 1596ad8b1aafSjsg } 1597ad8b1aafSjsg 1598ad8b1aafSjsg return 0; 1599ad8b1aafSjsg 1600ad8b1aafSjsg end_user: 1601ad8b1aafSjsg user_access_end(); 1602ad8b1aafSjsg end: 1603ad8b1aafSjsg kvfree(relocs); 1604ad8b1aafSjsg err = -EFAULT; 1605ad8b1aafSjsg err: 1606ad8b1aafSjsg while (i--) { 1607ad8b1aafSjsg relocs = u64_to_ptr(typeof(*relocs), eb->exec[i].relocs_ptr); 1608ad8b1aafSjsg if (eb->exec[i].relocation_count) 1609ad8b1aafSjsg kvfree(relocs); 1610ad8b1aafSjsg } 1611ad8b1aafSjsg return err; 1612ad8b1aafSjsg } 1613ad8b1aafSjsg 1614ad8b1aafSjsg static int eb_prefault_relocations(const struct i915_execbuffer *eb) 1615ad8b1aafSjsg { 1616ad8b1aafSjsg const unsigned int count = eb->buffer_count; 1617ad8b1aafSjsg unsigned int i; 1618ad8b1aafSjsg 1619ad8b1aafSjsg for (i = 0; i < count; i++) { 1620ad8b1aafSjsg int err; 1621ad8b1aafSjsg 1622ad8b1aafSjsg err = check_relocations(&eb->exec[i]); 1623ad8b1aafSjsg if (err) 1624ad8b1aafSjsg return err; 1625ad8b1aafSjsg } 1626ad8b1aafSjsg 1627ad8b1aafSjsg return 0; 1628ad8b1aafSjsg } 1629ad8b1aafSjsg 1630*5ca02815Sjsg static int eb_reinit_userptr(struct i915_execbuffer *eb) 1631*5ca02815Sjsg { 1632*5ca02815Sjsg const unsigned int count = eb->buffer_count; 1633*5ca02815Sjsg unsigned int i; 1634*5ca02815Sjsg int ret; 1635*5ca02815Sjsg 1636*5ca02815Sjsg if (likely(!(eb->args->flags & __EXEC_USERPTR_USED))) 1637*5ca02815Sjsg return 0; 1638*5ca02815Sjsg 1639*5ca02815Sjsg for (i = 0; i < count; i++) { 1640*5ca02815Sjsg struct eb_vma *ev = &eb->vma[i]; 1641*5ca02815Sjsg 1642*5ca02815Sjsg if (!i915_gem_object_is_userptr(ev->vma->obj)) 1643*5ca02815Sjsg continue; 1644*5ca02815Sjsg 1645*5ca02815Sjsg ret = i915_gem_object_userptr_submit_init(ev->vma->obj); 1646*5ca02815Sjsg if (ret) 1647*5ca02815Sjsg return ret; 1648*5ca02815Sjsg 1649*5ca02815Sjsg ev->flags |= __EXEC_OBJECT_USERPTR_INIT; 1650*5ca02815Sjsg } 1651*5ca02815Sjsg 1652*5ca02815Sjsg return 0; 1653*5ca02815Sjsg } 1654*5ca02815Sjsg 1655ad8b1aafSjsg static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb, 1656ad8b1aafSjsg struct i915_request *rq) 1657ad8b1aafSjsg { 1658ad8b1aafSjsg bool have_copy = false; 1659ad8b1aafSjsg struct eb_vma 
*ev; 1660ad8b1aafSjsg int err = 0; 1661ad8b1aafSjsg 1662ad8b1aafSjsg repeat: 1663ad8b1aafSjsg if (signal_pending(current)) { 1664ad8b1aafSjsg err = -ERESTARTSYS; 1665ad8b1aafSjsg goto out; 1666ad8b1aafSjsg } 1667ad8b1aafSjsg 1668ad8b1aafSjsg /* We may process another execbuffer during the unlock... */ 1669ad8b1aafSjsg eb_release_vmas(eb, false); 1670ad8b1aafSjsg i915_gem_ww_ctx_fini(&eb->ww); 1671ad8b1aafSjsg 1672ad8b1aafSjsg if (rq) { 1673ad8b1aafSjsg /* nonblocking is always false */ 1674ad8b1aafSjsg if (i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE, 1675ad8b1aafSjsg MAX_SCHEDULE_TIMEOUT) < 0) { 1676ad8b1aafSjsg i915_request_put(rq); 1677ad8b1aafSjsg rq = NULL; 1678ad8b1aafSjsg 1679ad8b1aafSjsg err = -EINTR; 1680ad8b1aafSjsg goto err_relock; 1681ad8b1aafSjsg } 1682ad8b1aafSjsg 1683ad8b1aafSjsg i915_request_put(rq); 1684ad8b1aafSjsg rq = NULL; 1685ad8b1aafSjsg } 1686ad8b1aafSjsg 1687ad8b1aafSjsg /* 1688ad8b1aafSjsg * We take 3 passes through the slowpath. 1689ad8b1aafSjsg * 1690ad8b1aafSjsg * 1 - we try to just prefault all the user relocation entries and 1691ad8b1aafSjsg * then attempt to reuse the atomic pagefault disabled fast path again. 1692ad8b1aafSjsg * 1693ad8b1aafSjsg * 2 - we copy the user entries to a local buffer here outside of the 1694ad8b1aafSjsg * lock and allow ourselves to wait upon any rendering before 1695ad8b1aafSjsg * performing relocations 1696ad8b1aafSjsg * 1697ad8b1aafSjsg * 3 - we already have a local copy of the relocation entries, but 1698ad8b1aafSjsg * were interrupted (EAGAIN) whilst waiting for the objects, try again. 1699ad8b1aafSjsg */ 1700ad8b1aafSjsg if (!err) { 1701ad8b1aafSjsg err = eb_prefault_relocations(eb); 1702ad8b1aafSjsg } else if (!have_copy) { 1703ad8b1aafSjsg err = eb_copy_relocations(eb); 1704ad8b1aafSjsg have_copy = err == 0; 1705ad8b1aafSjsg } else { 1706ad8b1aafSjsg cond_resched(); 1707ad8b1aafSjsg err = 0; 1708ad8b1aafSjsg } 1709ad8b1aafSjsg 1710ad8b1aafSjsg if (!err) 1711*5ca02815Sjsg err = eb_reinit_userptr(eb); 1712ad8b1aafSjsg 1713ad8b1aafSjsg err_relock: 1714ad8b1aafSjsg i915_gem_ww_ctx_init(&eb->ww, true); 1715ad8b1aafSjsg if (err) 1716ad8b1aafSjsg goto out; 1717ad8b1aafSjsg 1718ad8b1aafSjsg /* reacquire the objects */ 1719ad8b1aafSjsg repeat_validate: 1720ad8b1aafSjsg rq = eb_pin_engine(eb, false); 1721ad8b1aafSjsg if (IS_ERR(rq)) { 1722ad8b1aafSjsg err = PTR_ERR(rq); 1723ad8b1aafSjsg rq = NULL; 1724ad8b1aafSjsg goto err; 1725ad8b1aafSjsg } 1726ad8b1aafSjsg 1727ad8b1aafSjsg /* We didn't throttle, should be NULL */ 1728ad8b1aafSjsg GEM_WARN_ON(rq); 1729ad8b1aafSjsg 1730ad8b1aafSjsg err = eb_validate_vmas(eb); 1731ad8b1aafSjsg if (err) 1732ad8b1aafSjsg goto err; 1733ad8b1aafSjsg 1734ad8b1aafSjsg GEM_BUG_ON(!eb->batch); 1735ad8b1aafSjsg 1736ad8b1aafSjsg list_for_each_entry(ev, &eb->relocs, reloc_link) { 1737ad8b1aafSjsg if (!have_copy) { 1738ad8b1aafSjsg err = eb_relocate_vma(eb, ev); 1739ad8b1aafSjsg if (err) 1740ad8b1aafSjsg break; 1741ad8b1aafSjsg } else { 1742ad8b1aafSjsg err = eb_relocate_vma_slow(eb, ev); 1743ad8b1aafSjsg if (err) 1744ad8b1aafSjsg break; 1745ad8b1aafSjsg } 1746ad8b1aafSjsg } 1747ad8b1aafSjsg 1748ad8b1aafSjsg if (err == -EDEADLK) 1749ad8b1aafSjsg goto err; 1750ad8b1aafSjsg 1751ad8b1aafSjsg if (err && !have_copy) 1752ad8b1aafSjsg goto repeat; 1753ad8b1aafSjsg 1754ad8b1aafSjsg if (err) 1755ad8b1aafSjsg goto err; 1756ad8b1aafSjsg 1757ad8b1aafSjsg /* as last step, parse the command buffer */ 1758ad8b1aafSjsg err = eb_parse(eb); 1759ad8b1aafSjsg if (err) 1760ad8b1aafSjsg goto err; 1761ad8b1aafSjsg 1762ad8b1aafSjsg /* 
1763ad8b1aafSjsg * Leave the user relocations as are, this is the painfully slow path, 1764ad8b1aafSjsg * and we want to avoid the complication of dropping the lock whilst 1765ad8b1aafSjsg * having buffers reserved in the aperture and so causing spurious 1766ad8b1aafSjsg * ENOSPC for random operations. 1767ad8b1aafSjsg */ 1768ad8b1aafSjsg 1769ad8b1aafSjsg err: 1770ad8b1aafSjsg if (err == -EDEADLK) { 1771ad8b1aafSjsg eb_release_vmas(eb, false); 1772ad8b1aafSjsg err = i915_gem_ww_ctx_backoff(&eb->ww); 1773ad8b1aafSjsg if (!err) 1774ad8b1aafSjsg goto repeat_validate; 1775ad8b1aafSjsg } 1776ad8b1aafSjsg 1777ad8b1aafSjsg if (err == -EAGAIN) 1778ad8b1aafSjsg goto repeat; 1779ad8b1aafSjsg 1780ad8b1aafSjsg out: 1781ad8b1aafSjsg if (have_copy) { 1782ad8b1aafSjsg const unsigned int count = eb->buffer_count; 1783ad8b1aafSjsg unsigned int i; 1784ad8b1aafSjsg 1785ad8b1aafSjsg for (i = 0; i < count; i++) { 1786ad8b1aafSjsg const struct drm_i915_gem_exec_object2 *entry = 1787ad8b1aafSjsg &eb->exec[i]; 1788ad8b1aafSjsg struct drm_i915_gem_relocation_entry *relocs; 1789ad8b1aafSjsg 1790ad8b1aafSjsg if (!entry->relocation_count) 1791ad8b1aafSjsg continue; 1792ad8b1aafSjsg 1793ad8b1aafSjsg relocs = u64_to_ptr(typeof(*relocs), entry->relocs_ptr); 1794ad8b1aafSjsg kvfree(relocs); 1795ad8b1aafSjsg } 1796ad8b1aafSjsg } 1797ad8b1aafSjsg 1798ad8b1aafSjsg if (rq) 1799ad8b1aafSjsg i915_request_put(rq); 1800ad8b1aafSjsg 1801ad8b1aafSjsg return err; 1802ad8b1aafSjsg } 1803ad8b1aafSjsg 1804ad8b1aafSjsg static int eb_relocate_parse(struct i915_execbuffer *eb) 1805ad8b1aafSjsg { 1806ad8b1aafSjsg int err; 1807ad8b1aafSjsg struct i915_request *rq = NULL; 1808ad8b1aafSjsg bool throttle = true; 1809ad8b1aafSjsg 1810ad8b1aafSjsg retry: 1811ad8b1aafSjsg rq = eb_pin_engine(eb, throttle); 1812ad8b1aafSjsg if (IS_ERR(rq)) { 1813ad8b1aafSjsg err = PTR_ERR(rq); 1814ad8b1aafSjsg rq = NULL; 1815ad8b1aafSjsg if (err != -EDEADLK) 1816ad8b1aafSjsg return err; 1817ad8b1aafSjsg 1818ad8b1aafSjsg goto err; 1819ad8b1aafSjsg } 1820ad8b1aafSjsg 1821ad8b1aafSjsg if (rq) { 1822ad8b1aafSjsg #ifdef __linux__ 1823ad8b1aafSjsg bool nonblock = eb->file->filp->f_flags & O_NONBLOCK; 1824ad8b1aafSjsg #else 1825ad8b1aafSjsg bool nonblock = eb->file->filp->f_flag & FNONBLOCK; 1826ad8b1aafSjsg #endif 1827ad8b1aafSjsg 1828ad8b1aafSjsg /* Need to drop all locks now for throttling, take slowpath */ 1829ad8b1aafSjsg err = i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE, 0); 1830ad8b1aafSjsg if (err == -ETIME) { 1831ad8b1aafSjsg if (nonblock) { 1832ad8b1aafSjsg err = -EWOULDBLOCK; 1833ad8b1aafSjsg i915_request_put(rq); 1834ad8b1aafSjsg goto err; 1835ad8b1aafSjsg } 1836ad8b1aafSjsg goto slow; 1837ad8b1aafSjsg } 1838ad8b1aafSjsg i915_request_put(rq); 1839ad8b1aafSjsg rq = NULL; 1840ad8b1aafSjsg } 1841ad8b1aafSjsg 1842ad8b1aafSjsg /* only throttle once, even if we didn't need to throttle */ 1843ad8b1aafSjsg throttle = false; 1844ad8b1aafSjsg 1845ad8b1aafSjsg err = eb_validate_vmas(eb); 1846ad8b1aafSjsg if (err == -EAGAIN) 1847ad8b1aafSjsg goto slow; 1848ad8b1aafSjsg else if (err) 1849ad8b1aafSjsg goto err; 1850c349dbc7Sjsg 1851c349dbc7Sjsg /* The objects are in their final locations, apply the relocations. 
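 * (Skipped entirely when userspace passed NO_RELOC, i.e. __EXEC_HAS_RELOC is clear.)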
*/ 1852c349dbc7Sjsg if (eb->args->flags & __EXEC_HAS_RELOC) { 1853c349dbc7Sjsg struct eb_vma *ev; 1854c349dbc7Sjsg 1855c349dbc7Sjsg list_for_each_entry(ev, &eb->relocs, reloc_link) { 1856c349dbc7Sjsg err = eb_relocate_vma(eb, ev); 1857c349dbc7Sjsg if (err) 1858ad8b1aafSjsg break; 1859c349dbc7Sjsg } 1860c349dbc7Sjsg 1861ad8b1aafSjsg if (err == -EDEADLK) 1862ad8b1aafSjsg goto err; 1863ad8b1aafSjsg else if (err) 1864ad8b1aafSjsg goto slow; 1865ad8b1aafSjsg } 1866ad8b1aafSjsg 1867ad8b1aafSjsg if (!err) 1868ad8b1aafSjsg err = eb_parse(eb); 1869ad8b1aafSjsg 1870ad8b1aafSjsg err: 1871ad8b1aafSjsg if (err == -EDEADLK) { 1872ad8b1aafSjsg eb_release_vmas(eb, false); 1873ad8b1aafSjsg err = i915_gem_ww_ctx_backoff(&eb->ww); 1874ad8b1aafSjsg if (!err) 1875ad8b1aafSjsg goto retry; 1876ad8b1aafSjsg } 1877ad8b1aafSjsg 1878ad8b1aafSjsg return err; 1879ad8b1aafSjsg 1880ad8b1aafSjsg slow: 1881ad8b1aafSjsg err = eb_relocate_parse_slow(eb, rq); 1882ad8b1aafSjsg if (err) 1883ad8b1aafSjsg /* 1884ad8b1aafSjsg * If the user expects the execobject.offset and 1885ad8b1aafSjsg * reloc.presumed_offset to be an exact match, 1886ad8b1aafSjsg * as for using NO_RELOC, then we cannot update 1887ad8b1aafSjsg * the execobject.offset until we have completed 1888ad8b1aafSjsg * relocation. 1889ad8b1aafSjsg */ 1890ad8b1aafSjsg eb->args->flags &= ~__EXEC_HAS_RELOC; 1891ad8b1aafSjsg 1892ad8b1aafSjsg return err; 1893c349dbc7Sjsg } 1894c349dbc7Sjsg 1895c349dbc7Sjsg static int eb_move_to_gpu(struct i915_execbuffer *eb) 1896c349dbc7Sjsg { 1897c349dbc7Sjsg const unsigned int count = eb->buffer_count; 1898ad8b1aafSjsg unsigned int i = count; 1899c349dbc7Sjsg int err = 0; 1900c349dbc7Sjsg 1901c349dbc7Sjsg while (i--) { 1902c349dbc7Sjsg struct eb_vma *ev = &eb->vma[i]; 1903c349dbc7Sjsg struct i915_vma *vma = ev->vma; 1904c349dbc7Sjsg unsigned int flags = ev->flags; 1905c349dbc7Sjsg struct drm_i915_gem_object *obj = vma->obj; 1906c349dbc7Sjsg 1907c349dbc7Sjsg assert_vma_held(vma); 1908c349dbc7Sjsg 1909c349dbc7Sjsg if (flags & EXEC_OBJECT_CAPTURE) { 1910c349dbc7Sjsg struct i915_capture_list *capture; 1911c349dbc7Sjsg 1912c349dbc7Sjsg capture = kmalloc(sizeof(*capture), GFP_KERNEL); 1913c349dbc7Sjsg if (capture) { 1914c349dbc7Sjsg capture->next = eb->request->capture_list; 1915c349dbc7Sjsg capture->vma = vma; 1916c349dbc7Sjsg eb->request->capture_list = capture; 1917c349dbc7Sjsg } 1918c349dbc7Sjsg } 1919c349dbc7Sjsg 1920c349dbc7Sjsg /* 1921c349dbc7Sjsg * If the GPU is not _reading_ through the CPU cache, we need 1922c349dbc7Sjsg * to make sure that any writes (both previous GPU writes from 1923c349dbc7Sjsg * before a change in snooping levels and normal CPU writes) 1924c349dbc7Sjsg * caught in that cache are flushed to main memory. 1925c349dbc7Sjsg * 1926c349dbc7Sjsg * We want to say 1927c349dbc7Sjsg * obj->cache_dirty && 1928c349dbc7Sjsg * !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ) 1929c349dbc7Sjsg * but gcc's optimiser doesn't handle that as well and emits 1930c349dbc7Sjsg * two jumps instead of one. Maybe one day... 
1931c349dbc7Sjsg */ 1932c349dbc7Sjsg if (unlikely(obj->cache_dirty & ~obj->cache_coherent)) { 1933c349dbc7Sjsg if (i915_gem_clflush_object(obj, 0)) 1934c349dbc7Sjsg flags &= ~EXEC_OBJECT_ASYNC; 1935c349dbc7Sjsg } 1936c349dbc7Sjsg 1937c349dbc7Sjsg if (err == 0 && !(flags & EXEC_OBJECT_ASYNC)) { 1938c349dbc7Sjsg err = i915_request_await_object 1939c349dbc7Sjsg (eb->request, obj, flags & EXEC_OBJECT_WRITE); 1940c349dbc7Sjsg } 1941c349dbc7Sjsg 1942c349dbc7Sjsg if (err == 0) 1943*5ca02815Sjsg err = i915_vma_move_to_active(vma, eb->request, 1944*5ca02815Sjsg flags | __EXEC_OBJECT_NO_RESERVE); 1945c349dbc7Sjsg } 1946c349dbc7Sjsg 1947*5ca02815Sjsg #ifdef CONFIG_MMU_NOTIFIER 1948*5ca02815Sjsg if (!err && (eb->args->flags & __EXEC_USERPTR_USED)) { 1949*5ca02815Sjsg read_lock(&eb->i915->mm.notifier_lock); 1950*5ca02815Sjsg 1951*5ca02815Sjsg /* 1952*5ca02815Sjsg * count is always at least 1, otherwise __EXEC_USERPTR_USED 1953*5ca02815Sjsg * could not have been set 1954*5ca02815Sjsg */ 1955*5ca02815Sjsg for (i = 0; i < count; i++) { 1956*5ca02815Sjsg struct eb_vma *ev = &eb->vma[i]; 1957*5ca02815Sjsg struct drm_i915_gem_object *obj = ev->vma->obj; 1958*5ca02815Sjsg 1959*5ca02815Sjsg if (!i915_gem_object_is_userptr(obj)) 1960*5ca02815Sjsg continue; 1961*5ca02815Sjsg 1962*5ca02815Sjsg err = i915_gem_object_userptr_submit_done(obj); 1963*5ca02815Sjsg if (err) 1964*5ca02815Sjsg break; 1965*5ca02815Sjsg } 1966*5ca02815Sjsg 1967*5ca02815Sjsg read_unlock(&eb->i915->mm.notifier_lock); 1968*5ca02815Sjsg } 1969*5ca02815Sjsg #endif 1970*5ca02815Sjsg 1971c349dbc7Sjsg if (unlikely(err)) 1972c349dbc7Sjsg goto err_skip; 1973c349dbc7Sjsg 1974c349dbc7Sjsg /* Unconditionally flush any chipset caches (for streaming writes). */ 1975c349dbc7Sjsg intel_gt_chipset_flush(eb->engine->gt); 1976c349dbc7Sjsg return 0; 1977c349dbc7Sjsg 1978c349dbc7Sjsg err_skip: 1979c349dbc7Sjsg i915_request_set_error_once(eb->request, err); 1980c349dbc7Sjsg return err; 1981c349dbc7Sjsg } 1982c349dbc7Sjsg 1983c349dbc7Sjsg static int i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec) 1984c349dbc7Sjsg { 1985c349dbc7Sjsg if (exec->flags & __I915_EXEC_ILLEGAL_FLAGS) 1986c349dbc7Sjsg return -EINVAL; 1987c349dbc7Sjsg 1988c349dbc7Sjsg /* Kernel clipping was a DRI1 misfeature */ 1989ad8b1aafSjsg if (!(exec->flags & (I915_EXEC_FENCE_ARRAY | 1990ad8b1aafSjsg I915_EXEC_USE_EXTENSIONS))) { 1991c349dbc7Sjsg if (exec->num_cliprects || exec->cliprects_ptr) 1992c349dbc7Sjsg return -EINVAL; 1993c349dbc7Sjsg } 1994c349dbc7Sjsg 1995c349dbc7Sjsg if (exec->DR4 == 0xffffffff) { 1996c349dbc7Sjsg DRM_DEBUG("UXA submitting garbage DR4, fixing up\n"); 1997c349dbc7Sjsg exec->DR4 = 0; 1998c349dbc7Sjsg } 1999c349dbc7Sjsg if (exec->DR1 || exec->DR4) 2000c349dbc7Sjsg return -EINVAL; 2001c349dbc7Sjsg 2002c349dbc7Sjsg if ((exec->batch_start_offset | exec->batch_len) & 0x7) 2003c349dbc7Sjsg return -EINVAL; 2004c349dbc7Sjsg 2005c349dbc7Sjsg return 0; 2006c349dbc7Sjsg } 2007c349dbc7Sjsg 2008c349dbc7Sjsg static int i915_reset_gen7_sol_offsets(struct i915_request *rq) 2009c349dbc7Sjsg { 2010c349dbc7Sjsg u32 *cs; 2011c349dbc7Sjsg int i; 2012c349dbc7Sjsg 2013*5ca02815Sjsg if (GRAPHICS_VER(rq->engine->i915) != 7 || rq->engine->id != RCS0) { 2014ad8b1aafSjsg drm_dbg(&rq->engine->i915->drm, "sol reset is gen7/rcs only\n"); 2015c349dbc7Sjsg return -EINVAL; 2016c349dbc7Sjsg } 2017c349dbc7Sjsg 2018c349dbc7Sjsg cs = intel_ring_begin(rq, 4 * 2 + 2); 2019c349dbc7Sjsg if (IS_ERR(cs)) 2020c349dbc7Sjsg return PTR_ERR(cs); 2021c349dbc7Sjsg 2022c349dbc7Sjsg *cs++ = 
MI_LOAD_REGISTER_IMM(4); 2023c349dbc7Sjsg for (i = 0; i < 4; i++) { 2024c349dbc7Sjsg *cs++ = i915_mmio_reg_offset(GEN7_SO_WRITE_OFFSET(i)); 2025c349dbc7Sjsg *cs++ = 0; 2026c349dbc7Sjsg } 2027c349dbc7Sjsg *cs++ = MI_NOOP; 2028c349dbc7Sjsg intel_ring_advance(rq, cs); 2029c349dbc7Sjsg 2030c349dbc7Sjsg return 0; 2031c349dbc7Sjsg } 2032c349dbc7Sjsg 2033c349dbc7Sjsg static struct i915_vma * 2034ad8b1aafSjsg shadow_batch_pin(struct i915_execbuffer *eb, 2035ad8b1aafSjsg struct drm_i915_gem_object *obj, 2036c349dbc7Sjsg struct i915_address_space *vm, 2037c349dbc7Sjsg unsigned int flags) 2038c349dbc7Sjsg { 2039c349dbc7Sjsg struct i915_vma *vma; 2040c349dbc7Sjsg int err; 2041c349dbc7Sjsg 2042c349dbc7Sjsg vma = i915_vma_instance(obj, vm, NULL); 2043c349dbc7Sjsg if (IS_ERR(vma)) 2044c349dbc7Sjsg return vma; 2045c349dbc7Sjsg 2046ad8b1aafSjsg err = i915_vma_pin_ww(vma, &eb->ww, 0, 0, flags); 2047c349dbc7Sjsg if (err) 2048c349dbc7Sjsg return ERR_PTR(err); 2049c349dbc7Sjsg 2050c349dbc7Sjsg return vma; 2051c349dbc7Sjsg } 2052c349dbc7Sjsg 2053ad8b1aafSjsg static struct i915_vma *eb_dispatch_secure(struct i915_execbuffer *eb, struct i915_vma *vma) 2054ad8b1aafSjsg { 2055ad8b1aafSjsg /* 2056ad8b1aafSjsg * snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure 2057ad8b1aafSjsg * batch" bit. Hence we need to pin secure batches into the global gtt. 2058ad8b1aafSjsg * hsw should have this fixed, but bdw mucks it up again. */ 2059ad8b1aafSjsg if (eb->batch_flags & I915_DISPATCH_SECURE) 2060ad8b1aafSjsg return i915_gem_object_ggtt_pin_ww(vma->obj, &eb->ww, NULL, 0, 0, 0); 2061ad8b1aafSjsg 2062ad8b1aafSjsg return NULL; 2063ad8b1aafSjsg } 2064ad8b1aafSjsg 2065c349dbc7Sjsg static int eb_parse(struct i915_execbuffer *eb) 2066c349dbc7Sjsg { 2067c349dbc7Sjsg struct drm_i915_private *i915 = eb->i915; 2068ad8b1aafSjsg struct intel_gt_buffer_pool_node *pool = eb->batch_pool; 2069ad8b1aafSjsg struct i915_vma *shadow, *trampoline, *batch; 2070ad8b1aafSjsg unsigned long len; 2071c349dbc7Sjsg int err; 2072c349dbc7Sjsg 2073ad8b1aafSjsg if (!eb_use_cmdparser(eb)) { 2074ad8b1aafSjsg batch = eb_dispatch_secure(eb, eb->batch->vma); 2075ad8b1aafSjsg if (IS_ERR(batch)) 2076ad8b1aafSjsg return PTR_ERR(batch); 2077ad8b1aafSjsg 2078ad8b1aafSjsg goto secure_batch; 2079ad8b1aafSjsg } 2080c349dbc7Sjsg 2081c349dbc7Sjsg len = eb->batch_len; 2082c349dbc7Sjsg if (!CMDPARSER_USES_GGTT(eb->i915)) { 2083c349dbc7Sjsg /* 2084c349dbc7Sjsg * ppGTT backed shadow buffers must be mapped RO, to prevent 2085c349dbc7Sjsg * post-scan tampering 2086c349dbc7Sjsg */ 2087c349dbc7Sjsg if (!eb->context->vm->has_read_only) { 2088c349dbc7Sjsg drm_dbg(&i915->drm, 2089c349dbc7Sjsg "Cannot prevent post-scan tampering without RO capable vm\n"); 2090c349dbc7Sjsg return -EINVAL; 2091c349dbc7Sjsg } 2092c349dbc7Sjsg } else { 2093c349dbc7Sjsg len += I915_CMD_PARSER_TRAMPOLINE_SIZE; 2094c349dbc7Sjsg } 2095ad8b1aafSjsg if (unlikely(len < eb->batch_len)) /* last paranoid check of overflow */ 2096ad8b1aafSjsg return -EINVAL; 2097c349dbc7Sjsg 2098ad8b1aafSjsg if (!pool) { 2099*5ca02815Sjsg pool = intel_gt_get_buffer_pool(eb->engine->gt, len, 2100*5ca02815Sjsg I915_MAP_WB); 2101c349dbc7Sjsg if (IS_ERR(pool)) 2102c349dbc7Sjsg return PTR_ERR(pool); 2103ad8b1aafSjsg eb->batch_pool = pool; 2104ad8b1aafSjsg } 2105c349dbc7Sjsg 2106ad8b1aafSjsg err = i915_gem_object_lock(pool->obj, &eb->ww); 2107ad8b1aafSjsg if (err) 2108ad8b1aafSjsg goto err; 2109ad8b1aafSjsg 2110ad8b1aafSjsg shadow = shadow_batch_pin(eb, pool->obj, eb->context->vm, PIN_USER); 2111c349dbc7Sjsg if 
(IS_ERR(shadow)) { 2112c349dbc7Sjsg err = PTR_ERR(shadow); 2113c349dbc7Sjsg goto err; 2114c349dbc7Sjsg } 2115*5ca02815Sjsg intel_gt_buffer_pool_mark_used(pool); 2116c349dbc7Sjsg i915_gem_object_set_readonly(shadow->obj); 2117ad8b1aafSjsg shadow->private = pool; 2118c349dbc7Sjsg 2119c349dbc7Sjsg trampoline = NULL; 2120c349dbc7Sjsg if (CMDPARSER_USES_GGTT(eb->i915)) { 2121c349dbc7Sjsg trampoline = shadow; 2122c349dbc7Sjsg 2123ad8b1aafSjsg shadow = shadow_batch_pin(eb, pool->obj, 2124c349dbc7Sjsg &eb->engine->gt->ggtt->vm, 2125c349dbc7Sjsg PIN_GLOBAL); 2126c349dbc7Sjsg if (IS_ERR(shadow)) { 2127c349dbc7Sjsg err = PTR_ERR(shadow); 2128c349dbc7Sjsg shadow = trampoline; 2129c349dbc7Sjsg goto err_shadow; 2130c349dbc7Sjsg } 2131ad8b1aafSjsg shadow->private = pool; 2132c349dbc7Sjsg 2133c349dbc7Sjsg eb->batch_flags |= I915_DISPATCH_SECURE; 2134c349dbc7Sjsg } 2135c349dbc7Sjsg 2136a37a8dbaSjsg batch = eb_dispatch_secure(eb, shadow); 2137a37a8dbaSjsg if (IS_ERR(batch)) { 2138a37a8dbaSjsg err = PTR_ERR(batch); 2139a37a8dbaSjsg goto err_trampoline; 2140a37a8dbaSjsg } 2141a37a8dbaSjsg 2142*5ca02815Sjsg err = dma_resv_reserve_shared(shadow->resv, 1); 2143*5ca02815Sjsg if (err) 2144*5ca02815Sjsg goto err_trampoline; 2145*5ca02815Sjsg 2146d9ace711Sjsg err = intel_engine_cmd_parser(eb->engine, 2147d9ace711Sjsg eb->batch->vma, 2148d9ace711Sjsg eb->batch_start_offset, 2149d9ace711Sjsg eb->batch_len, 2150d9ace711Sjsg shadow, trampoline); 2151c349dbc7Sjsg if (err) 2152ad8b1aafSjsg goto err_unpin_batch; 2153c349dbc7Sjsg 2154c349dbc7Sjsg eb->batch = &eb->vma[eb->buffer_count++]; 2155ad8b1aafSjsg eb->batch->vma = i915_vma_get(shadow); 2156ad8b1aafSjsg eb->batch->flags = __EXEC_OBJECT_HAS_PIN; 2157c349dbc7Sjsg 2158c349dbc7Sjsg eb->trampoline = trampoline; 2159c349dbc7Sjsg eb->batch_start_offset = 0; 2160c349dbc7Sjsg 2161ad8b1aafSjsg secure_batch: 2162ad8b1aafSjsg if (batch) { 2163ad8b1aafSjsg eb->batch = &eb->vma[eb->buffer_count++]; 2164ad8b1aafSjsg eb->batch->flags = __EXEC_OBJECT_HAS_PIN; 2165ad8b1aafSjsg eb->batch->vma = i915_vma_get(batch); 2166ad8b1aafSjsg } 2167c349dbc7Sjsg return 0; 2168c349dbc7Sjsg 2169ad8b1aafSjsg err_unpin_batch: 2170ad8b1aafSjsg if (batch) 2171ad8b1aafSjsg i915_vma_unpin(batch); 2172a37a8dbaSjsg err_trampoline: 2173c349dbc7Sjsg if (trampoline) 2174c349dbc7Sjsg i915_vma_unpin(trampoline); 2175c349dbc7Sjsg err_shadow: 2176c349dbc7Sjsg i915_vma_unpin(shadow); 2177c349dbc7Sjsg err: 2178c349dbc7Sjsg return err; 2179c349dbc7Sjsg } 2180c349dbc7Sjsg 2181c349dbc7Sjsg static int eb_submit(struct i915_execbuffer *eb, struct i915_vma *batch) 2182c349dbc7Sjsg { 2183c349dbc7Sjsg int err; 2184c349dbc7Sjsg 2185*5ca02815Sjsg if (intel_context_nopreempt(eb->context)) 2186*5ca02815Sjsg __set_bit(I915_FENCE_FLAG_NOPREEMPT, &eb->request->fence.flags); 2187*5ca02815Sjsg 2188c349dbc7Sjsg err = eb_move_to_gpu(eb); 2189c349dbc7Sjsg if (err) 2190c349dbc7Sjsg return err; 2191c349dbc7Sjsg 2192c349dbc7Sjsg if (eb->args->flags & I915_EXEC_GEN7_SOL_RESET) { 2193c349dbc7Sjsg err = i915_reset_gen7_sol_offsets(eb->request); 2194c349dbc7Sjsg if (err) 2195c349dbc7Sjsg return err; 2196c349dbc7Sjsg } 2197c349dbc7Sjsg 2198c349dbc7Sjsg /* 2199c349dbc7Sjsg * After we completed waiting for other engines (using HW semaphores) 2200c349dbc7Sjsg * then we can signal that this request/batch is ready to run. This 2201c349dbc7Sjsg * allows us to determine if the batch is still waiting on the GPU 2202c349dbc7Sjsg * or actually running by checking the breadcrumb. 
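 * emit_init_breadcrumb() below writes that breadcrumb ahead of the batchbuffer start command.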
2203c349dbc7Sjsg */ 2204c349dbc7Sjsg if (eb->engine->emit_init_breadcrumb) { 2205c349dbc7Sjsg err = eb->engine->emit_init_breadcrumb(eb->request); 2206c349dbc7Sjsg if (err) 2207c349dbc7Sjsg return err; 2208c349dbc7Sjsg } 2209c349dbc7Sjsg 2210c349dbc7Sjsg err = eb->engine->emit_bb_start(eb->request, 2211c349dbc7Sjsg batch->node.start + 2212c349dbc7Sjsg eb->batch_start_offset, 2213c349dbc7Sjsg eb->batch_len, 2214c349dbc7Sjsg eb->batch_flags); 2215c349dbc7Sjsg if (err) 2216c349dbc7Sjsg return err; 2217c349dbc7Sjsg 2218c349dbc7Sjsg if (eb->trampoline) { 2219c349dbc7Sjsg GEM_BUG_ON(eb->batch_start_offset); 2220c349dbc7Sjsg err = eb->engine->emit_bb_start(eb->request, 2221c349dbc7Sjsg eb->trampoline->node.start + 2222c349dbc7Sjsg eb->batch_len, 2223c349dbc7Sjsg 0, 0); 2224c349dbc7Sjsg if (err) 2225c349dbc7Sjsg return err; 2226c349dbc7Sjsg } 2227c349dbc7Sjsg 2228c349dbc7Sjsg return 0; 2229c349dbc7Sjsg } 2230c349dbc7Sjsg 2231c349dbc7Sjsg static int num_vcs_engines(const struct drm_i915_private *i915) 2232c349dbc7Sjsg { 2233*5ca02815Sjsg return hweight_long(VDBOX_MASK(&i915->gt)); 2234c349dbc7Sjsg } 2235c349dbc7Sjsg 2236c349dbc7Sjsg /* 2237c349dbc7Sjsg * Find one BSD ring to dispatch the corresponding BSD command. 2238c349dbc7Sjsg * The engine index is returned. 2239c349dbc7Sjsg */ 2240c349dbc7Sjsg static unsigned int 2241c349dbc7Sjsg gen8_dispatch_bsd_engine(struct drm_i915_private *dev_priv, 2242c349dbc7Sjsg struct drm_file *file) 2243c349dbc7Sjsg { 2244c349dbc7Sjsg struct drm_i915_file_private *file_priv = file->driver_priv; 2245c349dbc7Sjsg 2246c349dbc7Sjsg /* Check whether the file_priv has already selected one ring. */ 2247c349dbc7Sjsg if ((int)file_priv->bsd_engine < 0) 2248c349dbc7Sjsg file_priv->bsd_engine = 2249c349dbc7Sjsg get_random_int() % num_vcs_engines(dev_priv); 2250c349dbc7Sjsg 2251c349dbc7Sjsg return file_priv->bsd_engine; 2252c349dbc7Sjsg } 2253c349dbc7Sjsg 2254c349dbc7Sjsg static const enum intel_engine_id user_ring_map[] = { 2255c349dbc7Sjsg [I915_EXEC_DEFAULT] = RCS0, 2256c349dbc7Sjsg [I915_EXEC_RENDER] = RCS0, 2257c349dbc7Sjsg [I915_EXEC_BLT] = BCS0, 2258c349dbc7Sjsg [I915_EXEC_BSD] = VCS0, 2259c349dbc7Sjsg [I915_EXEC_VEBOX] = VECS0 2260c349dbc7Sjsg }; 2261c349dbc7Sjsg 2262ad8b1aafSjsg static struct i915_request *eb_throttle(struct i915_execbuffer *eb, struct intel_context *ce) 2263c349dbc7Sjsg { 2264c349dbc7Sjsg struct intel_ring *ring = ce->ring; 2265c349dbc7Sjsg struct intel_timeline *tl = ce->timeline; 2266c349dbc7Sjsg struct i915_request *rq; 2267c349dbc7Sjsg 2268c349dbc7Sjsg /* 2269c349dbc7Sjsg * Completely unscientific finger-in-the-air estimates for suitable 2270c349dbc7Sjsg * maximum user request size (to avoid blocking) and then backoff. 2271c349dbc7Sjsg */ 2272c349dbc7Sjsg if (intel_ring_update_space(ring) >= PAGE_SIZE) 2273c349dbc7Sjsg return NULL; 2274c349dbc7Sjsg 2275c349dbc7Sjsg /* 2276c349dbc7Sjsg * Find a request that after waiting upon, there will be at least half 2277c349dbc7Sjsg * the ring available. The hysteresis allows us to compete for the 2278c349dbc7Sjsg * shared ring and should mean that we sleep less often prior to 2279c349dbc7Sjsg * claiming our resources, but not so long that the ring completely 2280c349dbc7Sjsg * drains before we can submit our next request. 
2281c349dbc7Sjsg */ 2282c349dbc7Sjsg list_for_each_entry(rq, &tl->requests, link) { 2283c349dbc7Sjsg if (rq->ring != ring) 2284c349dbc7Sjsg continue; 2285c349dbc7Sjsg 2286c349dbc7Sjsg if (__intel_ring_space(rq->postfix, 2287c349dbc7Sjsg ring->emit, ring->size) > ring->size / 2) 2288c349dbc7Sjsg break; 2289c349dbc7Sjsg } 2290c349dbc7Sjsg if (&rq->link == &tl->requests) 2291c349dbc7Sjsg return NULL; /* weird, we will check again later for real */ 2292c349dbc7Sjsg 2293c349dbc7Sjsg return i915_request_get(rq); 2294c349dbc7Sjsg } 2295c349dbc7Sjsg 2296ad8b1aafSjsg static struct i915_request *eb_pin_engine(struct i915_execbuffer *eb, bool throttle) 2297c349dbc7Sjsg { 2298ad8b1aafSjsg struct intel_context *ce = eb->context; 2299c349dbc7Sjsg struct intel_timeline *tl; 2300ad8b1aafSjsg struct i915_request *rq = NULL; 2301c349dbc7Sjsg int err; 2302c349dbc7Sjsg 2303ad8b1aafSjsg GEM_BUG_ON(eb->args->flags & __EXEC_ENGINE_PINNED); 2304c349dbc7Sjsg 2305c349dbc7Sjsg if (unlikely(intel_context_is_banned(ce))) 2306ad8b1aafSjsg return ERR_PTR(-EIO); 2307c349dbc7Sjsg 2308c349dbc7Sjsg /* 2309c349dbc7Sjsg * Pinning the contexts may generate requests in order to acquire 2310c349dbc7Sjsg * GGTT space, so do this first before we reserve a seqno for 2311c349dbc7Sjsg * ourselves. 2312c349dbc7Sjsg */ 2313ad8b1aafSjsg err = intel_context_pin_ww(ce, &eb->ww); 2314c349dbc7Sjsg if (err) 2315ad8b1aafSjsg return ERR_PTR(err); 2316c349dbc7Sjsg 2317c349dbc7Sjsg /* 2318c349dbc7Sjsg * Take a local wakeref for preparing to dispatch the execbuf as 2319c349dbc7Sjsg * we expect to access the hardware fairly frequently in the 2320c349dbc7Sjsg * process, and require the engine to be kept awake between accesses. 2321c349dbc7Sjsg * Upon dispatch, we acquire another prolonged wakeref that we hold 2322c349dbc7Sjsg * until the timeline is idle, which in turn releases the wakeref 2323c349dbc7Sjsg * taken on the engine, and the parent device. 
2324c349dbc7Sjsg */ 2325c349dbc7Sjsg tl = intel_context_timeline_lock(ce); 2326c349dbc7Sjsg if (IS_ERR(tl)) { 2327ad8b1aafSjsg intel_context_unpin(ce); 2328ad8b1aafSjsg return ERR_CAST(tl); 2329c349dbc7Sjsg } 2330c349dbc7Sjsg 2331c349dbc7Sjsg intel_context_enter(ce); 2332ad8b1aafSjsg if (throttle) 2333ad8b1aafSjsg rq = eb_throttle(eb, ce); 2334c349dbc7Sjsg intel_context_timeline_unlock(tl); 2335c349dbc7Sjsg 2336ad8b1aafSjsg eb->args->flags |= __EXEC_ENGINE_PINNED; 2337ad8b1aafSjsg return rq; 2338c349dbc7Sjsg } 2339c349dbc7Sjsg 2340c349dbc7Sjsg static void eb_unpin_engine(struct i915_execbuffer *eb) 2341c349dbc7Sjsg { 2342c349dbc7Sjsg struct intel_context *ce = eb->context; 2343c349dbc7Sjsg struct intel_timeline *tl = ce->timeline; 2344c349dbc7Sjsg 2345ad8b1aafSjsg if (!(eb->args->flags & __EXEC_ENGINE_PINNED)) 2346ad8b1aafSjsg return; 2347ad8b1aafSjsg 2348ad8b1aafSjsg eb->args->flags &= ~__EXEC_ENGINE_PINNED; 2349ad8b1aafSjsg 2350c349dbc7Sjsg mutex_lock(&tl->mutex); 2351c349dbc7Sjsg intel_context_exit(ce); 2352c349dbc7Sjsg mutex_unlock(&tl->mutex); 2353c349dbc7Sjsg 2354c349dbc7Sjsg intel_context_unpin(ce); 2355c349dbc7Sjsg } 2356c349dbc7Sjsg 2357c349dbc7Sjsg static unsigned int 2358ad8b1aafSjsg eb_select_legacy_ring(struct i915_execbuffer *eb) 2359c349dbc7Sjsg { 2360c349dbc7Sjsg struct drm_i915_private *i915 = eb->i915; 2361ad8b1aafSjsg struct drm_i915_gem_execbuffer2 *args = eb->args; 2362c349dbc7Sjsg unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK; 2363c349dbc7Sjsg 2364c349dbc7Sjsg if (user_ring_id != I915_EXEC_BSD && 2365c349dbc7Sjsg (args->flags & I915_EXEC_BSD_MASK)) { 2366c349dbc7Sjsg drm_dbg(&i915->drm, 2367c349dbc7Sjsg "execbuf with non bsd ring but with invalid " 2368c349dbc7Sjsg "bsd dispatch flags: %d\n", (int)(args->flags)); 2369c349dbc7Sjsg return -1; 2370c349dbc7Sjsg } 2371c349dbc7Sjsg 2372c349dbc7Sjsg if (user_ring_id == I915_EXEC_BSD && num_vcs_engines(i915) > 1) { 2373c349dbc7Sjsg unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK; 2374c349dbc7Sjsg 2375c349dbc7Sjsg if (bsd_idx == I915_EXEC_BSD_DEFAULT) { 2376ad8b1aafSjsg bsd_idx = gen8_dispatch_bsd_engine(i915, eb->file); 2377c349dbc7Sjsg } else if (bsd_idx >= I915_EXEC_BSD_RING1 && 2378c349dbc7Sjsg bsd_idx <= I915_EXEC_BSD_RING2) { 2379c349dbc7Sjsg bsd_idx >>= I915_EXEC_BSD_SHIFT; 2380c349dbc7Sjsg bsd_idx--; 2381c349dbc7Sjsg } else { 2382c349dbc7Sjsg drm_dbg(&i915->drm, 2383c349dbc7Sjsg "execbuf with unknown bsd ring: %u\n", 2384c349dbc7Sjsg bsd_idx); 2385c349dbc7Sjsg return -1; 2386c349dbc7Sjsg } 2387c349dbc7Sjsg 2388c349dbc7Sjsg return _VCS(bsd_idx); 2389c349dbc7Sjsg } 2390c349dbc7Sjsg 2391c349dbc7Sjsg if (user_ring_id >= ARRAY_SIZE(user_ring_map)) { 2392c349dbc7Sjsg drm_dbg(&i915->drm, "execbuf with unknown ring: %u\n", 2393c349dbc7Sjsg user_ring_id); 2394c349dbc7Sjsg return -1; 2395c349dbc7Sjsg } 2396c349dbc7Sjsg 2397c349dbc7Sjsg return user_ring_map[user_ring_id]; 2398c349dbc7Sjsg } 2399c349dbc7Sjsg 2400c349dbc7Sjsg static int 2401ad8b1aafSjsg eb_select_engine(struct i915_execbuffer *eb) 2402c349dbc7Sjsg { 2403c349dbc7Sjsg struct intel_context *ce; 2404c349dbc7Sjsg unsigned int idx; 2405c349dbc7Sjsg int err; 2406c349dbc7Sjsg 2407c349dbc7Sjsg if (i915_gem_context_user_engines(eb->gem_context)) 2408ad8b1aafSjsg idx = eb->args->flags & I915_EXEC_RING_MASK; 2409c349dbc7Sjsg else 2410ad8b1aafSjsg idx = eb_select_legacy_ring(eb); 2411c349dbc7Sjsg 2412c349dbc7Sjsg ce = i915_gem_context_get_engine(eb->gem_context, idx); 2413c349dbc7Sjsg if (IS_ERR(ce)) 2414c349dbc7Sjsg return PTR_ERR(ce); 2415c349dbc7Sjsg 
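 /* Hold a GT wakeref across execbuf setup; it is dropped again in eb_put_engine() or on the error path below. */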
2416ad8b1aafSjsg intel_gt_pm_get(ce->engine->gt); 2417c349dbc7Sjsg 2418ad8b1aafSjsg if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) { 2419ad8b1aafSjsg err = intel_context_alloc_state(ce); 2420ad8b1aafSjsg if (err) 2421ad8b1aafSjsg goto err; 2422ad8b1aafSjsg } 2423ad8b1aafSjsg 2424ad8b1aafSjsg /* 2425ad8b1aafSjsg * ABI: Before userspace accesses the GPU (e.g. execbuffer), report 2426ad8b1aafSjsg * EIO if the GPU is already wedged. 2427ad8b1aafSjsg */ 2428ad8b1aafSjsg err = intel_gt_terminally_wedged(ce->engine->gt); 2429ad8b1aafSjsg if (err) 2430ad8b1aafSjsg goto err; 2431ad8b1aafSjsg 2432ad8b1aafSjsg eb->context = ce; 2433ad8b1aafSjsg eb->engine = ce->engine; 2434ad8b1aafSjsg 2435ad8b1aafSjsg /* 2436ad8b1aafSjsg * Make sure engine pool stays alive even if we call intel_context_put 2437ad8b1aafSjsg * during ww handling. The pool is destroyed when last pm reference 2438ad8b1aafSjsg * is dropped, which breaks our -EDEADLK handling. 2439ad8b1aafSjsg */ 2440ad8b1aafSjsg return err; 2441ad8b1aafSjsg 2442ad8b1aafSjsg err: 2443ad8b1aafSjsg intel_gt_pm_put(ce->engine->gt); 2444ad8b1aafSjsg intel_context_put(ce); 2445c349dbc7Sjsg return err; 2446c349dbc7Sjsg } 2447c349dbc7Sjsg 2448c349dbc7Sjsg static void 2449ad8b1aafSjsg eb_put_engine(struct i915_execbuffer *eb) 2450c349dbc7Sjsg { 2451ad8b1aafSjsg intel_gt_pm_put(eb->engine->gt); 2452ad8b1aafSjsg intel_context_put(eb->context); 2453ad8b1aafSjsg } 2454ad8b1aafSjsg 2455ad8b1aafSjsg static void 2456ad8b1aafSjsg __free_fence_array(struct eb_fence *fences, unsigned int n) 2457ad8b1aafSjsg { 2458ad8b1aafSjsg while (n--) { 2459ad8b1aafSjsg drm_syncobj_put(ptr_mask_bits(fences[n].syncobj, 2)); 2460ad8b1aafSjsg dma_fence_put(fences[n].dma_fence); 2461*5ca02815Sjsg dma_fence_chain_free(fences[n].chain_fence); 2462ad8b1aafSjsg } 2463c349dbc7Sjsg kvfree(fences); 2464c349dbc7Sjsg } 2465c349dbc7Sjsg 2466ad8b1aafSjsg static int 2467ad8b1aafSjsg add_timeline_fence_array(struct i915_execbuffer *eb, 2468ad8b1aafSjsg const struct drm_i915_gem_execbuffer_ext_timeline_fences *timeline_fences) 2469c349dbc7Sjsg { 2470ad8b1aafSjsg struct drm_i915_gem_exec_fence __user *user_fences; 2471ad8b1aafSjsg u64 __user *user_values; 2472ad8b1aafSjsg struct eb_fence *f; 2473ad8b1aafSjsg u64 nfences; 2474ad8b1aafSjsg int err = 0; 2475c349dbc7Sjsg 2476ad8b1aafSjsg nfences = timeline_fences->fence_count; 2477ad8b1aafSjsg if (!nfences) 2478ad8b1aafSjsg return 0; 2479c349dbc7Sjsg 2480c349dbc7Sjsg /* Check multiplication overflow for access_ok() and kvmalloc_array() */ 2481c349dbc7Sjsg BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long)); 2482c349dbc7Sjsg if (nfences > min_t(unsigned long, 2483ad8b1aafSjsg ULONG_MAX / sizeof(*user_fences), 2484ad8b1aafSjsg SIZE_MAX / sizeof(*f)) - eb->num_fences) 2485ad8b1aafSjsg return -EINVAL; 2486c349dbc7Sjsg 2487ad8b1aafSjsg user_fences = u64_to_user_ptr(timeline_fences->handles_ptr); 2488ad8b1aafSjsg if (!access_ok(user_fences, nfences * sizeof(*user_fences))) 2489ad8b1aafSjsg return -EFAULT; 2490c349dbc7Sjsg 2491ad8b1aafSjsg user_values = u64_to_user_ptr(timeline_fences->values_ptr); 2492ad8b1aafSjsg if (!access_ok(user_values, nfences * sizeof(*user_values))) 2493ad8b1aafSjsg return -EFAULT; 2494ad8b1aafSjsg 2495ad8b1aafSjsg #ifdef __linux__ 2496ad8b1aafSjsg f = krealloc(eb->fences, 2497ad8b1aafSjsg (eb->num_fences + nfences) * sizeof(*f), 2498c349dbc7Sjsg __GFP_NOWARN | GFP_KERNEL); 2499ad8b1aafSjsg if (!f) 2500ad8b1aafSjsg return -ENOMEM; 2501ad8b1aafSjsg #else 2502ad8b1aafSjsg f = kmalloc((eb->num_fences + nfences) * sizeof(*f), 2503ad8b1aafSjsg 
__GFP_NOWARN | GFP_KERNEL); 2504ad8b1aafSjsg if (!f) 2505ad8b1aafSjsg return -ENOMEM; 2506ad8b1aafSjsg memcpy(f, eb->fences, eb->num_fences * sizeof(*f)); 2507ad8b1aafSjsg kfree(eb->fences); 2508ad8b1aafSjsg #endif 2509c349dbc7Sjsg 2510ad8b1aafSjsg eb->fences = f; 2511ad8b1aafSjsg f += eb->num_fences; 2512ad8b1aafSjsg 2513ad8b1aafSjsg #ifdef notyet 2514ad8b1aafSjsg BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) & 2515ad8b1aafSjsg ~__I915_EXEC_FENCE_UNKNOWN_FLAGS); 2516ad8b1aafSjsg #endif 2517ad8b1aafSjsg 2518ad8b1aafSjsg while (nfences--) { 2519ad8b1aafSjsg struct drm_i915_gem_exec_fence user_fence; 2520c349dbc7Sjsg struct drm_syncobj *syncobj; 2521ad8b1aafSjsg struct dma_fence *fence = NULL; 2522ad8b1aafSjsg u64 point; 2523c349dbc7Sjsg 2524ad8b1aafSjsg if (__copy_from_user(&user_fence, 2525ad8b1aafSjsg user_fences++, 2526ad8b1aafSjsg sizeof(user_fence))) 2527ad8b1aafSjsg return -EFAULT; 2528c349dbc7Sjsg 2529ad8b1aafSjsg if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) 2530ad8b1aafSjsg return -EINVAL; 2531c349dbc7Sjsg 2532ad8b1aafSjsg if (__get_user(point, user_values++)) 2533ad8b1aafSjsg return -EFAULT; 2534ad8b1aafSjsg 2535ad8b1aafSjsg syncobj = drm_syncobj_find(eb->file, user_fence.handle); 2536c349dbc7Sjsg if (!syncobj) { 2537c349dbc7Sjsg DRM_DEBUG("Invalid syncobj handle provided\n"); 2538ad8b1aafSjsg return -ENOENT; 2539ad8b1aafSjsg } 2540ad8b1aafSjsg 2541ad8b1aafSjsg fence = drm_syncobj_fence_get(syncobj); 2542ad8b1aafSjsg 2543ad8b1aafSjsg if (!fence && user_fence.flags && 2544ad8b1aafSjsg !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) { 2545ad8b1aafSjsg DRM_DEBUG("Syncobj handle has no fence\n"); 2546ad8b1aafSjsg drm_syncobj_put(syncobj); 2547ad8b1aafSjsg return -EINVAL; 2548ad8b1aafSjsg } 2549ad8b1aafSjsg 2550ad8b1aafSjsg if (fence) 2551ad8b1aafSjsg err = dma_fence_chain_find_seqno(&fence, point); 2552ad8b1aafSjsg 2553ad8b1aafSjsg if (err && !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) { 2554ad8b1aafSjsg DRM_DEBUG("Syncobj handle missing requested point %llu\n", point); 2555ad8b1aafSjsg dma_fence_put(fence); 2556ad8b1aafSjsg drm_syncobj_put(syncobj); 2557ad8b1aafSjsg return err; 2558ad8b1aafSjsg } 2559ad8b1aafSjsg 2560ad8b1aafSjsg /* 2561ad8b1aafSjsg * A point might have been signaled already and 2562ad8b1aafSjsg * garbage collected from the timeline. In this case 2563ad8b1aafSjsg * just ignore the point and carry on. 2564ad8b1aafSjsg */ 2565ad8b1aafSjsg if (!fence && !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) { 2566ad8b1aafSjsg drm_syncobj_put(syncobj); 2567ad8b1aafSjsg continue; 2568ad8b1aafSjsg } 2569ad8b1aafSjsg 2570ad8b1aafSjsg /* 2571ad8b1aafSjsg * For timeline syncobjs we need to preallocate chains for 2572ad8b1aafSjsg * later signaling. 2573ad8b1aafSjsg */ 2574ad8b1aafSjsg if (point != 0 && user_fence.flags & I915_EXEC_FENCE_SIGNAL) { 2575ad8b1aafSjsg /* 2576ad8b1aafSjsg * Waiting and signaling the same point (when point != 2577ad8b1aafSjsg * 0) would break the timeline. 
2578ad8b1aafSjsg */ 2579ad8b1aafSjsg if (user_fence.flags & I915_EXEC_FENCE_WAIT) { 2580ad8b1aafSjsg DRM_DEBUG("Trying to wait & signal the same timeline point.\n"); 2581ad8b1aafSjsg dma_fence_put(fence); 2582ad8b1aafSjsg drm_syncobj_put(syncobj); 2583ad8b1aafSjsg return -EINVAL; 2584ad8b1aafSjsg } 2585ad8b1aafSjsg 2586*5ca02815Sjsg f->chain_fence = dma_fence_chain_alloc(); 2587ad8b1aafSjsg if (!f->chain_fence) { 2588ad8b1aafSjsg drm_syncobj_put(syncobj); 2589ad8b1aafSjsg dma_fence_put(fence); 2590ad8b1aafSjsg return -ENOMEM; 2591ad8b1aafSjsg } 2592ad8b1aafSjsg } else { 2593ad8b1aafSjsg f->chain_fence = NULL; 2594ad8b1aafSjsg } 2595ad8b1aafSjsg 2596ad8b1aafSjsg f->syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2); 2597ad8b1aafSjsg f->dma_fence = fence; 2598ad8b1aafSjsg f->value = point; 2599ad8b1aafSjsg f++; 2600ad8b1aafSjsg eb->num_fences++; 2601ad8b1aafSjsg } 2602ad8b1aafSjsg 2603ad8b1aafSjsg return 0; 2604ad8b1aafSjsg } 2605ad8b1aafSjsg 2606ad8b1aafSjsg static int add_fence_array(struct i915_execbuffer *eb) 2607ad8b1aafSjsg { 2608ad8b1aafSjsg struct drm_i915_gem_execbuffer2 *args = eb->args; 2609ad8b1aafSjsg struct drm_i915_gem_exec_fence __user *user; 2610ad8b1aafSjsg unsigned long num_fences = args->num_cliprects; 2611ad8b1aafSjsg struct eb_fence *f; 2612ad8b1aafSjsg 2613ad8b1aafSjsg if (!(args->flags & I915_EXEC_FENCE_ARRAY)) 2614ad8b1aafSjsg return 0; 2615ad8b1aafSjsg 2616ad8b1aafSjsg if (!num_fences) 2617ad8b1aafSjsg return 0; 2618ad8b1aafSjsg 2619ad8b1aafSjsg /* Check multiplication overflow for access_ok() and kvmalloc_array() */ 2620ad8b1aafSjsg BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long)); 2621ad8b1aafSjsg if (num_fences > min_t(unsigned long, 2622ad8b1aafSjsg ULONG_MAX / sizeof(*user), 2623ad8b1aafSjsg SIZE_MAX / sizeof(*f) - eb->num_fences)) 2624ad8b1aafSjsg return -EINVAL; 2625ad8b1aafSjsg 2626ad8b1aafSjsg user = u64_to_user_ptr(args->cliprects_ptr); 2627ad8b1aafSjsg if (!access_ok(user, num_fences * sizeof(*user))) 2628ad8b1aafSjsg return -EFAULT; 2629ad8b1aafSjsg 2630ad8b1aafSjsg #ifdef __linux__ 2631ad8b1aafSjsg f = krealloc(eb->fences, 2632ad8b1aafSjsg (eb->num_fences + num_fences) * sizeof(*f), 2633ad8b1aafSjsg __GFP_NOWARN | GFP_KERNEL); 2634ad8b1aafSjsg if (!f) 2635ad8b1aafSjsg return -ENOMEM; 2636ad8b1aafSjsg #else 2637ad8b1aafSjsg f = kmalloc((eb->num_fences + num_fences) * sizeof(*f), 2638ad8b1aafSjsg __GFP_NOWARN | GFP_KERNEL); 2639ad8b1aafSjsg if (!f) 2640ad8b1aafSjsg return -ENOMEM; 2641ad8b1aafSjsg memcpy(f, eb->fences, eb->num_fences * sizeof(*f)); 2642ad8b1aafSjsg kfree(eb->fences); 2643ad8b1aafSjsg #endif 2644ad8b1aafSjsg 2645ad8b1aafSjsg eb->fences = f; 2646ad8b1aafSjsg f += eb->num_fences; 2647ad8b1aafSjsg while (num_fences--) { 2648ad8b1aafSjsg struct drm_i915_gem_exec_fence user_fence; 2649ad8b1aafSjsg struct drm_syncobj *syncobj; 2650ad8b1aafSjsg struct dma_fence *fence = NULL; 2651ad8b1aafSjsg 2652ad8b1aafSjsg if (__copy_from_user(&user_fence, user++, sizeof(user_fence))) 2653ad8b1aafSjsg return -EFAULT; 2654ad8b1aafSjsg 2655ad8b1aafSjsg if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) 2656ad8b1aafSjsg return -EINVAL; 2657ad8b1aafSjsg 2658ad8b1aafSjsg syncobj = drm_syncobj_find(eb->file, user_fence.handle); 2659ad8b1aafSjsg if (!syncobj) { 2660ad8b1aafSjsg DRM_DEBUG("Invalid syncobj handle provided\n"); 2661ad8b1aafSjsg return -ENOENT; 2662ad8b1aafSjsg } 2663ad8b1aafSjsg 2664ad8b1aafSjsg if (user_fence.flags & I915_EXEC_FENCE_WAIT) { 2665ad8b1aafSjsg fence = drm_syncobj_fence_get(syncobj); 2666ad8b1aafSjsg if (!fence) { 
2667ad8b1aafSjsg DRM_DEBUG("Syncobj handle has no fence\n"); 2668ad8b1aafSjsg drm_syncobj_put(syncobj); 2669ad8b1aafSjsg return -EINVAL; 2670ad8b1aafSjsg } 2671c349dbc7Sjsg } 2672c349dbc7Sjsg 2673c349dbc7Sjsg #ifdef notyet 2674c349dbc7Sjsg BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) & 2675c349dbc7Sjsg ~__I915_EXEC_FENCE_UNKNOWN_FLAGS); 2676c349dbc7Sjsg #endif 2677c349dbc7Sjsg 2678ad8b1aafSjsg f->syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2); 2679ad8b1aafSjsg f->dma_fence = fence; 2680ad8b1aafSjsg f->value = 0; 2681ad8b1aafSjsg f->chain_fence = NULL; 2682ad8b1aafSjsg f++; 2683ad8b1aafSjsg eb->num_fences++; 2684c349dbc7Sjsg } 2685c349dbc7Sjsg 2686ad8b1aafSjsg return 0; 2687c349dbc7Sjsg } 2688c349dbc7Sjsg 2689ad8b1aafSjsg static void put_fence_array(struct eb_fence *fences, int num_fences) 2690c349dbc7Sjsg { 2691c349dbc7Sjsg if (fences) 2692ad8b1aafSjsg __free_fence_array(fences, num_fences); 2693c349dbc7Sjsg } 2694c349dbc7Sjsg 2695c349dbc7Sjsg static int 2696ad8b1aafSjsg await_fence_array(struct i915_execbuffer *eb) 2697c349dbc7Sjsg { 2698c349dbc7Sjsg unsigned int n; 2699c349dbc7Sjsg int err; 2700c349dbc7Sjsg 2701ad8b1aafSjsg for (n = 0; n < eb->num_fences; n++) { 2702c349dbc7Sjsg struct drm_syncobj *syncobj; 2703c349dbc7Sjsg unsigned int flags; 2704c349dbc7Sjsg 2705ad8b1aafSjsg syncobj = ptr_unpack_bits(eb->fences[n].syncobj, &flags, 2); 2706ad8b1aafSjsg 2707ad8b1aafSjsg if (!eb->fences[n].dma_fence) 2708c349dbc7Sjsg continue; 2709c349dbc7Sjsg 2710ad8b1aafSjsg err = i915_request_await_dma_fence(eb->request, 2711ad8b1aafSjsg eb->fences[n].dma_fence); 2712c349dbc7Sjsg if (err < 0) 2713c349dbc7Sjsg return err; 2714c349dbc7Sjsg } 2715c349dbc7Sjsg 2716c349dbc7Sjsg return 0; 2717c349dbc7Sjsg } 2718c349dbc7Sjsg 2719ad8b1aafSjsg static void signal_fence_array(const struct i915_execbuffer *eb) 2720c349dbc7Sjsg { 2721c349dbc7Sjsg struct dma_fence * const fence = &eb->request->fence; 2722c349dbc7Sjsg unsigned int n; 2723c349dbc7Sjsg 2724ad8b1aafSjsg for (n = 0; n < eb->num_fences; n++) { 2725c349dbc7Sjsg struct drm_syncobj *syncobj; 2726c349dbc7Sjsg unsigned int flags; 2727c349dbc7Sjsg 2728ad8b1aafSjsg syncobj = ptr_unpack_bits(eb->fences[n].syncobj, &flags, 2); 2729c349dbc7Sjsg if (!(flags & I915_EXEC_FENCE_SIGNAL)) 2730c349dbc7Sjsg continue; 2731c349dbc7Sjsg 2732ad8b1aafSjsg if (eb->fences[n].chain_fence) { 2733ad8b1aafSjsg drm_syncobj_add_point(syncobj, 2734ad8b1aafSjsg eb->fences[n].chain_fence, 2735ad8b1aafSjsg fence, 2736ad8b1aafSjsg eb->fences[n].value); 2737ad8b1aafSjsg /* 2738ad8b1aafSjsg * The chain's ownership is transferred to the 2739ad8b1aafSjsg * timeline. 
2740ad8b1aafSjsg */ 2741ad8b1aafSjsg eb->fences[n].chain_fence = NULL; 2742ad8b1aafSjsg } else { 2743c349dbc7Sjsg drm_syncobj_replace_fence(syncobj, fence); 2744c349dbc7Sjsg } 2745c349dbc7Sjsg } 2746ad8b1aafSjsg } 2747ad8b1aafSjsg 2748ad8b1aafSjsg static int 2749ad8b1aafSjsg parse_timeline_fences(struct i915_user_extension __user *ext, void *data) 2750ad8b1aafSjsg { 2751ad8b1aafSjsg struct i915_execbuffer *eb = data; 2752ad8b1aafSjsg struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences; 2753ad8b1aafSjsg 2754ad8b1aafSjsg if (copy_from_user(&timeline_fences, ext, sizeof(timeline_fences))) 2755ad8b1aafSjsg return -EFAULT; 2756ad8b1aafSjsg 2757ad8b1aafSjsg return add_timeline_fence_array(eb, &timeline_fences); 2758ad8b1aafSjsg } 2759c349dbc7Sjsg 2760c349dbc7Sjsg static void retire_requests(struct intel_timeline *tl, struct i915_request *end) 2761c349dbc7Sjsg { 2762c349dbc7Sjsg struct i915_request *rq, *rn; 2763c349dbc7Sjsg 2764c349dbc7Sjsg list_for_each_entry_safe(rq, rn, &tl->requests, link) 2765c349dbc7Sjsg if (rq == end || !i915_request_retire(rq)) 2766c349dbc7Sjsg break; 2767c349dbc7Sjsg } 2768c349dbc7Sjsg 2769ad8b1aafSjsg static int eb_request_add(struct i915_execbuffer *eb, int err) 2770c349dbc7Sjsg { 2771c349dbc7Sjsg struct i915_request *rq = eb->request; 2772c349dbc7Sjsg struct intel_timeline * const tl = i915_request_timeline(rq); 2773c349dbc7Sjsg struct i915_sched_attr attr = {}; 2774c349dbc7Sjsg struct i915_request *prev; 2775c349dbc7Sjsg 2776c349dbc7Sjsg lockdep_assert_held(&tl->mutex); 2777c349dbc7Sjsg lockdep_unpin_lock(&tl->mutex, rq->cookie); 2778c349dbc7Sjsg 2779c349dbc7Sjsg trace_i915_request_add(rq); 2780c349dbc7Sjsg 2781c349dbc7Sjsg prev = __i915_request_commit(rq); 2782c349dbc7Sjsg 2783c349dbc7Sjsg /* Check that the context wasn't destroyed before submission */ 2784c349dbc7Sjsg if (likely(!intel_context_is_closed(eb->context))) { 2785c349dbc7Sjsg attr = eb->gem_context->sched; 2786c349dbc7Sjsg } else { 2787c349dbc7Sjsg /* Serialise with context_close via the add_to_timeline */ 2788c349dbc7Sjsg i915_request_set_error_once(rq, -ENOENT); 2789c349dbc7Sjsg __i915_request_skip(rq); 2790ad8b1aafSjsg err = -ENOENT; /* override any transient errors */ 2791c349dbc7Sjsg } 2792c349dbc7Sjsg 2793c349dbc7Sjsg __i915_request_queue(rq, &attr); 2794c349dbc7Sjsg 2795c349dbc7Sjsg /* Try to clean up the client's timeline after submitting the request */ 2796c349dbc7Sjsg if (prev) 2797c349dbc7Sjsg retire_requests(tl, prev); 2798c349dbc7Sjsg 2799c349dbc7Sjsg mutex_unlock(&tl->mutex); 2800ad8b1aafSjsg 2801ad8b1aafSjsg return err; 2802ad8b1aafSjsg } 2803ad8b1aafSjsg 2804ad8b1aafSjsg static const i915_user_extension_fn execbuf_extensions[] = { 2805ad8b1aafSjsg [DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES] = parse_timeline_fences, 2806ad8b1aafSjsg }; 2807ad8b1aafSjsg 2808ad8b1aafSjsg static int 2809ad8b1aafSjsg parse_execbuf2_extensions(struct drm_i915_gem_execbuffer2 *args, 2810ad8b1aafSjsg struct i915_execbuffer *eb) 2811ad8b1aafSjsg { 2812ad8b1aafSjsg if (!(args->flags & I915_EXEC_USE_EXTENSIONS)) 2813ad8b1aafSjsg return 0; 2814ad8b1aafSjsg 2815ad8b1aafSjsg /* The execbuf2 extension mechanism reuses cliprects_ptr. So we cannot 2816ad8b1aafSjsg * have another flag also using it at the same time. 
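 * I915_EXEC_FENCE_ARRAY also passes its fence array via cliprects_ptr, hence the rejection below.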
2817ad8b1aafSjsg */ 2818ad8b1aafSjsg if (eb->args->flags & I915_EXEC_FENCE_ARRAY) 2819ad8b1aafSjsg return -EINVAL; 2820ad8b1aafSjsg 2821ad8b1aafSjsg if (args->num_cliprects != 0) 2822ad8b1aafSjsg return -EINVAL; 2823ad8b1aafSjsg 2824ad8b1aafSjsg return i915_user_extensions(u64_to_user_ptr(args->cliprects_ptr), 2825ad8b1aafSjsg execbuf_extensions, 2826ad8b1aafSjsg ARRAY_SIZE(execbuf_extensions), 2827ad8b1aafSjsg eb); 2828c349dbc7Sjsg } 2829c349dbc7Sjsg 2830c349dbc7Sjsg static int 2831c349dbc7Sjsg i915_gem_do_execbuffer(struct drm_device *dev, 2832c349dbc7Sjsg struct drm_file *file, 2833c349dbc7Sjsg struct drm_i915_gem_execbuffer2 *args, 2834ad8b1aafSjsg struct drm_i915_gem_exec_object2 *exec) 2835c349dbc7Sjsg { 2836c349dbc7Sjsg struct drm_i915_private *i915 = to_i915(dev); 2837c349dbc7Sjsg struct i915_execbuffer eb; 2838c349dbc7Sjsg struct dma_fence *in_fence = NULL; 2839c349dbc7Sjsg struct sync_file *out_fence = NULL; 2840c349dbc7Sjsg struct i915_vma *batch; 2841c349dbc7Sjsg int out_fence_fd = -1; 2842c349dbc7Sjsg int err; 2843c349dbc7Sjsg 2844c349dbc7Sjsg BUILD_BUG_ON(__EXEC_INTERNAL_FLAGS & ~__I915_EXEC_ILLEGAL_FLAGS); 2845c349dbc7Sjsg BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS & 2846c349dbc7Sjsg ~__EXEC_OBJECT_UNKNOWN_FLAGS); 2847c349dbc7Sjsg 2848c349dbc7Sjsg eb.i915 = i915; 2849c349dbc7Sjsg eb.file = file; 2850c349dbc7Sjsg eb.args = args; 2851c349dbc7Sjsg if (DBG_FORCE_RELOC || !(args->flags & I915_EXEC_NO_RELOC)) 2852c349dbc7Sjsg args->flags |= __EXEC_HAS_RELOC; 2853c349dbc7Sjsg 2854c349dbc7Sjsg eb.exec = exec; 2855c349dbc7Sjsg eb.vma = (struct eb_vma *)(exec + args->buffer_count + 1); 2856c349dbc7Sjsg eb.vma[0].vma = NULL; 2857*5ca02815Sjsg eb.batch_pool = NULL; 2858c349dbc7Sjsg 2859c349dbc7Sjsg eb.invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS; 2860c349dbc7Sjsg reloc_cache_init(&eb.reloc_cache, eb.i915); 2861c349dbc7Sjsg 2862c349dbc7Sjsg eb.buffer_count = args->buffer_count; 2863c349dbc7Sjsg eb.batch_start_offset = args->batch_start_offset; 2864c349dbc7Sjsg eb.batch_len = args->batch_len; 2865c349dbc7Sjsg eb.trampoline = NULL; 2866c349dbc7Sjsg 2867ad8b1aafSjsg eb.fences = NULL; 2868ad8b1aafSjsg eb.num_fences = 0; 2869ad8b1aafSjsg 2870c349dbc7Sjsg eb.batch_flags = 0; 2871c349dbc7Sjsg if (args->flags & I915_EXEC_SECURE) { 2872*5ca02815Sjsg if (GRAPHICS_VER(i915) >= 11) 2873c349dbc7Sjsg return -ENODEV; 2874c349dbc7Sjsg 2875c349dbc7Sjsg /* Return -EPERM to trigger fallback code on old binaries. 
*/ 2876c349dbc7Sjsg if (!HAS_SECURE_BATCHES(i915)) 2877c349dbc7Sjsg return -EPERM; 2878c349dbc7Sjsg 2879c349dbc7Sjsg if (!drm_is_current_master(file) || !capable(CAP_SYS_ADMIN)) 2880c349dbc7Sjsg return -EPERM; 2881c349dbc7Sjsg 2882c349dbc7Sjsg eb.batch_flags |= I915_DISPATCH_SECURE; 2883c349dbc7Sjsg } 2884c349dbc7Sjsg if (args->flags & I915_EXEC_IS_PINNED) 2885c349dbc7Sjsg eb.batch_flags |= I915_DISPATCH_PINNED; 2886c349dbc7Sjsg 2887ad8b1aafSjsg err = parse_execbuf2_extensions(args, &eb); 2888ad8b1aafSjsg if (err) 2889ad8b1aafSjsg goto err_ext; 2890ad8b1aafSjsg 2891ad8b1aafSjsg err = add_fence_array(&eb); 2892ad8b1aafSjsg if (err) 2893ad8b1aafSjsg goto err_ext; 2894ad8b1aafSjsg 2895ad8b1aafSjsg #define IN_FENCES (I915_EXEC_FENCE_IN | I915_EXEC_FENCE_SUBMIT) 2896ad8b1aafSjsg if (args->flags & IN_FENCES) { 2897ad8b1aafSjsg if ((args->flags & IN_FENCES) == IN_FENCES) 2898c349dbc7Sjsg return -EINVAL; 2899c349dbc7Sjsg 2900ad8b1aafSjsg in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2)); 2901ad8b1aafSjsg if (!in_fence) { 2902c349dbc7Sjsg err = -EINVAL; 2903ad8b1aafSjsg goto err_ext; 2904c349dbc7Sjsg } 2905c349dbc7Sjsg } 2906ad8b1aafSjsg #undef IN_FENCES 2907c349dbc7Sjsg 2908c349dbc7Sjsg if (args->flags & I915_EXEC_FENCE_OUT) { 2909c349dbc7Sjsg out_fence_fd = get_unused_fd_flags(O_CLOEXEC); 2910c349dbc7Sjsg if (out_fence_fd < 0) { 2911c349dbc7Sjsg err = out_fence_fd; 2912ad8b1aafSjsg goto err_in_fence; 2913c349dbc7Sjsg } 2914c349dbc7Sjsg } 2915c349dbc7Sjsg 2916c349dbc7Sjsg err = eb_create(&eb); 2917c349dbc7Sjsg if (err) 2918c349dbc7Sjsg goto err_out_fence; 2919c349dbc7Sjsg 2920c349dbc7Sjsg GEM_BUG_ON(!eb.lut_size); 2921c349dbc7Sjsg 2922c349dbc7Sjsg err = eb_select_context(&eb); 2923c349dbc7Sjsg if (unlikely(err)) 2924c349dbc7Sjsg goto err_destroy; 2925c349dbc7Sjsg 2926ad8b1aafSjsg err = eb_select_engine(&eb); 2927c349dbc7Sjsg if (unlikely(err)) 2928c349dbc7Sjsg goto err_context; 2929c349dbc7Sjsg 2930ad8b1aafSjsg err = eb_lookup_vmas(&eb); 2931ad8b1aafSjsg if (err) { 2932ad8b1aafSjsg eb_release_vmas(&eb, true); 2933ad8b1aafSjsg goto err_engine; 2934ad8b1aafSjsg } 2935ad8b1aafSjsg 2936ad8b1aafSjsg i915_gem_ww_ctx_init(&eb.ww, true); 2937ad8b1aafSjsg 2938ad8b1aafSjsg err = eb_relocate_parse(&eb); 2939c349dbc7Sjsg if (err) { 2940c349dbc7Sjsg /* 2941c349dbc7Sjsg * If the user expects the execobject.offset and 2942c349dbc7Sjsg * reloc.presumed_offset to be an exact match, 2943c349dbc7Sjsg * as for using NO_RELOC, then we cannot update 2944c349dbc7Sjsg * the execobject.offset until we have completed 2945c349dbc7Sjsg * relocation. 2946c349dbc7Sjsg */ 2947c349dbc7Sjsg args->flags &= ~__EXEC_HAS_RELOC; 2948c349dbc7Sjsg goto err_vma; 2949c349dbc7Sjsg } 2950c349dbc7Sjsg 2951ad8b1aafSjsg ww_acquire_done(&eb.ww.ctx); 2952c349dbc7Sjsg 2953c349dbc7Sjsg batch = eb.batch->vma; 2954c349dbc7Sjsg 2955c349dbc7Sjsg /* Allocate a request for this batch buffer nice and early. 
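 * The syncobj and in-fence dependencies and the optional out-fence
 * set up below all need the request to exist first.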
*/
2956c349dbc7Sjsg eb.request = i915_request_create(eb.context);
2957c349dbc7Sjsg if (IS_ERR(eb.request)) {
2958c349dbc7Sjsg err = PTR_ERR(eb.request);
2959ad8b1aafSjsg goto err_vma;
2960c349dbc7Sjsg }
2961c349dbc7Sjsg 
2962*5ca02815Sjsg if (unlikely(eb.gem_context->syncobj)) {
2963*5ca02815Sjsg struct dma_fence *fence;
2964*5ca02815Sjsg 
2965*5ca02815Sjsg fence = drm_syncobj_fence_get(eb.gem_context->syncobj);
2966*5ca02815Sjsg err = i915_request_await_dma_fence(eb.request, fence);
2967*5ca02815Sjsg dma_fence_put(fence);
2968*5ca02815Sjsg if (err)
2969*5ca02815Sjsg goto err_request;
2970*5ca02815Sjsg }
2971*5ca02815Sjsg 
2972c349dbc7Sjsg if (in_fence) {
2973ad8b1aafSjsg if (args->flags & I915_EXEC_FENCE_SUBMIT)
2974ad8b1aafSjsg err = i915_request_await_execution(eb.request,
2975*5ca02815Sjsg in_fence);
2976ad8b1aafSjsg else
2977ad8b1aafSjsg err = i915_request_await_dma_fence(eb.request,
2978ad8b1aafSjsg in_fence);
2979c349dbc7Sjsg if (err < 0)
2980c349dbc7Sjsg goto err_request;
2981c349dbc7Sjsg }
2982c349dbc7Sjsg 
2983ad8b1aafSjsg if (eb.fences) {
2984ad8b1aafSjsg err = await_fence_array(&eb);
2985c349dbc7Sjsg if (err)
2986c349dbc7Sjsg goto err_request;
2987c349dbc7Sjsg }
2988c349dbc7Sjsg 
2989c349dbc7Sjsg if (out_fence_fd != -1) {
2990c349dbc7Sjsg out_fence = sync_file_create(&eb.request->fence);
2991c349dbc7Sjsg if (!out_fence) {
2992c349dbc7Sjsg err = -ENOMEM;
2993c349dbc7Sjsg goto err_request;
2994c349dbc7Sjsg }
2995c349dbc7Sjsg }
2996c349dbc7Sjsg 
2997c349dbc7Sjsg /*
2998c349dbc7Sjsg * Whilst this request exists, batch_obj will be on the
2999c349dbc7Sjsg * active_list, and so will hold the active reference. Only when this
3000c349dbc7Sjsg * request is retired will the batch_obj be moved onto the
3001c349dbc7Sjsg * inactive_list and lose its active reference. Hence we do not need
3002c349dbc7Sjsg * to explicitly hold another reference here. 
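 * The same applies to any buffer-pool node used for the batch; it is
 * kept alive by being marked active on this request below.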
3003c349dbc7Sjsg */ 3004c349dbc7Sjsg eb.request->batch = batch; 3005ad8b1aafSjsg if (eb.batch_pool) 3006ad8b1aafSjsg intel_gt_buffer_pool_mark_active(eb.batch_pool, eb.request); 3007c349dbc7Sjsg 3008c349dbc7Sjsg trace_i915_request_queue(eb.request, eb.batch_flags); 3009c349dbc7Sjsg err = eb_submit(&eb, batch); 3010*5ca02815Sjsg 3011c349dbc7Sjsg err_request: 3012c349dbc7Sjsg i915_request_get(eb.request); 3013ad8b1aafSjsg err = eb_request_add(&eb, err); 3014c349dbc7Sjsg 3015ad8b1aafSjsg if (eb.fences) 3016ad8b1aafSjsg signal_fence_array(&eb); 3017c349dbc7Sjsg 3018c349dbc7Sjsg if (out_fence) { 3019c349dbc7Sjsg if (err == 0) { 3020c349dbc7Sjsg fd_install(out_fence_fd, out_fence->file); 3021c349dbc7Sjsg args->rsvd2 &= GENMASK_ULL(31, 0); /* keep in-fence */ 3022c349dbc7Sjsg args->rsvd2 |= (u64)out_fence_fd << 32; 3023c349dbc7Sjsg out_fence_fd = -1; 3024c349dbc7Sjsg } else { 3025c349dbc7Sjsg fput(out_fence->file); 3026c349dbc7Sjsg } 3027c349dbc7Sjsg } 3028*5ca02815Sjsg 3029*5ca02815Sjsg if (unlikely(eb.gem_context->syncobj)) { 3030*5ca02815Sjsg drm_syncobj_replace_fence(eb.gem_context->syncobj, 3031*5ca02815Sjsg &eb.request->fence); 3032*5ca02815Sjsg } 3033*5ca02815Sjsg 3034c349dbc7Sjsg i915_request_put(eb.request); 3035c349dbc7Sjsg 3036c349dbc7Sjsg err_vma: 3037ad8b1aafSjsg eb_release_vmas(&eb, true); 3038c349dbc7Sjsg if (eb.trampoline) 3039c349dbc7Sjsg i915_vma_unpin(eb.trampoline); 3040ad8b1aafSjsg WARN_ON(err == -EDEADLK); 3041ad8b1aafSjsg i915_gem_ww_ctx_fini(&eb.ww); 3042ad8b1aafSjsg 3043ad8b1aafSjsg if (eb.batch_pool) 3044ad8b1aafSjsg intel_gt_buffer_pool_put(eb.batch_pool); 3045ad8b1aafSjsg err_engine: 3046ad8b1aafSjsg eb_put_engine(&eb); 3047c349dbc7Sjsg err_context: 3048c349dbc7Sjsg i915_gem_context_put(eb.gem_context); 3049c349dbc7Sjsg err_destroy: 3050c349dbc7Sjsg eb_destroy(&eb); 3051c349dbc7Sjsg err_out_fence: 3052c349dbc7Sjsg if (out_fence_fd != -1) 3053c349dbc7Sjsg put_unused_fd(out_fence_fd); 3054c349dbc7Sjsg err_in_fence: 3055c349dbc7Sjsg dma_fence_put(in_fence); 3056ad8b1aafSjsg err_ext: 3057ad8b1aafSjsg put_fence_array(eb.fences, eb.num_fences); 3058c349dbc7Sjsg return err; 3059c349dbc7Sjsg } 3060c349dbc7Sjsg 3061c349dbc7Sjsg static size_t eb_element_size(void) 3062c349dbc7Sjsg { 3063c349dbc7Sjsg return sizeof(struct drm_i915_gem_exec_object2) + sizeof(struct eb_vma); 3064c349dbc7Sjsg } 3065c349dbc7Sjsg 3066c349dbc7Sjsg static bool check_buffer_count(size_t count) 3067c349dbc7Sjsg { 3068c349dbc7Sjsg const size_t sz = eb_element_size(); 3069c349dbc7Sjsg 3070c349dbc7Sjsg /* 3071c349dbc7Sjsg * When using LUT_HANDLE, we impose a limit of INT_MAX for the lookup 3072c349dbc7Sjsg * array size (see eb_create()). Otherwise, we can accept an array as 3073c349dbc7Sjsg * large as can be addressed (though use large arrays at your peril)! 
3074c349dbc7Sjsg */ 3075c349dbc7Sjsg 3076c349dbc7Sjsg return !(count < 1 || count > INT_MAX || count > SIZE_MAX / sz - 1); 3077c349dbc7Sjsg } 3078c349dbc7Sjsg 3079c349dbc7Sjsg int 3080c349dbc7Sjsg i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data, 3081c349dbc7Sjsg struct drm_file *file) 3082c349dbc7Sjsg { 3083c349dbc7Sjsg struct drm_i915_private *i915 = to_i915(dev); 3084c349dbc7Sjsg struct drm_i915_gem_execbuffer2 *args = data; 3085c349dbc7Sjsg struct drm_i915_gem_exec_object2 *exec2_list; 3086c349dbc7Sjsg const size_t count = args->buffer_count; 3087c349dbc7Sjsg int err; 3088c349dbc7Sjsg 3089c349dbc7Sjsg if (!check_buffer_count(count)) { 3090c349dbc7Sjsg drm_dbg(&i915->drm, "execbuf2 with %zd buffers\n", count); 3091c349dbc7Sjsg return -EINVAL; 3092c349dbc7Sjsg } 3093c349dbc7Sjsg 3094c349dbc7Sjsg err = i915_gem_check_execbuffer(args); 3095c349dbc7Sjsg if (err) 3096c349dbc7Sjsg return err; 3097c349dbc7Sjsg 3098ad8b1aafSjsg /* Allocate extra slots for use by the command parser */ 3099ad8b1aafSjsg exec2_list = kvmalloc_array(count + 2, eb_element_size(), 3100c349dbc7Sjsg __GFP_NOWARN | GFP_KERNEL); 3101c349dbc7Sjsg if (exec2_list == NULL) { 3102c349dbc7Sjsg drm_dbg(&i915->drm, "Failed to allocate exec list for %zd buffers\n", 3103c349dbc7Sjsg count); 3104c349dbc7Sjsg return -ENOMEM; 3105c349dbc7Sjsg } 3106c349dbc7Sjsg if (copy_from_user(exec2_list, 3107c349dbc7Sjsg u64_to_user_ptr(args->buffers_ptr), 3108c349dbc7Sjsg sizeof(*exec2_list) * count)) { 3109c349dbc7Sjsg drm_dbg(&i915->drm, "copy %zd exec entries failed\n", count); 3110c349dbc7Sjsg kvfree(exec2_list); 3111c349dbc7Sjsg return -EFAULT; 3112c349dbc7Sjsg } 3113c349dbc7Sjsg 3114ad8b1aafSjsg err = i915_gem_do_execbuffer(dev, file, args, exec2_list); 3115c349dbc7Sjsg 3116c349dbc7Sjsg /* 3117c349dbc7Sjsg * Now that we have begun execution of the batchbuffer, we ignore 3118c349dbc7Sjsg * any new error after this point. Also given that we have already 3119c349dbc7Sjsg * updated the associated relocations, we try to write out the current 3120c349dbc7Sjsg * object locations irrespective of any error. 3121c349dbc7Sjsg */ 3122c349dbc7Sjsg if (args->flags & __EXEC_HAS_RELOC) { 3123c349dbc7Sjsg struct drm_i915_gem_exec_object2 __user *user_exec_list = 3124c349dbc7Sjsg u64_to_user_ptr(args->buffers_ptr); 3125c349dbc7Sjsg unsigned int i; 3126c349dbc7Sjsg 3127c349dbc7Sjsg /* Copy the new buffer offsets back to the user's exec list. */ 3128c349dbc7Sjsg /* 3129c349dbc7Sjsg * Note: count * sizeof(*user_exec_list) does not overflow, 3130c349dbc7Sjsg * because we checked 'count' in check_buffer_count(). 3131c349dbc7Sjsg * 3132c349dbc7Sjsg * And this range already got effectively checked earlier 3133c349dbc7Sjsg * when we did the "copy_from_user()" above. 
3134c349dbc7Sjsg */ 3135ad8b1aafSjsg if (!user_write_access_begin(user_exec_list, 3136ad8b1aafSjsg count * sizeof(*user_exec_list))) 3137c349dbc7Sjsg goto end; 3138c349dbc7Sjsg 3139c349dbc7Sjsg for (i = 0; i < args->buffer_count; i++) { 3140c349dbc7Sjsg if (!(exec2_list[i].offset & UPDATE)) 3141c349dbc7Sjsg continue; 3142c349dbc7Sjsg 3143c349dbc7Sjsg exec2_list[i].offset = 3144c349dbc7Sjsg gen8_canonical_addr(exec2_list[i].offset & PIN_OFFSET_MASK); 3145c349dbc7Sjsg unsafe_put_user(exec2_list[i].offset, 3146c349dbc7Sjsg &user_exec_list[i].offset, 3147c349dbc7Sjsg end_user); 3148c349dbc7Sjsg } 3149c349dbc7Sjsg end_user: 3150ad8b1aafSjsg user_write_access_end(); 3151c349dbc7Sjsg end:; 3152c349dbc7Sjsg } 3153c349dbc7Sjsg 3154c349dbc7Sjsg args->flags &= ~__I915_EXEC_UNKNOWN_FLAGS; 3155c349dbc7Sjsg kvfree(exec2_list); 3156c349dbc7Sjsg return err; 3157c349dbc7Sjsg } 3158