/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2016 Intel Corporation
 */

#ifndef __I915_GEM_OBJECT_TYPES_H__
#define __I915_GEM_OBJECT_TYPES_H__

#include <linux/mmu_notifier.h>

#include <drm/drm_gem.h>
#include <drm/ttm/ttm_bo_api.h>
#include <uapi/drm/i915_drm.h>

#include "i915_active.h"
#include "i915_selftest.h"

struct drm_i915_gem_object;
struct intel_frontbuffer;
struct intel_memory_region;

/*
 * struct i915_lut_handle tracks the fast lookups from handle to vma used
 * for execbuf. Although we use a radixtree for that mapping, in order to
 * remove them as the object or context is closed, we need a secondary list
 * and a translation entry (i915_lut_handle).
 */
struct i915_lut_handle {
	struct list_head obj_link;
	struct i915_gem_context *ctx;
	u32 handle;
};

struct drm_i915_gem_object_ops {
	unsigned int flags;
#define I915_GEM_OBJECT_IS_SHRINKABLE			BIT(1)
/* Skip the shrinker management in set_pages/unset_pages */
#define I915_GEM_OBJECT_SELF_MANAGED_SHRINK_LIST	BIT(2)
#define I915_GEM_OBJECT_IS_PROXY			BIT(3)
#define I915_GEM_OBJECT_NO_MMAP				BIT(4)

	/* Interface between the GEM object and its backing storage.
	 * get_pages() is called once prior to the use of the associated set
	 * of pages before binding them into the GTT, and put_pages() is
	 * called after we no longer need them. As we expect there to be an
	 * associated cost with migrating pages between the backing storage
	 * and making them available for the GPU (e.g. clflush), we may hold
	 * onto the pages after they are no longer referenced by the GPU
	 * in case they may be used again shortly (for example migrating the
	 * pages to a different memory domain within the GTT). put_pages()
	 * will therefore most likely be called when the object itself is
	 * being released or under memory pressure (where we attempt to
	 * reap pages for the shrinker).
	 */
	int (*get_pages)(struct drm_i915_gem_object *obj);
	void (*put_pages)(struct drm_i915_gem_object *obj,
			  struct sg_table *pages);
	int (*truncate)(struct drm_i915_gem_object *obj);
	/**
	 * shrink - Perform further backend-specific actions to facilitate
	 * shrinking.
	 * @obj: The gem object
	 * @flags: Extra flags to control shrinking behaviour in the backend
	 *
	 * Possible values for @flags:
	 *
	 * I915_GEM_OBJECT_SHRINK_WRITEBACK - Try to perform writeback of the
	 * backing pages, if supported.
	 *
	 * I915_GEM_OBJECT_SHRINK_NO_GPU_WAIT - Don't wait for the object to
	 * idle. Active objects can be considered later. The TTM backend for
	 * example might have async migrations going on, which don't use any
	 * i915_vma to track the active GTT binding, and hence having an
	 * unbound object might not be enough.
	 */
#define I915_GEM_OBJECT_SHRINK_WRITEBACK	BIT(0)
#define I915_GEM_OBJECT_SHRINK_NO_GPU_WAIT	BIT(1)
	int (*shrink)(struct drm_i915_gem_object *obj, unsigned int flags);
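	/*
	 * Illustrative sketch (not from this file): a shrinker path that may
	 * write back but must not stall on the GPU would combine the flags
	 * above like so, skipping backends that don't implement the hook:
	 *
	 *	unsigned int shrink_flags = I915_GEM_OBJECT_SHRINK_WRITEBACK |
	 *				    I915_GEM_OBJECT_SHRINK_NO_GPU_WAIT;
	 *	int err = 0;
	 *
	 *	if (obj->ops->shrink)
	 *		err = obj->ops->shrink(obj, shrink_flags);
	 */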

	int (*pread)(struct drm_i915_gem_object *obj,
		     const struct drm_i915_gem_pread *arg);
	int (*pwrite)(struct drm_i915_gem_object *obj,
		      const struct drm_i915_gem_pwrite *arg);
	u64 (*mmap_offset)(struct drm_i915_gem_object *obj);
	void (*unmap_virtual)(struct drm_i915_gem_object *obj);

	int (*dmabuf_export)(struct drm_i915_gem_object *obj);

	/**
	 * adjust_lru - notify that the madvise value was updated
	 * @obj: The gem object
	 *
	 * The madvise value may have been updated, or the object was recently
	 * referenced, so act accordingly (perhaps changing an LRU list, etc.).
	 */
	void (*adjust_lru)(struct drm_i915_gem_object *obj);

	/**
	 * delayed_free - Override the default delayed free implementation
	 */
	void (*delayed_free)(struct drm_i915_gem_object *obj);

	/**
	 * migrate - Migrate the object to a different region, either for
	 * pinning or for as long as the object lock is held.
	 */
	int (*migrate)(struct drm_i915_gem_object *obj,
		       struct intel_memory_region *mr);

	void (*release)(struct drm_i915_gem_object *obj);

	const struct vm_operations_struct *mmap_ops;
	const char *name; /* friendly name for debug, e.g. lockdep classes */
};
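/*
 * Illustrative sketch (hypothetical backend, not part of this header): a
 * minimal implementation wires the page allocation hooks into an ops table.
 * my_get_pages(), my_put_pages(), the my_alloc_sg_table() helper and the
 * "my-backend" name are all made up here; the real backends (shmem, stolen,
 * ttm, userptr, ...) live in their own files. This assumes the
 * __i915_gem_object_set_pages() and sg_page_sizes() helpers as found
 * elsewhere in the driver:
 *
 *	static int my_get_pages(struct drm_i915_gem_object *obj)
 *	{
 *		struct sg_table *pages;
 *
 *		pages = my_alloc_sg_table(obj->base.size); // hypothetical
 *		if (IS_ERR(pages))
 *			return PTR_ERR(pages);
 *
 *		__i915_gem_object_set_pages(obj, pages,
 *					    sg_page_sizes(pages->sgl));
 *		return 0;
 *	}
 *
 *	static void my_put_pages(struct drm_i915_gem_object *obj,
 *				 struct sg_table *pages)
 *	{
 *		sg_free_table(pages);
 *		kfree(pages);
 *	}
 *
 *	static const struct drm_i915_gem_object_ops my_object_ops = {
 *		.name = "my-backend",
 *		.flags = I915_GEM_OBJECT_IS_SHRINKABLE,
 *		.get_pages = my_get_pages,
 *		.put_pages = my_put_pages,
 *	};
 */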

/**
 * enum i915_cache_level - The supported GTT caching values for system memory
 * pages.
 *
 * These translate to some special GTT PTE bits when binding pages into some
 * address space. It also determines whether an object, or rather its pages,
 * is coherent with the GPU when reading or writing through the CPU cache
 * with those pages.
 *
 * Userspace can also control this through struct drm_i915_gem_caching.
 */
enum i915_cache_level {
	/**
	 * @I915_CACHE_NONE:
	 *
	 * GPU access is not coherent with the CPU cache. If the cache is dirty
	 * and we need the underlying pages to be coherent with some later GPU
	 * access then we need to manually flush the pages.
	 *
	 * On shared LLC platforms reads and writes through the CPU cache are
	 * still coherent even with this setting. See also
	 * &drm_i915_gem_object.cache_coherent for more details. Due to this we
	 * should only ever use uncached for scanout surfaces, otherwise we end
	 * up over-flushing in some places.
	 *
	 * This is the default on non-LLC platforms.
	 */
	I915_CACHE_NONE = 0,
	/**
	 * @I915_CACHE_LLC:
	 *
	 * GPU access is coherent with the CPU cache. If the cache is dirty,
	 * then the GPU will ensure that access remains coherent, when both
	 * reading and writing through the CPU cache. GPU writes can dirty the
	 * CPU cache.
	 *
	 * Not used for scanout surfaces.
	 *
	 * Applies to both platforms with a shared LLC (HAS_LLC), and
	 * snooping-based platforms (HAS_SNOOP).
	 *
	 * This is the default on shared LLC platforms. The only exception is
	 * scanout objects, where the display engine is not coherent with the
	 * CPU cache. For such objects I915_CACHE_NONE or I915_CACHE_WT is
	 * automatically applied by the kernel in pin_for_display, if userspace
	 * has not done so already.
	 */
	I915_CACHE_LLC,
	/**
	 * @I915_CACHE_L3_LLC:
	 *
	 * Explicitly enable the Gfx L3 cache, with coherent LLC.
	 *
	 * The Gfx L3 sits between the domain-specific caches, e.g.
	 * sampler/render caches, and the larger LLC. LLC is coherent with the
	 * GPU, but L3 is only visible to the GPU, so likely needs to be
	 * flushed when the workload completes.
	 *
	 * Not used for scanout surfaces.
	 *
	 * Only exposed on some gen7 + GGTT. More recent hardware has dropped
	 * this explicit setting, where it should now be enabled by default.
	 */
	I915_CACHE_L3_LLC,
	/**
	 * @I915_CACHE_WT:
	 *
	 * Write-through. Used for scanout surfaces.
	 *
	 * The GPU can utilise the caches, while still having the display
	 * engine be coherent with GPU writes, as a result we don't need to
	 * flush the CPU caches when moving out of the render domain. This is
	 * the default setting chosen by the kernel, if supported by the HW,
	 * otherwise we fall back to I915_CACHE_NONE. On the CPU side writes
	 * through the CPU cache still need to be flushed, to remain coherent
	 * with the display engine.
	 */
	I915_CACHE_WT,
};
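/*
 * Illustrative userspace sketch: the cache level is requested through the
 * set-caching ioctl mentioned above, here using libdrm's drmIoctl() and
 * assuming an open DRM fd and a valid GEM handle (error handling elided):
 *
 *	struct drm_i915_gem_caching arg = {
 *		.handle = handle,
 *		.caching = I915_CACHING_CACHED, // roughly I915_CACHE_LLC
 *	};
 *
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg);
 */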

enum i915_map_type {
	I915_MAP_WB = 0,
	I915_MAP_WC,
#define I915_MAP_OVERRIDE BIT(31)
	I915_MAP_FORCE_WB = I915_MAP_WB | I915_MAP_OVERRIDE,
	I915_MAP_FORCE_WC = I915_MAP_WC | I915_MAP_OVERRIDE,
};
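/*
 * Illustrative sketch: I915_MAP_OVERRIDE is a modifier bit rather than a
 * mapping type of its own, so the base mode can be recovered by masking it
 * off. E.g. when forcing a write-combined kernel mapping via the
 * i915_gem_object_pin_map_unlocked() helper:
 *
 *	void *vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_FORCE_WC);
 *	enum i915_map_type type = I915_MAP_FORCE_WC & ~I915_MAP_OVERRIDE;
 *	// type == I915_MAP_WC; vaddr is an ERR_PTR() value on failure
 */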

enum i915_mmap_type {
	I915_MMAP_TYPE_GTT = 0,
	I915_MMAP_TYPE_WC,
	I915_MMAP_TYPE_WB,
	I915_MMAP_TYPE_UC,
	I915_MMAP_TYPE_FIXED,
};

struct i915_mmap_offset {
	struct drm_vma_offset_node vma_node;
	struct drm_i915_gem_object *obj;
	enum i915_mmap_type mmap_type;

	struct rb_node offset;
};
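/*
 * Illustrative userspace sketch: an i915_mmap_offset is instantiated per
 * mmap type in response to the MMAP_OFFSET ioctl, and the fake offset it
 * provides is then handed to mmap(2). Assumes an open DRM fd and a valid
 * GEM handle (error handling elided):
 *
 *	struct drm_i915_gem_mmap_offset arg = {
 *		.handle = handle,
 *		.flags = I915_MMAP_OFFSET_WC, // maps to I915_MMAP_TYPE_WC
 *	};
 *
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP_OFFSET, &arg);
 *	ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
 *		   fd, arg.offset);
 */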

struct i915_gem_object_page_iter {
	struct scatterlist *sg_pos;
	unsigned int sg_idx; /* in pages, but 32bit eek! */

	struct radix_tree_root radix;
	struct mutex lock; /* protects this cache */
};
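/*
 * Illustrative sketch: these iterators back the random-access page lookup
 * helpers, caching the last visited scatterlist position (plus a radix tree
 * of prior entries) so that walking pages in ascending order stays cheap,
 * e.g.:
 *
 *	struct page *page = i915_gem_object_get_page(obj, n);
 *	dma_addr_t addr = i915_gem_object_get_dma_address(obj, n);
 */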

struct drm_i915_gem_object {
	/*
	 * We might have reason to revisit the below since it wastes
	 * a lot of space for non-ttm gem objects.
	 * In any case, always use the accessors for the ttm_buffer_object
	 * when accessing it.
	 */
	union {
		struct drm_gem_object base;
		struct ttm_buffer_object __do_not_access;
	};

	const struct drm_i915_gem_object_ops *ops;

	struct {
		/**
		 * @vma.lock: protect the list/tree of vmas
		 */
		spinlock_t lock;

		/**
		 * @vma.list: List of VMAs backed by this object
		 *
		 * The VMAs on this list are ordered by type: all GGTT vma are
		 * placed at the head and all ppGTT vma at the tail. The
		 * different types of GGTT vma are unordered between
		 * themselves; use @vma.tree (which has a defined order
		 * between all VMAs) to quickly find an exact match.
		 */
		struct list_head list;

		/**
		 * @vma.tree: Ordered tree of VMAs backed by this object
		 *
		 * All VMAs created for this object are placed in the @vma.tree
		 * for fast retrieval via a binary search in
		 * i915_vma_instance(). They are also added to @vma.list for
		 * easy iteration.
		 */
		struct rb_root tree;
	} vma;

	/**
	 * @lut_list: List of vma lookup entries in use for this object.
	 *
	 * If this object is closed, we need to remove all of its VMAs from
	 * the fast lookup index in associated contexts; @lut_list provides
	 * this translation from object to context->handles_vma.
	 */
	struct list_head lut_list;
	spinlock_t lut_lock; /* guards lut_list */

	/**
	 * @obj_link: Link into @i915_gem_ww_ctx.obj_list
	 *
	 * When we lock this object through i915_gem_object_lock() with a
	 * context, we add it to the list to ensure we can unlock everything
	 * when i915_gem_ww_ctx_backoff() or i915_gem_ww_ctx_fini() is called.
	 */
	struct list_head obj_link;

	/**
	 * @shares_resv_from: The object shares the resv from this vm.
	 */
	struct i915_address_space *shares_resv_from;
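	/*
	 * Illustrative sketch of the ww transaction that @obj_link supports
	 * (simplified; the for_i915_gem_ww() loop handles -EDEADLK backoff
	 * by unlocking everything on the list and retrying):
	 *
	 *	struct i915_gem_ww_ctx ww;
	 *	int err;
	 *
	 *	for_i915_gem_ww(&ww, err, true) {
	 *		err = i915_gem_object_lock(obj, &ww);
	 *		if (err)
	 *			continue;
	 *		// ... operate on obj under the lock ...
	 *	}
	 */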

	union {
		struct rcu_head rcu;
		struct llist_node freed;
	};

	/**
	 * Whether the object is currently in the GGTT mmap.
	 */
	unsigned int userfault_count;
	struct list_head userfault_link;

	struct {
		spinlock_t lock; /* Protects access to mmo offsets */
		struct rb_root offsets;
	} mmo;

	I915_SELFTEST_DECLARE(struct list_head st_link);

	unsigned long flags;
#define I915_BO_ALLOC_CONTIGUOUS	BIT(0)
#define I915_BO_ALLOC_VOLATILE		BIT(1)
#define I915_BO_ALLOC_CPU_CLEAR		BIT(2)
#define I915_BO_ALLOC_USER		BIT(3)
/* Object is allowed to lose its contents on suspend / resume, even if pinned */
#define I915_BO_ALLOC_PM_VOLATILE	BIT(4)
/* Object needs to be restored early using memcpy during resume */
#define I915_BO_ALLOC_PM_EARLY		BIT(5)
#define I915_BO_ALLOC_FLAGS (I915_BO_ALLOC_CONTIGUOUS | \
			     I915_BO_ALLOC_VOLATILE | \
			     I915_BO_ALLOC_CPU_CLEAR | \
			     I915_BO_ALLOC_USER | \
			     I915_BO_ALLOC_PM_VOLATILE | \
			     I915_BO_ALLOC_PM_EARLY)
#define I915_BO_READONLY	BIT(6)
#define I915_TILING_QUIRK_BIT	7 /* unknown swizzling; do not release! */
#define I915_BO_PROTECTED	BIT(8)
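	/*
	 * Illustrative sketch (assuming the lmem creation helper from
	 * i915_gem_lmem.h): the I915_BO_ALLOC_* flags are passed at object
	 * creation, e.g. for a contiguous, CPU-cleared buffer:
	 *
	 *	obj = i915_gem_object_create_lmem(i915, size,
	 *					  I915_BO_ALLOC_CONTIGUOUS |
	 *					  I915_BO_ALLOC_CPU_CLEAR);
	 */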
	/**
	 * @mem_flags: Mutable placement-related flags
	 *
	 * These are flags that indicate specifics of the memory region
	 * the object is currently in. As such they are only stable
	 * either under the object lock or if the object is pinned.
	 */
	unsigned int mem_flags;
#define I915_BO_FLAG_STRUCT_PAGE BIT(0) /* Object backed by struct pages */
#define I915_BO_FLAG_IOMEM       BIT(1) /* Object backed by IO memory */
	/**
	 * @cache_level: The desired GTT caching level.
	 *
	 * See enum i915_cache_level for possible values, along with what
	 * each does.
	 */
	unsigned int cache_level:3;
	/**
	 * @cache_coherent:
	 *
	 * Track whether the pages are coherent with the GPU if reading or
	 * writing through the CPU caches. This largely depends on the
	 * @cache_level setting.
	 *
	 * On platforms which don't have a shared LLC (HAS_SNOOP), like Atom
	 * platforms, coherency must be explicitly requested with some special
	 * GTT caching bits (see enum i915_cache_level). Enabling coherency
	 * does come at a performance and power cost on such platforms. On
	 * the flip side the kernel does not need to manually flush any
	 * buffers which need to be coherent with the GPU, if the object is
	 * not coherent, i.e. @cache_coherent is zero.
	 *
	 * On platforms that share the LLC with the CPU (HAS_LLC), all GT
	 * memory access will automatically snoop the CPU caches (even with
	 * I915_CACHE_NONE). The one exception is when dealing with the
	 * display engine, like with scanout surfaces. To handle this the
	 * kernel will always flush the surface out of the CPU caches when
	 * preparing it for scanout. Also note that since scanout surfaces
	 * are only ever read by the display engine we only need to care
	 * about flushing any writes through the CPU cache, reads on the
	 * other hand will always be coherent.
	 *
	 * One oddity here is why @cache_coherent is not a simple boolean,
	 * i.e. coherent vs non-coherent. The reasoning for this is back to
	 * the display engine not being fully coherent. As a result scanout
	 * surfaces will either be marked as I915_CACHE_NONE or I915_CACHE_WT.
	 * In the case of seeing I915_CACHE_NONE the kernel makes the
	 * assumption that this is likely a scanout surface, and will set
	 * @cache_coherent as only I915_BO_CACHE_COHERENT_FOR_READ, on
	 * platforms with the shared LLC. The kernel uses this to always flush
	 * writes through the CPU cache as early as possible, where it can, in
	 * effect keeping @cache_dirty clean, so we can potentially avoid
	 * stalling when flushing the surface just before doing the scanout.
	 * This does mean we might unnecessarily flush non-scanout objects in
	 * some places, but the default assumption is that all normal objects
	 * should be using I915_CACHE_LLC, at least on platforms with the
	 * shared LLC.
	 *
	 * Supported values:
	 *
	 * I915_BO_CACHE_COHERENT_FOR_READ:
	 *
	 * On shared LLC platforms, we use this for special scanout surfaces,
	 * where the display engine is not coherent with the CPU cache. As
	 * such we need to ensure we flush any writes before doing the
	 * scanout. As an optimisation we try to flush any writes as early as
	 * possible to avoid stalling later.
	 *
	 * Thus for scanout surfaces using I915_CACHE_NONE, on shared LLC
	 * platforms, we use:
	 *
	 * cache_coherent = I915_BO_CACHE_COHERENT_FOR_READ
	 *
	 * While for normal objects that are fully coherent, including special
	 * scanout surfaces marked as I915_CACHE_WT, we use:
	 *
	 * cache_coherent = I915_BO_CACHE_COHERENT_FOR_READ |
	 *		    I915_BO_CACHE_COHERENT_FOR_WRITE
	 *
	 * And then for objects that are not coherent at all we use:
	 *
	 * cache_coherent = 0
	 *
	 * I915_BO_CACHE_COHERENT_FOR_WRITE:
	 *
	 * When writing through the CPU cache, the GPU is still coherent. Note
	 * that this also implies I915_BO_CACHE_COHERENT_FOR_READ.
	 */
#define I915_BO_CACHE_COHERENT_FOR_READ BIT(0)
#define I915_BO_CACHE_COHERENT_FOR_WRITE BIT(1)
	unsigned int cache_coherent:2;

	/**
	 * @cache_dirty:
	 *
	 * Track if we are dirty with writes through the CPU cache for this
	 * object. As a result reading directly from main memory might yield
	 * stale data.
	 *
	 * This also ties into whether the kernel is tracking the object as
	 * coherent with the GPU, as per @cache_coherent, as it determines if
	 * flushing might be needed at various points.
	 *
	 * Another part of @cache_dirty is managing flushing when first
	 * acquiring the pages for system memory, at this point the pages are
	 * considered foreign, so the default assumption is that the cache is
	 * dirty, for example the page zeroing done by the kernel might leave
	 * writes through the CPU cache, or swapping-in, while the actual data
	 * in main memory is potentially stale. Note that this is a potential
	 * security issue when dealing with userspace objects and zeroing. Now,
	 * whether we actually need to apply the big sledgehammer of flushing
	 * all the pages on acquire depends on if @cache_coherent is marked as
	 * I915_BO_CACHE_COHERENT_FOR_WRITE, i.e. that the GPU will be
	 * coherent for both reads and writes through the CPU cache.
	 *
	 * Note that on shared LLC platforms we still apply the heavy flush for
	 * I915_CACHE_NONE objects, under the assumption that this is going to
	 * be used for scanout.
	 *
	 * Update: On some hardware there is now also the 'Bypass LLC' MOCS
	 * entry, which defeats our @cache_coherent tracking, since userspace
	 * can freely bypass the CPU cache when touching the pages with the
	 * GPU, where the kernel is completely unaware. On such platforms we
	 * need to apply the sledgehammer-on-acquire regardless of the
	 * @cache_coherent.
	 *
	 * Special care is taken on non-LLC platforms, to prevent potential
	 * information leaks. The driver currently ensures:
	 *
	 * 1. All userspace objects, by default, have @cache_level set as
	 * I915_CACHE_NONE. The only exception is userptr objects, where we
	 * instead force I915_CACHE_LLC, but we also don't allow userspace to
	 * ever change the @cache_level for such objects. Another special case
	 * is dma-buf, which doesn't rely on @cache_dirty, but there we
	 * always do a forced flush when acquiring the pages, if there is a
	 * chance that the pages can be read directly from main memory with
	 * the GPU.
	 *
	 * 2. All I915_CACHE_NONE objects have @cache_dirty initially true.
	 *
	 * 3. All swapped-out objects (i.e. shmem) have @cache_dirty set to
	 * true.
	 *
	 * 4. The @cache_dirty is never freely reset before the initial
	 * flush, even if userspace adjusts the @cache_level through the
	 * i915_gem_set_caching_ioctl.
	 *
	 * 5. All @cache_dirty objects (including swapped-in) are initially
	 * flushed with a synchronous call to drm_clflush_sg in
	 * __i915_gem_object_set_pages. The @cache_dirty can be freely reset
	 * at this point. All further asynchronous clflushes are never
	 * security critical, i.e. userspace is free to race against itself.
	 */
	unsigned int cache_dirty:1;

	/**
	 * @read_domains: Read memory domains.
	 *
	 * These monitor which caches contain read/write data related to the
	 * object. When transitioning from one set of domains to another,
	 * the driver is called to ensure that caches are suitably flushed and
	 * invalidated.
	 */
	u16 read_domains;

	/**
	 * @write_domain: Corresponding unique write memory domain.
	 */
	u16 write_domain;
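	/*
	 * Illustrative userspace sketch: the domains above are driven by the
	 * set-domain ioctl, e.g. preparing an object for CPU reads and
	 * writes, using libdrm's drmIoctl() (error handling elided):
	 *
	 *	struct drm_i915_gem_set_domain arg = {
	 *		.handle = handle,
	 *		.read_domains = I915_GEM_DOMAIN_CPU,
	 *		.write_domain = I915_GEM_DOMAIN_CPU,
	 *	};
	 *
	 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &arg);
	 */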

	struct intel_frontbuffer __rcu *frontbuffer;

	/** Current tiling stride for the object, if it's tiled. */
	unsigned int tiling_and_stride;
#define FENCE_MINIMUM_STRIDE 128 /* See i915_tiling_ok() */
#define TILING_MASK (FENCE_MINIMUM_STRIDE - 1)
#define STRIDE_MASK (~TILING_MASK)
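	/*
	 * Illustrative sketch: the tiling mode and stride are packed into the
	 * one word above; this is how the i915_gem_object_get_tiling() and
	 * i915_gem_object_get_stride() helpers decode it:
	 *
	 *	unsigned int tiling = obj->tiling_and_stride & TILING_MASK;
	 *	unsigned int stride = obj->tiling_and_stride & STRIDE_MASK;
	 */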

	struct {
		/*
		 * Protects the pages and their use. Do not use directly, but
		 * instead go through the pin/unpin interfaces.
		 */
		atomic_t pages_pin_count;

		/**
		 * @shrink_pin: Prevents the pages from being made visible to
		 * the shrinker, while the shrink_pin is non-zero. Most users
		 * should pretty much never have to care about this, outside of
		 * some special use cases.
		 *
		 * By default most objects will start out as visible to the
		 * shrinker (if I915_GEM_OBJECT_IS_SHRINKABLE) as soon as the
		 * backing pages are attached to the object, like in
		 * __i915_gem_object_set_pages(). They will then be removed
		 * from the shrinker list once the pages are released.
		 *
		 * The @shrink_pin is incremented by calling
		 * i915_gem_object_make_unshrinkable(), which will also remove
		 * the object from the shrinker list, if the pin count was
		 * zero.
		 *
		 * Callers will then typically call
		 * i915_gem_object_make_shrinkable() or
		 * i915_gem_object_make_purgeable() to decrement the pin count,
		 * and make the pages visible again.
		 */
		atomic_t shrink_pin;

		/**
		 * @ttm_shrinkable: True when the object is using shmem pages
		 * underneath. Protected by the object lock.
		 */
		bool ttm_shrinkable;

		/**
		 * Priority list of potential placements for this object.
		 */
		struct intel_memory_region **placements;
		int n_placements;

		/**
		 * Memory region for this object.
		 */
		struct intel_memory_region *region;

		/**
		 * Memory manager resource allocated for this object. Only
		 * needed for the mock region.
		 */
		struct ttm_resource *res;

		/**
		 * Element within memory_region->objects or region->purgeable
		 * if the object is marked as DONTNEED. Access is protected by
		 * region->obj_lock.
		 */
		struct list_head region_link;

		struct i915_refct_sgt *rsgt;
		struct sg_table *pages;
		void *mapping;

		struct i915_page_sizes {
			/**
			 * The sg mask of the pages sg_table. i.e. the mask of
			 * the lengths for each sg entry.
			 */
			unsigned int phys;

			/**
			 * The gtt page sizes we are allowed to use given the
			 * sg mask and the supported page sizes. This will
			 * express the smallest unit we can use for the whole
			 * object, as well as the larger sizes we may be able
			 * to use opportunistically.
			 */
			unsigned int sg;

			/**
			 * The actual gtt page size usage. Since we can have
			 * multiple vma associated with this object we need to
			 * prevent any trampling of state, hence a copy of this
			 * struct also lives in each vma, therefore the gtt
			 * value here should only be read/written through the
			 * vma.
			 */
			unsigned int gtt;
		} page_sizes;

		I915_SELFTEST_DECLARE(unsigned int page_mask);

		struct i915_gem_object_page_iter get_page;
		struct i915_gem_object_page_iter get_dma_page;

		/**
		 * Element within i915->mm.shrink_list or i915->mm.purge_list,
		 * locked by i915->mm.obj_lock.
		 */
		struct list_head link;

		/**
		 * Advice: are the backing pages purgeable?
		 */
		unsigned int madv:2;

		/**
		 * This is set if the object has been written to since the
		 * pages were last acquired.
		 */
		bool dirty:1;
	} mm;
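	/*
	 * Illustrative sketch: @mm.pages_pin_count is managed through the
	 * pin/unpin helpers rather than touched directly, e.g.:
	 *
	 *	err = i915_gem_object_pin_pages_unlocked(obj);
	 *	if (err)
	 *		return err;
	 *	// ... mm.pages is now populated and safe to use ...
	 *	i915_gem_object_unpin_pages(obj);
	 */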

	struct {
		struct i915_refct_sgt *cached_io_rsgt;
		struct i915_gem_object_page_iter get_io_page;
		struct drm_i915_gem_object *backup;
		bool created:1;
	} ttm;

	/*
	 * Record which PXP key instance this object was created against (if
	 * any), so we can use it to determine if the encryption is valid by
	 * comparing against the current key instance.
	 */
	u32 pxp_key_instance;

	/** Record of address bit 17 of each page at last unbind. */
	unsigned long *bit_17;

	union {
#ifdef CONFIG_MMU_NOTIFIER
		struct i915_gem_userptr {
			uintptr_t ptr;
			unsigned long notifier_seq;

			struct mmu_interval_notifier notifier;
			struct page **pvec;
			int page_ref;
		} userptr;
#endif

		struct drm_mm_node *stolen;

		unsigned long scratch;
		u64 encode;

		void *gvt_info;
	};
};

static inline struct drm_i915_gem_object *
to_intel_bo(struct drm_gem_object *gem)
{
	/*
	 * Assert that to_intel_bo(NULL) == NULL: container_of() only
	 * degenerates to a no-op cast when @base sits at offset 0.
	 */
	BUILD_BUG_ON(offsetof(struct drm_i915_gem_object, base));

	return container_of(gem, struct drm_i915_gem_object, base);
}

#endif