/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2016 Intel Corporation
 */

#ifndef __I915_GEM_OBJECT_TYPES_H__
#define __I915_GEM_OBJECT_TYPES_H__

#include <linux/mmu_notifier.h>

#include <drm/drm_gem.h>
#include <drm/ttm/ttm_bo_api.h>
#include <uapi/drm/i915_drm.h>

#include "i915_active.h"
#include "i915_selftest.h"

struct drm_i915_gem_object;
struct intel_frontbuffer;
struct intel_memory_region;

/*
 * struct i915_lut_handle tracks the fast lookups from handle to vma used
 * for execbuf. Although we use a radixtree for that mapping, in order to
 * remove them as the object or context is closed, we need a secondary list
 * and a translation entry (i915_lut_handle).
 */
struct i915_lut_handle {
	struct list_head obj_link;
	struct i915_gem_context *ctx;
	u32 handle;
};

struct drm_i915_gem_object_ops {
	unsigned int flags;
#define I915_GEM_OBJECT_IS_SHRINKABLE			BIT(1)
/* Skip the shrinker management in set_pages/unset_pages */
#define I915_GEM_OBJECT_SELF_MANAGED_SHRINK_LIST	BIT(2)
#define I915_GEM_OBJECT_IS_PROXY			BIT(3)
#define I915_GEM_OBJECT_NO_MMAP				BIT(4)

	/* Interface between the GEM object and its backing storage.
	 * get_pages() is called once prior to the use of the associated set
	 * of pages before binding them into the GTT, and put_pages() is
	 * called after we no longer need them. As we expect there to be an
	 * associated cost with migrating pages between the backing storage
	 * and making them available for the GPU (e.g. clflush), we may hold
	 * onto the pages after they are no longer referenced by the GPU
	 * in case they may be used again shortly (for example migrating the
	 * pages to a different memory domain within the GTT). put_pages()
	 * will therefore most likely be called when the object itself is
	 * being released or under memory pressure (where we attempt to
	 * reap pages for the shrinker).
	 */
	int (*get_pages)(struct drm_i915_gem_object *obj);
	void (*put_pages)(struct drm_i915_gem_object *obj,
			  struct sg_table *pages);
	int (*truncate)(struct drm_i915_gem_object *obj);
	/**
	 * shrink - Perform further backend-specific actions to facilitate
	 * shrinking.
	 * @obj: The gem object
	 * @flags: Extra flags to control shrinking behaviour in the backend
	 *
	 * Possible values for @flags:
	 *
	 * I915_GEM_OBJECT_SHRINK_WRITEBACK - Try to perform writeback of the
	 * backing pages, if supported.
	 *
	 * I915_GEM_OBJECT_SHRINK_NO_GPU_WAIT - Don't wait for the object to
	 * idle. Active objects can be considered later. The TTM backend for
	 * example might have async migrations going on, which don't use any
	 * i915_vma to track the active GTT binding, and hence having an
	 * unbound object might not be enough.
	 */
#define I915_GEM_OBJECT_SHRINK_WRITEBACK	BIT(0)
#define I915_GEM_OBJECT_SHRINK_NO_GPU_WAIT	BIT(1)
	int (*shrink)(struct drm_i915_gem_object *obj, unsigned int flags);
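	/*
	 * Illustrative sketch (not from this file): a shrinker path that may
	 * write back but must not stall on the GPU would combine the flags
	 * above like so, skipping backends that don't implement the hook:
	 *
	 *	unsigned int shrink_flags = I915_GEM_OBJECT_SHRINK_WRITEBACK |
	 *				    I915_GEM_OBJECT_SHRINK_NO_GPU_WAIT;
	 *	int err = 0;
	 *
	 *	if (obj->ops->shrink)
	 *		err = obj->ops->shrink(obj, shrink_flags);
	 */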

	int (*pread)(struct drm_i915_gem_object *obj,
		     const struct drm_i915_gem_pread *arg);
	int (*pwrite)(struct drm_i915_gem_object *obj,
		      const struct drm_i915_gem_pwrite *arg);
	u64 (*mmap_offset)(struct drm_i915_gem_object *obj);
	void (*unmap_virtual)(struct drm_i915_gem_object *obj);

	int (*dmabuf_export)(struct drm_i915_gem_object *obj);

	/**
	 * adjust_lru - notify that the madvise value was updated
	 * @obj: The gem object
	 *
	 * The madvise value may have been updated, or the object was recently
	 * referenced, so act accordingly (perhaps changing an LRU list, etc.).
	 */
	void (*adjust_lru)(struct drm_i915_gem_object *obj);

	/**
	 * delayed_free - Override the default delayed free implementation
	 */
	void (*delayed_free)(struct drm_i915_gem_object *obj);

	/**
	 * migrate - Migrate the object to a different region, either for
	 * pinning or for as long as the object lock is held.
	 */
	int (*migrate)(struct drm_i915_gem_object *obj,
		       struct intel_memory_region *mr);

	void (*release)(struct drm_i915_gem_object *obj);

	const struct vm_operations_struct *mmap_ops;
	const char *name; /* friendly name for debug, e.g. lockdep classes */
};
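/*
 * Illustrative sketch (hypothetical backend, not part of this header): a
 * minimal implementation wires the page allocation hooks into an ops table.
 * my_get_pages(), my_put_pages(), the my_alloc_sg_table() helper and the
 * "my-backend" name are all made up here; the real backends (shmem, stolen,
 * ttm, userptr, ...) live in their own files. This assumes the
 * __i915_gem_object_set_pages() and sg_page_sizes() helpers as found
 * elsewhere in the driver:
 *
 *	static int my_get_pages(struct drm_i915_gem_object *obj)
 *	{
 *		struct sg_table *pages;
 *
 *		pages = my_alloc_sg_table(obj->base.size); // hypothetical
 *		if (IS_ERR(pages))
 *			return PTR_ERR(pages);
 *
 *		__i915_gem_object_set_pages(obj, pages,
 *					    sg_page_sizes(pages->sgl));
 *		return 0;
 *	}
 *
 *	static void my_put_pages(struct drm_i915_gem_object *obj,
 *				 struct sg_table *pages)
 *	{
 *		sg_free_table(pages);
 *		kfree(pages);
 *	}
 *
 *	static const struct drm_i915_gem_object_ops my_object_ops = {
 *		.name = "my-backend",
 *		.flags = I915_GEM_OBJECT_IS_SHRINKABLE,
 *		.get_pages = my_get_pages,
 *		.put_pages = my_put_pages,
 *	};
 */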

/**
 * enum i915_cache_level - The supported GTT caching values for system memory
 * pages.
 *
 * These translate to some special GTT PTE bits when binding pages into some
 * address space. It also determines whether an object, or rather its pages,
 * is coherent with the GPU when reading or writing through the CPU cache
 * with those pages.
 *
 * Userspace can also control this through struct drm_i915_gem_caching.
 */
enum i915_cache_level {
	/**
	 * @I915_CACHE_NONE:
	 *
	 * GPU access is not coherent with the CPU cache. If the cache is dirty
	 * and we need the underlying pages to be coherent with some later GPU
	 * access then we need to manually flush the pages.
	 *
	 * On shared LLC platforms reads and writes through the CPU cache are
	 * still coherent even with this setting. See also
	 * &drm_i915_gem_object.cache_coherent for more details. Due to this we
	 * should only ever use uncached for scanout surfaces, otherwise we end
	 * up over-flushing in some places.
	 *
	 * This is the default on non-LLC platforms.
	 */
	I915_CACHE_NONE = 0,
	/**
	 * @I915_CACHE_LLC:
	 *
	 * GPU access is coherent with the CPU cache. If the cache is dirty,
	 * then the GPU will ensure that access remains coherent, when both
	 * reading and writing through the CPU cache. GPU writes can dirty the
	 * CPU cache.
	 *
	 * Not used for scanout surfaces.
	 *
	 * Applies to both platforms with a shared LLC (HAS_LLC), and
	 * snooping-based platforms (HAS_SNOOP).
	 *
	 * This is the default on shared LLC platforms. The only exception is
	 * scanout objects, where the display engine is not coherent with the
	 * CPU cache. For such objects I915_CACHE_NONE or I915_CACHE_WT is
	 * automatically applied by the kernel in pin_for_display, if userspace
	 * has not done so already.
	 */
	I915_CACHE_LLC,
	/**
	 * @I915_CACHE_L3_LLC:
	 *
	 * Explicitly enable the Gfx L3 cache, with coherent LLC.
	 *
	 * The Gfx L3 sits between the domain-specific caches, e.g.
	 * sampler/render caches, and the larger LLC. LLC is coherent with the
	 * GPU, but L3 is only visible to the GPU, so likely needs to be
	 * flushed when the workload completes.
	 *
	 * Not used for scanout surfaces.
	 *
	 * Only exposed on some gen7 + GGTT. More recent hardware has dropped
	 * this explicit setting, where it should now be enabled by default.
	 */
	I915_CACHE_L3_LLC,
	/**
	 * @I915_CACHE_WT:
	 *
	 * Write-through. Used for scanout surfaces.
	 *
	 * The GPU can utilise the caches, while still having the display
	 * engine be coherent with GPU writes, as a result we don't need to
	 * flush the CPU caches when moving out of the render domain. This is
	 * the default setting chosen by the kernel, if supported by the HW,
	 * otherwise we fall back to I915_CACHE_NONE. On the CPU side writes
	 * through the CPU cache still need to be flushed, to remain coherent
	 * with the display engine.
	 */
	I915_CACHE_WT,
};
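/*
 * Illustrative userspace sketch: the cache level is requested through the
 * set-caching ioctl mentioned above, here using libdrm's drmIoctl() and
 * assuming an open DRM fd and a valid GEM handle (error handling elided):
 *
 *	struct drm_i915_gem_caching arg = {
 *		.handle = handle,
 *		.caching = I915_CACHING_CACHED, // roughly I915_CACHE_LLC
 *	};
 *
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg);
 */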

enum i915_map_type {
	I915_MAP_WB = 0,
	I915_MAP_WC,
#define I915_MAP_OVERRIDE BIT(31)
	I915_MAP_FORCE_WB = I915_MAP_WB | I915_MAP_OVERRIDE,
	I915_MAP_FORCE_WC = I915_MAP_WC | I915_MAP_OVERRIDE,
};
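/*
 * Illustrative sketch: I915_MAP_OVERRIDE is a modifier bit rather than a
 * mapping type of its own, so the base mode can be recovered by masking it
 * off. E.g. when forcing a write-combined kernel mapping via the
 * i915_gem_object_pin_map_unlocked() helper:
 *
 *	void *vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_FORCE_WC);
 *	enum i915_map_type type = I915_MAP_FORCE_WC & ~I915_MAP_OVERRIDE;
 *	// type == I915_MAP_WC; vaddr is an ERR_PTR() value on failure
 */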

enum i915_mmap_type {
	I915_MMAP_TYPE_GTT = 0,
	I915_MMAP_TYPE_WC,
	I915_MMAP_TYPE_WB,
	I915_MMAP_TYPE_UC,
	I915_MMAP_TYPE_FIXED,
};

struct i915_mmap_offset {
	struct drm_vma_offset_node vma_node;
	struct drm_i915_gem_object *obj;
	enum i915_mmap_type mmap_type;

	struct rb_node offset;
};
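/*
 * Illustrative userspace sketch: an i915_mmap_offset is instantiated per
 * mmap type in response to the MMAP_OFFSET ioctl, and the fake offset it
 * provides is then handed to mmap(2). Assumes an open DRM fd and a valid
 * GEM handle (error handling elided):
 *
 *	struct drm_i915_gem_mmap_offset arg = {
 *		.handle = handle,
 *		.flags = I915_MMAP_OFFSET_WC, // maps to I915_MMAP_TYPE_WC
 *	};
 *
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP_OFFSET, &arg);
 *	ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
 *		   fd, arg.offset);
 */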

struct i915_gem_object_page_iter {
	struct scatterlist *sg_pos;
	unsigned int sg_idx; /* in pages, but 32bit eek! */

	struct radix_tree_root radix;
	struct mutex lock; /* protects this cache */
};
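/*
 * Illustrative sketch: these iterators back the random-access page lookup
 * helpers, caching the last visited scatterlist position (plus a radix tree
 * of prior entries) so that walking pages in ascending order stays cheap,
 * e.g.:
 *
 *	struct page *page = i915_gem_object_get_page(obj, n);
 *	dma_addr_t addr = i915_gem_object_get_dma_address(obj, n);
 */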

struct drm_i915_gem_object {
	/*
	 * We might have reason to revisit the below since it wastes
	 * a lot of space for non-ttm gem objects.
	 * In any case, always use the accessors for the ttm_buffer_object
	 * when accessing it.
	 */
	union {
		struct drm_gem_object base;
		struct ttm_buffer_object __do_not_access;
	};

	const struct drm_i915_gem_object_ops *ops;

	struct {
		/**
		 * @vma.lock: protect the list/tree of vmas
		 */
		spinlock_t lock;

		/**
		 * @vma.list: List of VMAs backed by this object
		 *
		 * The VMAs on this list are ordered by type: all GGTT vma are
		 * placed at the head and all ppGTT vma at the tail. The
		 * different types of GGTT vma are unordered between
		 * themselves; use @vma.tree (which has a defined order
		 * between all VMAs) to quickly find an exact match.
		 */
		struct list_head list;

		/**
		 * @vma.tree: Ordered tree of VMAs backed by this object
		 *
		 * All VMAs created for this object are placed in the @vma.tree
		 * for fast retrieval via a binary search in
		 * i915_vma_instance(). They are also added to @vma.list for
		 * easy iteration.
		 */
		struct rb_root tree;
	} vma;

	/**
	 * @lut_list: List of vma lookup entries in use for this object.
	 *
	 * If this object is closed, we need to remove all of its VMAs from
	 * the fast lookup index in associated contexts; @lut_list provides
	 * this translation from object to context->handles_vma.
	 */
	struct list_head lut_list;
	spinlock_t lut_lock; /* guards lut_list */

	/**
	 * @obj_link: Link into @i915_gem_ww_ctx.obj_list
	 *
	 * When we lock this object through i915_gem_object_lock() with a
	 * context, we add it to the list to ensure we can unlock everything
	 * when i915_gem_ww_ctx_backoff() or i915_gem_ww_ctx_fini() is called.
	 */
	struct list_head obj_link;

	/**
	 * @shares_resv_from: The object shares the resv from this vm.
	 */
	struct i915_address_space *shares_resv_from;
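	/*
	 * Illustrative sketch of the ww transaction that @obj_link supports
	 * (simplified; the for_i915_gem_ww() loop handles -EDEADLK backoff
	 * by unlocking everything on the list and retrying):
	 *
	 *	struct i915_gem_ww_ctx ww;
	 *	int err;
	 *
	 *	for_i915_gem_ww(&ww, err, true) {
	 *		err = i915_gem_object_lock(obj, &ww);
	 *		if (err)
	 *			continue;
	 *		// ... operate on obj under the lock ...
	 *	}
	 */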

	union {
		struct rcu_head rcu;
		struct llist_node freed;
	};

	/**
	 * Whether the object is currently in the GGTT mmap.
	 */
	unsigned int userfault_count;
	struct list_head userfault_link;

	struct {
		spinlock_t lock; /* Protects access to mmo offsets */
		struct rb_root offsets;
	} mmo;

	I915_SELFTEST_DECLARE(struct list_head st_link);

	unsigned long flags;
#define I915_BO_ALLOC_CONTIGUOUS	BIT(0)
#define I915_BO_ALLOC_VOLATILE		BIT(1)
#define I915_BO_ALLOC_CPU_CLEAR		BIT(2)
#define I915_BO_ALLOC_USER		BIT(3)
/* Object is allowed to lose its contents on suspend / resume, even if pinned */
#define I915_BO_ALLOC_PM_VOLATILE	BIT(4)
/* Object needs to be restored early using memcpy during resume */
#define I915_BO_ALLOC_PM_EARLY		BIT(5)
#define I915_BO_ALLOC_FLAGS (I915_BO_ALLOC_CONTIGUOUS | \
			     I915_BO_ALLOC_VOLATILE | \
			     I915_BO_ALLOC_CPU_CLEAR | \
			     I915_BO_ALLOC_USER | \
			     I915_BO_ALLOC_PM_VOLATILE | \
			     I915_BO_ALLOC_PM_EARLY)
#define I915_BO_READONLY	BIT(6)
#define I915_TILING_QUIRK_BIT	7 /* unknown swizzling; do not release! */
#define I915_BO_PROTECTED	BIT(8)
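	/*
	 * Illustrative sketch (assuming the lmem creation helper from
	 * i915_gem_lmem.h): the I915_BO_ALLOC_* flags are passed at object
	 * creation, e.g. for a contiguous, CPU-cleared buffer:
	 *
	 *	obj = i915_gem_object_create_lmem(i915, size,
	 *					  I915_BO_ALLOC_CONTIGUOUS |
	 *					  I915_BO_ALLOC_CPU_CLEAR);
	 */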
	/**
	 * @mem_flags: Mutable placement-related flags
	 *
	 * These are flags that indicate specifics of the memory region
	 * the object is currently in. As such they are only stable
	 * either under the object lock or if the object is pinned.
	 */
	unsigned int mem_flags;
#define I915_BO_FLAG_STRUCT_PAGE BIT(0) /* Object backed by struct pages */
#define I915_BO_FLAG_IOMEM       BIT(1) /* Object backed by IO memory */
	/**
	 * @cache_level: The desired GTT caching level.
	 *
	 * See enum i915_cache_level for possible values, along with what
	 * each does.
	 */
	unsigned int cache_level:3;
	/**
	 * @cache_coherent:
	 *
	 * Track whether the pages are coherent with the GPU if reading or
	 * writing through the CPU caches. This largely depends on the
	 * @cache_level setting.
	 *
	 * On platforms which don't have a shared LLC (HAS_SNOOP), like Atom
	 * platforms, coherency must be explicitly requested with some special
	 * GTT caching bits (see enum i915_cache_level). Enabling coherency
	 * does come at a performance and power cost on such platforms. On
	 * the flip side the kernel does not need to manually flush any
	 * buffers which need to be coherent with the GPU, if the object is
	 * not coherent, i.e. @cache_coherent is zero.
	 *
	 * On platforms that share the LLC with the CPU (HAS_LLC), all GT
	 * memory access will automatically snoop the CPU caches (even with
	 * I915_CACHE_NONE). The one exception is when dealing with the
	 * display engine, like with scanout surfaces. To handle this the
	 * kernel will always flush the surface out of the CPU caches when
	 * preparing it for scanout. Also note that since scanout surfaces
	 * are only ever read by the display engine we only need to care
	 * about flushing any writes through the CPU cache, reads on the
	 * other hand will always be coherent.
	 *
	 * One oddity here is why @cache_coherent is not a simple boolean,
	 * i.e. coherent vs non-coherent. The reasoning for this is back to
	 * the display engine not being fully coherent. As a result scanout
	 * surfaces will either be marked as I915_CACHE_NONE or I915_CACHE_WT.
	 * In the case of seeing I915_CACHE_NONE the kernel makes the
	 * assumption that this is likely a scanout surface, and will set
	 * @cache_coherent as only I915_BO_CACHE_COHERENT_FOR_READ, on
	 * platforms with the shared LLC. The kernel uses this to always flush
	 * writes through the CPU cache as early as possible, where it can, in
	 * effect keeping @cache_dirty clean, so we can potentially avoid
	 * stalling when flushing the surface just before doing the scanout.
	 * This does mean we might unnecessarily flush non-scanout objects in
	 * some places, but the default assumption is that all normal objects
	 * should be using I915_CACHE_LLC, at least on platforms with the
	 * shared LLC.
	 *
	 * Supported values:
	 *
	 * I915_BO_CACHE_COHERENT_FOR_READ:
	 *
	 * On shared LLC platforms, we use this for special scanout surfaces,
	 * where the display engine is not coherent with the CPU cache. As
	 * such we need to ensure we flush any writes before doing the
	 * scanout. As an optimisation we try to flush any writes as early as
	 * possible to avoid stalling later.
	 *
	 * Thus for scanout surfaces using I915_CACHE_NONE, on shared LLC
	 * platforms, we use:
	 *
	 * cache_coherent = I915_BO_CACHE_COHERENT_FOR_READ
	 *
	 * While for normal objects that are fully coherent, including special
	 * scanout surfaces marked as I915_CACHE_WT, we use:
	 *
	 * cache_coherent = I915_BO_CACHE_COHERENT_FOR_READ |
	 *		    I915_BO_CACHE_COHERENT_FOR_WRITE
	 *
	 * And then for objects that are not coherent at all we use:
	 *
	 * cache_coherent = 0
	 *
	 * I915_BO_CACHE_COHERENT_FOR_WRITE:
	 *
	 * When writing through the CPU cache, the GPU is still coherent. Note
	 * that this also implies I915_BO_CACHE_COHERENT_FOR_READ.
	 */
#define I915_BO_CACHE_COHERENT_FOR_READ BIT(0)
#define I915_BO_CACHE_COHERENT_FOR_WRITE BIT(1)
	unsigned int cache_coherent:2;

	/**
	 * @cache_dirty:
	 *
	 * Track if we are dirty with writes through the CPU cache for this
	 * object. As a result reading directly from main memory might yield
	 * stale data.
	 *
	 * This also ties into whether the kernel is tracking the object as
	 * coherent with the GPU, as per @cache_coherent, as it determines if
	 * flushing might be needed at various points.
	 *
	 * Another part of @cache_dirty is managing flushing when first
	 * acquiring the pages for system memory, at this point the pages are
	 * considered foreign, so the default assumption is that the cache is
	 * dirty, for example the page zeroing done by the kernel might leave
	 * writes through the CPU cache, or swapping-in, while the actual data
	 * in main memory is potentially stale. Note that this is a potential
	 * security issue when dealing with userspace objects and zeroing. Now,
	 * whether we actually need to apply the big sledgehammer of flushing
	 * all the pages on acquire depends on if @cache_coherent is marked as
	 * I915_BO_CACHE_COHERENT_FOR_WRITE, i.e. that the GPU will be
	 * coherent for both reads and writes through the CPU cache.
	 *
	 * Note that on shared LLC platforms we still apply the heavy flush for
	 * I915_CACHE_NONE objects, under the assumption that this is going to
	 * be used for scanout.
	 *
	 * Update: On some hardware there is now also the 'Bypass LLC' MOCS
	 * entry, which defeats our @cache_coherent tracking, since userspace
	 * can freely bypass the CPU cache when touching the pages with the
	 * GPU, where the kernel is completely unaware. On such platforms we
	 * need to apply the sledgehammer-on-acquire regardless of the
	 * @cache_coherent.
	 *
	 * Special care is taken on non-LLC platforms, to prevent potential
	 * information leaks. The driver currently ensures:
	 *
	 * 1. All userspace objects, by default, have @cache_level set as
	 * I915_CACHE_NONE. The only exception is userptr objects, where we
	 * instead force I915_CACHE_LLC, but we also don't allow userspace to
	 * ever change the @cache_level for such objects. Another special case
	 * is dma-buf, which doesn't rely on @cache_dirty, but there we
	 * always do a forced flush when acquiring the pages, if there is a
	 * chance that the pages can be read directly from main memory with
	 * the GPU.
	 *
	 * 2. All I915_CACHE_NONE objects have @cache_dirty initially true.
	 *
	 * 3. All swapped-out objects (i.e. shmem) have @cache_dirty set to
	 * true.
	 *
	 * 4. The @cache_dirty is never freely reset before the initial
	 * flush, even if userspace adjusts the @cache_level through the
	 * i915_gem_set_caching_ioctl.
	 *
	 * 5. All @cache_dirty objects (including swapped-in) are initially
	 * flushed with a synchronous call to drm_clflush_sg in
	 * __i915_gem_object_set_pages. The @cache_dirty can be freely reset
	 * at this point. All further asynchronous clflushes are never
	 * security critical, i.e. userspace is free to race against itself.
	 */
	unsigned int cache_dirty:1;

	/**
	 * @read_domains: Read memory domains.
	 *
	 * These monitor which caches contain read/write data related to the
	 * object. When transitioning from one set of domains to another,
	 * the driver is called to ensure that caches are suitably flushed and
	 * invalidated.
	 */
	u16 read_domains;

	/**
	 * @write_domain: Corresponding unique write memory domain.
	 */
	u16 write_domain;
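	/*
	 * Illustrative userspace sketch: the domains above are driven by the
	 * set-domain ioctl, e.g. preparing an object for CPU reads and
	 * writes, using libdrm's drmIoctl() (error handling elided):
	 *
	 *	struct drm_i915_gem_set_domain arg = {
	 *		.handle = handle,
	 *		.read_domains = I915_GEM_DOMAIN_CPU,
	 *		.write_domain = I915_GEM_DOMAIN_CPU,
	 *	};
	 *
	 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &arg);
	 */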

	struct intel_frontbuffer __rcu *frontbuffer;

	/** Current tiling stride for the object, if it's tiled. */
	unsigned int tiling_and_stride;
#define FENCE_MINIMUM_STRIDE 128 /* See i915_tiling_ok() */
#define TILING_MASK (FENCE_MINIMUM_STRIDE - 1)
#define STRIDE_MASK (~TILING_MASK)
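	/*
	 * Illustrative sketch: the tiling mode and stride are packed into the
	 * one word above; this is how the i915_gem_object_get_tiling() and
	 * i915_gem_object_get_stride() helpers decode it:
	 *
	 *	unsigned int tiling = obj->tiling_and_stride & TILING_MASK;
	 *	unsigned int stride = obj->tiling_and_stride & STRIDE_MASK;
	 */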

	struct {
		/*
		 * Protects the pages and their use. Do not use directly, but
		 * instead go through the pin/unpin interfaces.
		 */
		atomic_t pages_pin_count;

		/**
		 * @shrink_pin: Prevents the pages from being made visible to
		 * the shrinker, while the shrink_pin is non-zero. Most users
		 * should pretty much never have to care about this, outside of
		 * some special use cases.
		 *
		 * By default most objects will start out as visible to the
		 * shrinker (if I915_GEM_OBJECT_IS_SHRINKABLE) as soon as the
		 * backing pages are attached to the object, like in
		 * __i915_gem_object_set_pages(). They will then be removed
		 * from the shrinker list once the pages are released.
		 *
		 * The @shrink_pin is incremented by calling
		 * i915_gem_object_make_unshrinkable(), which will also remove
		 * the object from the shrinker list, if the pin count was
		 * zero.
		 *
		 * Callers will then typically call
		 * i915_gem_object_make_shrinkable() or
		 * i915_gem_object_make_purgeable() to decrement the pin count,
		 * and make the pages visible again.
		 */
		atomic_t shrink_pin;

		/**
		 * @ttm_shrinkable: True when the object is using shmem pages
		 * underneath. Protected by the object lock.
		 */
		bool ttm_shrinkable;

		/**
		 * Priority list of potential placements for this object.
		 */
		struct intel_memory_region **placements;
		int n_placements;

		/**
		 * Memory region for this object.
		 */
		struct intel_memory_region *region;

		/**
		 * Memory manager resource allocated for this object. Only
		 * needed for the mock region.
		 */
		struct ttm_resource *res;

		/**
		 * Element within memory_region->objects or region->purgeable
		 * if the object is marked as DONTNEED. Access is protected by
		 * region->obj_lock.
		 */
		struct list_head region_link;

		struct i915_refct_sgt *rsgt;
		struct sg_table *pages;
		void *mapping;

		struct i915_page_sizes {
			/**
			 * The sg mask of the pages sg_table. i.e. the mask of
			 * the lengths for each sg entry.
			 */
			unsigned int phys;

			/**
			 * The gtt page sizes we are allowed to use given the
			 * sg mask and the supported page sizes. This will
			 * express the smallest unit we can use for the whole
			 * object, as well as the larger sizes we may be able
			 * to use opportunistically.
			 */
			unsigned int sg;

			/**
			 * The actual gtt page size usage. Since we can have
			 * multiple vma associated with this object we need to
			 * prevent any trampling of state, hence a copy of this
			 * struct also lives in each vma, therefore the gtt
			 * value here should only be read/written through the
			 * vma.
			 */
			unsigned int gtt;
		} page_sizes;

		I915_SELFTEST_DECLARE(unsigned int page_mask);

		struct i915_gem_object_page_iter get_page;
		struct i915_gem_object_page_iter get_dma_page;

		/**
		 * Element within i915->mm.shrink_list or i915->mm.purge_list,
		 * locked by i915->mm.obj_lock.
		 */
		struct list_head link;

		/**
		 * Advice: are the backing pages purgeable?
		 */
		unsigned int madv:2;

		/**
		 * This is set if the object has been written to since the
		 * pages were last acquired.
		 */
		bool dirty:1;
	} mm;
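	/*
	 * Illustrative sketch: @mm.pages_pin_count is managed through the
	 * pin/unpin helpers rather than touched directly, e.g.:
	 *
	 *	err = i915_gem_object_pin_pages_unlocked(obj);
	 *	if (err)
	 *		return err;
	 *	// ... mm.pages is now populated and safe to use ...
	 *	i915_gem_object_unpin_pages(obj);
	 */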

	struct {
		struct i915_refct_sgt *cached_io_rsgt;
		struct i915_gem_object_page_iter get_io_page;
		struct drm_i915_gem_object *backup;
		bool created:1;
	} ttm;

	/*
	 * Record which PXP key instance this object was created against (if
	 * any), so we can use it to determine if the encryption is valid by
	 * comparing against the current key instance.
	 */
	u32 pxp_key_instance;

	/** Record of address bit 17 of each page at last unbind. */
	unsigned long *bit_17;

	union {
#ifdef CONFIG_MMU_NOTIFIER
		struct i915_gem_userptr {
			uintptr_t ptr;
			unsigned long notifier_seq;

			struct mmu_interval_notifier notifier;
			struct page **pvec;
			int page_ref;
		} userptr;
#endif

		struct drm_mm_node *stolen;

		unsigned long scratch;
		u64 encode;

		void *gvt_info;
	};
};

static inline struct drm_i915_gem_object *
to_intel_bo(struct drm_gem_object *gem)
{
	/*
	 * Assert that to_intel_bo(NULL) == NULL: container_of() only
	 * degenerates to a no-op cast when @base sits at offset 0.
	 */
	BUILD_BUG_ON(offsetof(struct drm_i915_gem_object, base));

	return container_of(gem, struct drm_i915_gem_object, base);
}

#endif