/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef IRIS_BUFMGR_H
#define IRIS_BUFMGR_H

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/types.h>
#include "c11/threads.h"
#include "util/macros.h"
#include "util/u_atomic.h"
#include "util/u_dynarray.h"
#include "util/list.h"
#include "util/simple_mtx.h"
#include "pipe/p_defines.h"
#include "pipebuffer/pb_slab.h"

struct intel_device_info;
struct pipe_debug_callback;
struct isl_surf;
struct iris_syncobj;

/**
 * Memory zones.  When allocating a buffer, you can request that it be
 * placed into a specific region of the virtual address space (PPGTT).
 *
 * Most buffers can go anywhere (IRIS_MEMZONE_OTHER).  Some buffers are
 * accessed via an offset from a base address.  STATE_BASE_ADDRESS has
 * a maximum 4GB size for each region, so we need to restrict those
 * buffers to be within 4GB of the base.  Each memory zone corresponds
 * to a particular base address.
 *
 * We lay out the virtual address space as follows:
 *
 * - [0,   4K): Nothing            (empty page for null address)
 * - [4K,  4G): Shaders            (Instruction Base Address)
 * - [4G,  8G): Surfaces & Binders (Surface State Base Address, Bindless ...)
 * - [8G, 12G): Dynamic            (Dynamic State Base Address)
 * - [12G, *):  Other              (everything else in the full 48-bit VMA)
 *
 * A special buffer for border color lives at the start of the dynamic state
 * memory zone.  This unfortunately has to be handled specially because the
 * SAMPLER_STATE "Indirect State Pointer" field is only a 24-bit pointer.
 *
 * Each GL context uses a separate GEM context, which technically gives them
 * each a separate VMA.  However, we assign addresses globally, so buffers
 * will have the same address in all GEM contexts.  This lets us have a
 * single BO field for the address, which is easy and cheap.
 */
enum iris_memory_zone {
   IRIS_MEMZONE_SHADER,
   IRIS_MEMZONE_BINDER,
   IRIS_MEMZONE_BINDLESS,
   IRIS_MEMZONE_SURFACE,
   IRIS_MEMZONE_DYNAMIC,
   IRIS_MEMZONE_OTHER,

   IRIS_MEMZONE_BORDER_COLOR_POOL,
};

/* Intentionally exclude single buffer "zones" */
#define IRIS_MEMZONE_COUNT (IRIS_MEMZONE_OTHER + 1)

#define IRIS_BINDER_SIZE (64 * 1024)
#define IRIS_MAX_BINDERS 100
#define IRIS_BINDLESS_SIZE (8 * 1024 * 1024)

#define IRIS_MEMZONE_SHADER_START     (0ull * (1ull << 32))
#define IRIS_MEMZONE_BINDER_START     (1ull * (1ull << 32))
#define IRIS_MEMZONE_BINDLESS_START   (IRIS_MEMZONE_BINDER_START + IRIS_MAX_BINDERS * IRIS_BINDER_SIZE)
#define IRIS_MEMZONE_SURFACE_START    (IRIS_MEMZONE_BINDLESS_START + IRIS_BINDLESS_SIZE)
#define IRIS_MEMZONE_DYNAMIC_START    (2ull * (1ull << 32))
#define IRIS_MEMZONE_OTHER_START      (3ull * (1ull << 32))

#define IRIS_BORDER_COLOR_POOL_ADDRESS IRIS_MEMZONE_DYNAMIC_START
#define IRIS_BORDER_COLOR_POOL_SIZE (64 * 1024)

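/*
 * Illustrative compile-time checks (a sketch, not part of the original
 * header): the binder and bindless ranges are carved out of the bottom of
 * the [4G, 8G) surface zone described above, so the surface range must
 * start right where the bindless range ends and still finish before the
 * dynamic state zone begins.
 */
_Static_assert(IRIS_MEMZONE_BINDLESS_START + IRIS_BINDLESS_SIZE ==
               IRIS_MEMZONE_SURFACE_START,
               "surface zone must start where the bindless range ends");
_Static_assert(IRIS_MEMZONE_SURFACE_START < IRIS_MEMZONE_DYNAMIC_START,
               "binders, bindless and surfaces must fit below 8G");
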
/**
 * Classification of the various incoherent caches of the GPU into a number of
 * caching domains.
 */
enum iris_domain {
   /** Render color cache. */
   IRIS_DOMAIN_RENDER_WRITE = 0,
   /** (Hi)Z/stencil cache. */
   IRIS_DOMAIN_DEPTH_WRITE,
   /** Data port (HDC) cache. */
   IRIS_DOMAIN_DATA_WRITE,
   /** Any other read-write cache. */
   IRIS_DOMAIN_OTHER_WRITE,
   /** Vertex cache. */
   IRIS_DOMAIN_VF_READ,
   /** Any other read-only cache. */
   IRIS_DOMAIN_OTHER_READ,
   /** Number of caching domains. */
   NUM_IRIS_DOMAINS,
   /** Not a real cache, used to opt out of the cache tracking mechanism. */
   IRIS_DOMAIN_NONE = NUM_IRIS_DOMAINS
};

/**
 * Whether a caching domain is guaranteed not to write any data to memory.
 */
static inline bool
iris_domain_is_read_only(enum iris_domain access)
{
   return access == IRIS_DOMAIN_OTHER_READ ||
          access == IRIS_DOMAIN_VF_READ;
}

enum iris_mmap_mode {
   IRIS_MMAP_NONE, /**< Cannot be mapped */
   IRIS_MMAP_UC, /**< Fully uncached memory map */
   IRIS_MMAP_WC, /**< Write-combining map with no caching of reads */
   IRIS_MMAP_WB, /**< Write-back mapping with CPU caches enabled */
};

#define IRIS_BATCH_COUNT 2

struct iris_bo_screen_deps {
   struct iris_syncobj *write_syncobjs[IRIS_BATCH_COUNT];
   struct iris_syncobj *read_syncobjs[IRIS_BATCH_COUNT];
};

struct iris_bo {
   /**
    * Size in bytes of the buffer object.
    *
    * The size may be larger than the size originally requested for the
    * allocation, such as being aligned to page size.
    */
   uint64_t size;

   /** Buffer manager context associated with this buffer object */
   struct iris_bufmgr *bufmgr;

   /** Pre-computed hash using _mesa_hash_pointer for cache tracking sets */
   uint32_t hash;

   /** The GEM handle for this buffer object. */
   uint32_t gem_handle;

   /**
    * Virtual address of the buffer inside the PPGTT (Per-Process Graphics
    * Translation Table).
    *
    * Although each hardware context has its own VMA, we assign BOs to the
    * same address in all contexts, for simplicity.
    */
   uint64_t address;

   /**
    * If non-zero, then this bo has an aux-map translation to this address.
    */
   uint64_t aux_map_address;

   /**
    * If this BO is referenced by a batch, this _may_ be the index into the
    * batch->exec_bos[] list.
    *
    * Note that a single buffer may be used by multiple batches/contexts,
    * and thus appear in multiple lists, but we only track one index here.
    * In the common case one can guess that batch->exec_bos[bo->index] == bo
    * and double check if that's true to avoid a linear list walk.
    *
    * XXX: this is not ideal now that we have more than one batch per context,
    * XXX: as the index will flop back and forth between the render index and
    * XXX: compute index...
    */
   unsigned index;

   int refcount;
   const char *name;

   /** BO cache list */
   struct list_head head;

   /**
    * Synchronization sequence number of most recent access of this BO from
    * each caching domain.
    *
    * Although this is a global field, use in multiple contexts should be
    * safe, see iris_emit_buffer_barrier_for() for details.
    *
    * Also align it to 64 bits. This will make atomic operations faster on 32
    * bit platforms.
    */
   uint64_t last_seqnos[NUM_IRIS_DOMAINS] __attribute__ ((aligned (8)));

   /** Up to one per screen, may need realloc. */
   struct iris_bo_screen_deps *deps;
   int deps_size;

   /**
    * Boolean of whether the GPU is definitely not accessing the buffer.
    *
    * This is only valid when reusable, since non-reusable
    * buffers are those that have been shared with other
    * processes, so we don't know their state.
    */
   bool idle;

   union {
      struct {
         uint64_t kflags;

         time_t free_time;

         /** Mapped address for the buffer, saved across map/unmap cycles */
         void *map;

         /** List of GEM handle exports of this buffer (bo_export) */
         struct list_head exports;

         /**
          * Kernel-assigned global name for this object
          *
          * List contains both flink named and prime fd'd objects
          */
         unsigned global_name;

         /** The mmap coherency mode selected at BO allocation time */
         enum iris_mmap_mode mmap_mode;

         /** Was this buffer imported from an external client? */
         bool imported;

         /** Has this buffer been exported to external clients? */
         bool exported;

         /** Boolean of whether this buffer can be re-used */
         bool reusable;

         /** Boolean of whether this buffer points into user memory */
         bool userptr;

         /** Boolean of whether this was allocated from local memory */
         bool local;
      } real;
      struct {
         struct pb_slab_entry entry;
         struct iris_bo *real;
      } slab;
   };
};

#define BO_ALLOC_ZEROED      (1<<0)
#define BO_ALLOC_COHERENT    (1<<1)
#define BO_ALLOC_SMEM        (1<<2)
#define BO_ALLOC_SCANOUT     (1<<3)
#define BO_ALLOC_NO_SUBALLOC (1<<4)

/**
 * Allocate a buffer object.
 *
 * Buffer objects are not necessarily initially mapped into CPU virtual
 * address space or graphics device aperture.  They must be mapped
 * using iris_bo_map() to be used by the CPU.
 */
struct iris_bo *iris_bo_alloc(struct iris_bufmgr *bufmgr,
                              const char *name,
                              uint64_t size,
                              uint32_t alignment,
                              enum iris_memory_zone memzone,
                              unsigned flags);

struct iris_bo *
iris_bo_create_userptr(struct iris_bufmgr *bufmgr, const char *name,
                       void *ptr, size_t size,
                       enum iris_memory_zone memzone);

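/*
 * Illustrative sketch (a hypothetical helper, not part of this header):
 * a typical allocation of a small, zeroed upload buffer kept in system
 * memory.  "bufmgr" is assumed to come from the screen owning the caller.
 */
static inline struct iris_bo *
example_alloc_upload_buffer(struct iris_bufmgr *bufmgr)
{
   /* 64kB, page-aligned, placed anywhere in the full VMA. */
   return iris_bo_alloc(bufmgr, "example upload buffer", 64 * 1024, 4096,
                        IRIS_MEMZONE_OTHER,
                        BO_ALLOC_ZEROED | BO_ALLOC_SMEM);
}
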
/** Takes a reference on a buffer object */
static inline void
iris_bo_reference(struct iris_bo *bo)
{
   p_atomic_inc(&bo->refcount);
}

/**
 * Releases a reference on a buffer object, freeing the data if
 * no references remain.
 */
void iris_bo_unreference(struct iris_bo *bo);

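/*
 * Illustrative sketch (hypothetical, not part of this header): every
 * iris_bo_reference() must be balanced by an iris_bo_unreference(); the
 * buffer is only returned to the allocator once the count reaches zero.
 */
static inline void
example_hold_bo_temporarily(struct iris_bo *bo)
{
   iris_bo_reference(bo);
   /* ... safe to keep using bo here, even if another owner drops it ... */
   iris_bo_unreference(bo);
}
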
#define MAP_READ          PIPE_MAP_READ
#define MAP_WRITE         PIPE_MAP_WRITE
#define MAP_ASYNC         PIPE_MAP_UNSYNCHRONIZED
#define MAP_PERSISTENT    PIPE_MAP_PERSISTENT
#define MAP_COHERENT      PIPE_MAP_COHERENT
/* internal */
#define MAP_RAW           (PIPE_MAP_DRV_PRV << 0)
#define MAP_INTERNAL_MASK (MAP_RAW)

#define MAP_FLAGS         (MAP_READ | MAP_WRITE | MAP_ASYNC | \
                           MAP_PERSISTENT | MAP_COHERENT | MAP_INTERNAL_MASK)

/**
 * Maps the buffer into userspace.
 *
 * This function will first block, waiting for any existing execution on the
 * buffer to complete.  The resulting mapping is returned.
 */
MUST_CHECK void *iris_bo_map(struct pipe_debug_callback *dbg,
                             struct iris_bo *bo, unsigned flags);

/**
 * Reduces the refcount on the userspace mapping of the buffer
 * object.
 */
static inline int iris_bo_unmap(struct iris_bo *bo) { return 0; }

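/*
 * Illustrative sketch (hypothetical, not part of this header): writing a
 * single dword into a BO from the CPU.  "offset" is assumed to be dword
 * aligned and within bo->size; without MAP_ASYNC, iris_bo_map() will stall
 * until the GPU is done with the buffer.
 */
static inline bool
example_poke_dword(struct pipe_debug_callback *dbg, struct iris_bo *bo,
                   uint64_t offset, uint32_t value)
{
   uint32_t *map = iris_bo_map(dbg, bo, MAP_WRITE);
   if (!map)
      return false;

   map[offset / sizeof(uint32_t)] = value;
   iris_bo_unmap(bo);
   return true;
}
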
/**
 * Waits for rendering to an object by the GPU to have completed.
 *
 * This is not required for any access to the BO by bo_map,
 * bo_subdata, etc.  It is merely a way for the driver to implement
 * glFinish.
 */
void iris_bo_wait_rendering(struct iris_bo *bo);


/**
 * Unref a buffer manager instance.
 */
void iris_bufmgr_unref(struct iris_bufmgr *bufmgr);

/**
 * Create a visible name for a buffer which can be used by other apps
 *
 * \param bo Buffer to create a name for
 * \param name Returned name
 */
int iris_bo_flink(struct iris_bo *bo, uint32_t *name);

/**
 * Returns true if the BO is backed by a real GEM object, false if it's
 * a wrapper that's suballocated from a larger BO.
 */
static inline bool
iris_bo_is_real(struct iris_bo *bo)
{
   return bo->gem_handle != 0;
}

/**
 * Unwrap any slab-allocated wrapper BOs to get the BO for the underlying
 * backing storage, which is a real BO associated with a GEM object.
 */
static inline struct iris_bo *
iris_get_backing_bo(struct iris_bo *bo)
{
   if (!iris_bo_is_real(bo))
      bo = bo->slab.real;

   /* We only allow one level of wrapping. */
   assert(iris_bo_is_real(bo));

   return bo;
}

/**
 * Is this buffer shared with external clients (imported or exported)?
 */
static inline bool
iris_bo_is_external(const struct iris_bo *bo)
{
   bo = iris_get_backing_bo((struct iris_bo *) bo);
   return bo->real.exported || bo->real.imported;
}

static inline bool
iris_bo_is_imported(const struct iris_bo *bo)
{
   bo = iris_get_backing_bo((struct iris_bo *) bo);
   return bo->real.imported;
}

static inline bool
iris_bo_is_exported(const struct iris_bo *bo)
{
   bo = iris_get_backing_bo((struct iris_bo *) bo);
   return bo->real.exported;
}

static inline enum iris_mmap_mode
iris_bo_mmap_mode(const struct iris_bo *bo)
{
   bo = iris_get_backing_bo((struct iris_bo *) bo);
   return bo->real.mmap_mode;
}

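/*
 * Illustrative sketch (hypothetical, not part of this header): a caller
 * might use the mmap mode to decide whether CPU reads of this BO are cheap
 * (write-back, CPU caches enabled) or should go through a staging copy.
 */
static inline bool
example_cpu_reads_are_cached(const struct iris_bo *bo)
{
   return iris_bo_mmap_mode(bo) == IRIS_MMAP_WB;
}
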
/**
 * Mark a buffer as being shared with other external clients.
 */
void iris_bo_mark_exported(struct iris_bo *bo);

/**
 * Returns true if mapping the buffer for write could cause the process
 * to block, due to the object being active in the GPU.
 */
bool iris_bo_busy(struct iris_bo *bo);

/**
 * Specify the volatility of the buffer.
 * \param bo Buffer whose volatility is being set
 * \param madv The purgeable status
 *
 * Use I915_MADV_DONTNEED to mark the buffer as purgeable, and it will be
 * reclaimed under memory pressure. If you subsequently require the buffer,
 * then you must pass I915_MADV_WILLNEED to mark the buffer as required.
 *
 * Returns 1 if the buffer was retained, or 0 if it was discarded whilst
 * marked as I915_MADV_DONTNEED.
 */
int iris_bo_madvise(struct iris_bo *bo, int madv);

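/*
 * Illustrative sketch (hypothetical, not part of this header): taking a BO
 * back out of a purgeable cache.  "madv_willneed" stands in for the
 * kernel's I915_MADV_WILLNEED value, which comes from the DRM uAPI headers
 * rather than from this file.
 */
static inline struct iris_bo *
example_reclaim_cached_bo(struct iris_bo *cached_bo, int madv_willneed)
{
   /* If the kernel already purged the pages, the contents are gone and the
    * caller has to drop this BO and allocate a fresh one.
    */
   if (iris_bo_madvise(cached_bo, madv_willneed) == 0) {
      iris_bo_unreference(cached_bo);
      return NULL;
   }

   return cached_bo;
}
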
struct iris_bufmgr *iris_bufmgr_get_for_fd(struct intel_device_info *devinfo,
                                           int fd, bool bo_reuse);
int iris_bufmgr_get_fd(struct iris_bufmgr *bufmgr);

struct iris_bo *iris_bo_gem_create_from_name(struct iris_bufmgr *bufmgr,
                                             const char *name,
                                             unsigned handle);

void* iris_bufmgr_get_aux_map_context(struct iris_bufmgr *bufmgr);

int iris_bo_wait(struct iris_bo *bo, int64_t timeout_ns);

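/*
 * Illustrative sketch (hypothetical, not part of this header): poll first,
 * then wait with a bounded timeout.  This assumes iris_bo_wait() returns 0
 * once the BO is idle and a negative error code (e.g. on timeout) otherwise.
 */
static inline bool
example_wait_up_to_one_ms(struct iris_bo *bo)
{
   if (!iris_bo_busy(bo))
      return true;

   return iris_bo_wait(bo, 1000 * 1000 /* ns */) == 0;
}
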
uint32_t iris_create_hw_context(struct iris_bufmgr *bufmgr);
uint32_t iris_clone_hw_context(struct iris_bufmgr *bufmgr, uint32_t ctx_id);

#define IRIS_CONTEXT_LOW_PRIORITY    ((I915_CONTEXT_MIN_USER_PRIORITY-1)/2)
#define IRIS_CONTEXT_MEDIUM_PRIORITY (I915_CONTEXT_DEFAULT_PRIORITY)
#define IRIS_CONTEXT_HIGH_PRIORITY   ((I915_CONTEXT_MAX_USER_PRIORITY+1)/2)

int iris_hw_context_set_priority(struct iris_bufmgr *bufmgr,
                                 uint32_t ctx_id, int priority);

void iris_destroy_hw_context(struct iris_bufmgr *bufmgr, uint32_t ctx_id);

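/*
 * Illustrative sketch (hypothetical, not part of this header): create a
 * context and request elevated priority, keeping the default priority if
 * the kernel refuses.  Assumes a zero context ID means creation failed and
 * that the I915_CONTEXT_* values are available from the DRM uAPI headers.
 */
static inline uint32_t
example_create_high_priority_context(struct iris_bufmgr *bufmgr)
{
   uint32_t ctx_id = iris_create_hw_context(bufmgr);
   if (ctx_id == 0)
      return 0;

   /* Failure here is non-fatal: the context simply stays at the default. */
   iris_hw_context_set_priority(bufmgr, ctx_id, IRIS_CONTEXT_HIGH_PRIORITY);

   return ctx_id;
}
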
int iris_gem_get_tiling(struct iris_bo *bo, uint32_t *tiling);
int iris_gem_set_tiling(struct iris_bo *bo, const struct isl_surf *surf);

int iris_bo_export_dmabuf(struct iris_bo *bo, int *prime_fd);
struct iris_bo *iris_bo_import_dmabuf(struct iris_bufmgr *bufmgr, int prime_fd);

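/*
 * Illustrative sketch (hypothetical, not part of this header): hand a BO to
 * another process or API as a dma-buf.  Assumes iris_bo_export_dmabuf()
 * returns 0 on success; the returned fd keeps the memory alive and is owned
 * by the caller, who must eventually close() it.
 */
static inline int
example_share_bo_as_dmabuf(struct iris_bo *bo)
{
   int prime_fd = -1;

   if (iris_bo_export_dmabuf(bo, &prime_fd) != 0)
      return -1;

   return prime_fd;
}
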
/**
 * Exports a bo as a GEM handle into a given DRM file descriptor
 * \param bo Buffer to export
 * \param drm_fd File descriptor where the new handle is created
 * \param out_handle Pointer to store the new handle
 *
 * Returns 0 if the buffer was successfully exported, a non-zero error code
 * otherwise.
 */
int iris_bo_export_gem_handle_for_device(struct iris_bo *bo, int drm_fd,
                                         uint32_t *out_handle);

uint32_t iris_bo_export_gem_handle(struct iris_bo *bo);

int iris_reg_read(struct iris_bufmgr *bufmgr, uint32_t offset, uint64_t *out);

/**
 * Returns the BO's address relative to the appropriate base address.
 *
 * All of our base addresses are programmed to the start of a 4GB region,
 * so simply returning the bottom 32 bits of the BO address will give us
 * the offset from whatever base address corresponds to that memory region.
 */
static inline uint32_t
iris_bo_offset_from_base_address(struct iris_bo *bo)
{
   /* This only works for buffers in the memory zones corresponding to a
    * base address - the top, unbounded memory zone doesn't have a base.
    */
   assert(bo->address < IRIS_MEMZONE_OTHER_START);
   return bo->address;
}

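/*
 * Illustrative sketch (hypothetical, not part of this header): the offset
 * actually programmed into a state packet is the BO's offset from its zone's
 * base address plus the location of the state within the BO.
 */
static inline uint32_t
example_state_offset(struct iris_bo *bo, uint32_t offset_in_bo)
{
   return iris_bo_offset_from_base_address(bo) + offset_in_bo;
}
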
/**
 * Track access of a BO from the specified caching domain and sequence number.
 *
 * Can be used without locking.  Only the most recent access (i.e. highest
 * seqno) is tracked.
 */
static inline void
iris_bo_bump_seqno(struct iris_bo *bo, uint64_t seqno,
                   enum iris_domain type)
{
   uint64_t *const last_seqno = &bo->last_seqnos[type];
   uint64_t tmp, prev_seqno = p_atomic_read(last_seqno);

   /* Lock-free update: retry the compare-and-swap until either the stored
    * seqno is already at least ours or our value lands.
    */
   while (prev_seqno < seqno &&
          prev_seqno != (tmp = p_atomic_cmpxchg(last_seqno, prev_seqno, seqno)))
      prev_seqno = tmp;
}

enum iris_memory_zone iris_memzone_for_address(uint64_t address);

int iris_bufmgr_create_screen_id(struct iris_bufmgr *bufmgr);

simple_mtx_t *iris_bufmgr_get_bo_deps_lock(struct iris_bufmgr *bufmgr);

#endif /* IRIS_BUFMGR_H */