/*
 * Copyright © 2011 Marek Olšák <maraeo@gmail.com>
 * Copyright © 2015 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 */

#ifndef AMDGPU_CS_H
#define AMDGPU_CS_H

#include "amdgpu_bo.h"
#include "util/u_memory.h"
#include "drm-uapi/amdgpu_drm.h"

/* Smaller submits mean the GPU gets busy sooner and there is less
 * waiting for buffers and fences. Proof:
 *   http://www.phoronix.com/scan.php?page=article&item=mesa-111-si&num=1
 */
#define IB_MAX_SUBMIT_DWORDS (20 * 1024)

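/* A wrapper around a kernel GPU context. It owns the user fence BO and is
 * reference-counted so it can outlive the pipe context that created it
 * (e.g. while fences still point to it). num_rejected_cs counts submissions
 * rejected by the kernel and is used to report lost contexts.
 */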
struct amdgpu_ctx {
   struct amdgpu_winsys *ws;
   amdgpu_context_handle ctx;
   amdgpu_bo_handle user_fence_bo;
   uint64_t *user_fence_cpu_address_base;
   int refcount;
   unsigned initial_num_total_rejected_cs;
   unsigned num_rejected_cs;
};

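/* One entry in a submission's buffer list: the BO plus how this CS uses it.
 * The union member that applies depends on whether the BO is a real, slab,
 * or sparse buffer.
 */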
struct amdgpu_cs_buffer {
   struct amdgpu_winsys_bo *bo;
   union {
      struct {
         uint32_t priority_usage;
      } real;
      struct {
         uint32_t real_idx; /* index of underlying real BO */
      } slab;
   } u;
   enum radeon_bo_usage usage;
};

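/* The IBs that can make up one submission: an optional preamble IB followed
 * by the main IB. */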
enum ib_type {
   IB_PREAMBLE,
   IB_MAIN,
   IB_NUM,
};

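/* State for building one IB. IBs are sub-allocated out of big_ib_buffer;
 * ib_mapped is its CPU mapping and used_ib_space is the current offset.
 * ptr_ib_size points to the dword that holds the IB size, which lives inside
 * the IB itself when IBs are chained.
 */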
struct amdgpu_ib {
   struct radeon_cmdbuf *rcs; /* pointer to the driver-owned data */

   /* A buffer out of which new IBs are allocated. */
   struct pb_buffer        *big_ib_buffer;
   uint8_t                 *ib_mapped;
   unsigned                used_ib_space;

   /* The maximum size seen in cs_check_space. If the driver calls
    * cs_check_space and then flushes, the newly allocated IB should be at
    * least this large.
    */
   unsigned                max_check_space_size;

   unsigned                max_ib_size;
   uint32_t                *ptr_ib_size;
   bool                    ptr_ib_size_inside_ib;
   enum ib_type            ib_type;
};

struct amdgpu_fence_list {
   struct pipe_fence_handle    **list;
   unsigned                    num;
   unsigned                    max;
};

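/* Everything that belongs to one submission: the IB chunks, the three buffer
 * lists (real, slab, sparse), fence and syncobj dependencies, syncobjs to
 * signal, and the fence of the submission itself. Each amdgpu_cs owns two of
 * these and flips between them (see csc/cst below).
 */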
struct amdgpu_cs_context {
   struct drm_amdgpu_cs_chunk_ib ib[IB_NUM];
   uint32_t                    *ib_main_addr; /* the beginning of the IB before chaining */

   /* Buffers. */
   unsigned                    max_real_buffers;
   unsigned                    num_real_buffers;
   struct amdgpu_cs_buffer     *real_buffers;

   unsigned                    num_slab_buffers;
   unsigned                    max_slab_buffers;
   struct amdgpu_cs_buffer     *slab_buffers;

   unsigned                    num_sparse_buffers;
   unsigned                    max_sparse_buffers;
   struct amdgpu_cs_buffer     *sparse_buffers;

   int16_t                     *buffer_indices_hashlist;

   struct amdgpu_winsys_bo     *last_added_bo;
   unsigned                    last_added_bo_index;
   unsigned                    last_added_bo_usage;
   uint32_t                    last_added_bo_priority_usage;

   struct amdgpu_fence_list    fence_dependencies;
   struct amdgpu_fence_list    syncobj_dependencies;
   struct amdgpu_fence_list    syncobj_to_signal;

   struct pipe_fence_handle    *fence;

   /* the error returned from cs_flush for non-async submissions */
   int                         error_code;

   /* TMZ: will this command be submitted using the TMZ flag */
   bool secure;
};

#define BUFFER_HASHLIST_SIZE 4096

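/* A command stream as seen by the pipe driver: the main IB plus the
 * double-buffered submission state. */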
struct amdgpu_cs {
   struct amdgpu_ib main; /* must be first because this is inherited */
   struct amdgpu_winsys *ws;
   struct amdgpu_ctx *ctx;
   enum ring_type ring_type;
   struct drm_amdgpu_cs_chunk_fence fence_chunk;

   /* We flip between these two CS. While one is being consumed
    * by the kernel in another thread, the other one is being filled
    * by the pipe driver. */
   struct amdgpu_cs_context csc1;
   struct amdgpu_cs_context csc2;
   /* The currently-used CS. */
   struct amdgpu_cs_context *csc;
   /* The CS currently owned by the other thread. */
   struct amdgpu_cs_context *cst;
   /* buffer_indices_hashlist[hash(bo)] returns -1 if the bo isn't part of
    * any buffer list, or the index where the bo could be found. Since
    * 1) hash collisions between two different BOs can happen and 2) we use
    * a single hashlist for the three buffer lists, this is only a hint.
    * amdgpu_lookup_buffer uses this hint to speed up buffer lookups.
    */
   int16_t buffer_indices_hashlist[BUFFER_HASHLIST_SIZE];

   /* Flush CS. */
   void (*flush_cs)(void *ctx, unsigned flags, struct pipe_fence_handle **fence);
   void *flush_data;
   bool stop_exec_on_failure;
   bool noop;
   bool has_chaining;

   struct util_queue_fence flush_completed;
   struct pipe_fence_handle *next_fence;
   struct pb_buffer *preamble_ib_bo;
};

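/* A fence is either tied to a submission on a context (ctx != NULL) or
 * backed by a DRM syncobj (ctx == NULL, see amdgpu_fence_is_syncobj). */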
struct amdgpu_fence {
   struct pipe_reference reference;
   /* If ctx == NULL, this fence is syncobj-based. */
   uint32_t syncobj;

   struct amdgpu_winsys *ws;
   struct amdgpu_ctx *ctx;  /* submission context */
   struct amdgpu_cs_fence fence;
   uint64_t *user_fence_cpu_address;

   /* Whether the fence has been submitted. This is unsignalled for deferred
    * fences (cs->next_fence) and while an IB is still being submitted in the
    * submit thread. */
   struct util_queue_fence submitted;

   volatile int signalled;              /* bool (int for atomicity) */
};

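/* Return true if the fence is backed by a DRM syncobj rather than a
 * submission on a context. */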
static inline bool amdgpu_fence_is_syncobj(struct amdgpu_fence *fence)
{
   return fence->ctx == NULL;
}

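/* Drop a reference to the context. The kernel context and the user fence BO
 * are destroyed when the last reference goes away. */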
static inline void amdgpu_ctx_unref(struct amdgpu_ctx *ctx)
{
   if (p_atomic_dec_zero(&ctx->refcount)) {
      amdgpu_cs_ctx_free(ctx->ctx);
      amdgpu_bo_free(ctx->user_fence_bo);
      FREE(ctx);
   }
}

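/* Update a fence pointer with reference counting. When the last reference to
 * the old fence is dropped, its syncobj or context reference is released and
 * the fence is freed. A minimal usage sketch (hypothetical variable names):
 *
 *    struct pipe_fence_handle *f = NULL;
 *    amdgpu_fence_reference(&f, new_fence);  // f now holds a reference
 *    amdgpu_fence_reference(&f, NULL);       // drops it, possibly freeing it
 */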
static inline void amdgpu_fence_reference(struct pipe_fence_handle **dst,
                                          struct pipe_fence_handle *src)
{
   struct amdgpu_fence **adst = (struct amdgpu_fence **)dst;
   struct amdgpu_fence *asrc = (struct amdgpu_fence *)src;

   if (pipe_reference(&(*adst)->reference, &asrc->reference)) {
      struct amdgpu_fence *fence = *adst;

      if (amdgpu_fence_is_syncobj(fence))
         amdgpu_cs_destroy_syncobj(fence->ws->dev, fence->syncobj);
      else
         amdgpu_ctx_unref(fence->ctx);

      util_queue_fence_destroy(&fence->submitted);
      FREE(fence);
   }
   *adst = asrc;
}

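/* Return the index of the bo in whichever buffer list of cs contains it
 * (real, slab, or sparse), or -1 if the bo isn't referenced. */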
int amdgpu_lookup_buffer_any_type(struct amdgpu_cs_context *cs, struct amdgpu_winsys_bo *bo);

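/* Convert the driver-visible radeon_cmdbuf back to the winsys command
 * stream. */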
static inline struct amdgpu_cs *
amdgpu_cs(struct radeon_cmdbuf *rcs)
{
   struct amdgpu_cs *cs = (struct amdgpu_cs*)rcs->priv;
   assert(!cs || cs->main.ib_type == IB_MAIN);
   return cs;
}

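/* container_of-style helper: recover a pointer to the enclosing struct from
 * a pointer to one of its members. */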
#define get_container(member_ptr, container_type, container_member) \
   (container_type *)((char *)(member_ptr) - offsetof(container_type, container_member))

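/* Check whether the bo appears in any buffer list of the current submission. */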
static inline bool
amdgpu_bo_is_referenced_by_cs(struct amdgpu_cs *cs,
                              struct amdgpu_winsys_bo *bo)
{
   return amdgpu_lookup_buffer_any_type(cs->csc, bo) != -1;
}

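/* Like amdgpu_bo_is_referenced_by_cs, but also require that the buffer is
 * referenced with at least one of the given usage flags. */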
static inline bool
amdgpu_bo_is_referenced_by_cs_with_usage(struct amdgpu_cs *cs,
                                         struct amdgpu_winsys_bo *bo,
                                         enum radeon_bo_usage usage)
{
   int index;
   struct amdgpu_cs_buffer *buffer;

   index = amdgpu_lookup_buffer_any_type(cs->csc, bo);
   if (index == -1)
      return false;

   buffer = bo->bo ? &cs->csc->real_buffers[index] :
            bo->base.usage & RADEON_FLAG_SPARSE ? &cs->csc->sparse_buffers[index] :
            &cs->csc->slab_buffers[index];

   return (buffer->usage & usage) != 0;
}

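/* Wait for the fence, returning true if it signalled within the timeout.
 * "absolute" selects whether the timeout is an absolute deadline or a
 * relative duration. */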
bool amdgpu_fence_wait(struct pipe_fence_handle *fence, uint64_t timeout,
                       bool absolute);
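/* Add fences to the buffer, i.e. mark the buffer as busy until they signal. */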
void amdgpu_add_fences(struct amdgpu_winsys_bo *bo,
                       unsigned num_fences,
                       struct pipe_fence_handle **fences);
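/* Wait until a previously queued flush of this CS has finished in the
 * submission thread. */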
void amdgpu_cs_sync_flush(struct radeon_cmdbuf *rcs);
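/* Plug the command-stream entry points into the winsys function table. */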
void amdgpu_cs_init_functions(struct amdgpu_screen_winsys *ws);

#endif