1 /*
2  * Copyright © 2017 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #ifndef CROCUS_BATCH_DOT_H
25 #define CROCUS_BATCH_DOT_H
26 
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <string.h>

#include "util/u_dynarray.h"

#include "common/intel_decoder.h"
#include "drm-uapi/i915_drm.h"

#include "crocus_fence.h"
#include "crocus_fine_fence.h"

#include "crocus_bufmgr.h"
/* The kernel assumes batchbuffers are smaller than 256kB. */
#define MAX_BATCH_SIZE (256 * 1024)

/* 3DSTATE_BINDING_TABLE_POINTERS has a U16 offset from Surface State Base
 * Address, which means that we can't put binding tables beyond 64kB.  This
 * effectively limits the maximum statebuffer size to 64kB.
 */
#define MAX_STATE_SIZE (64 * 1024)

/* Target sizes - flush approximately at this point.  BATCH_SZ is the
 * threshold checked by crocus_require_command_space() for the command
 * buffer; STATE_SZ is the threshold checked by
 * crocus_require_statebuffer_space() for the state buffer.
 */
#define BATCH_SZ (20 * 1024)
#define STATE_SZ (16 * 1024)
52 
/** Names the kind of batch a struct crocus_batch represents. */
enum crocus_batch_name {
   CROCUS_BATCH_RENDER,
   CROCUS_BATCH_COMPUTE,
};

/** Number of enumerators in enum crocus_batch_name; keep the two in sync. */
#define CROCUS_BATCH_COUNT 2
59 
/**
 * A reference to a location within a buffer object, plus the flags to use
 * when a relocation for it is emitted.
 */
struct crocus_address {
   /** Buffer object being referenced. */
   struct crocus_bo *bo;
   /** Signed offset relative to \c bo (presumably in bytes - confirm). */
   int32_t offset;
   /** NOTE(review): presumably a mask of RELOC_* bits - confirm with users. */
   uint32_t reloc_flags;
};
65 
/**
 * An array of kernel relocation entries together with its bookkeeping.
 */
struct crocus_reloc_list {
   /** Storage for the relocation entries. */
   struct drm_i915_gem_relocation_entry *relocs;
   /** NOTE(review): presumably the number of entries in use - confirm. */
   int reloc_count;
   /** NOTE(review): presumably the allocated capacity of \c relocs - confirm. */
   int reloc_array_size;
};
71 
72 struct crocus_growing_bo {
73    struct crocus_bo *bo;
74    void *map;
75    void *map_next;
76    struct crocus_bo *partial_bo;
77    void *partial_bo_map;
78    unsigned partial_bytes;
79    struct crocus_reloc_list relocs;
80    unsigned used;
81 };
82 
83 struct crocus_batch {
84    struct crocus_context *ice;
85    struct crocus_screen *screen;
86    struct pipe_debug_callback *dbg;
87    struct pipe_device_reset_callback *reset;
88 
89    /** What batch is this? (e.g. CROCUS_BATCH_RENDER/COMPUTE) */
90    enum crocus_batch_name name;
91 
92    /** buffers: command, state */
93    struct crocus_growing_bo command, state;
94 
95    /** Size of the primary batch if we've moved on to a secondary. */
96    unsigned primary_batch_size;
97 
98    bool state_base_address_emitted;
99    uint8_t pipe_controls_since_last_cs_stall;
100 
101    uint32_t hw_ctx_id;
102 
103    uint32_t valid_reloc_flags;
104 
105    bool use_shadow_copy;
106    bool no_wrap;
107 
108    /** The validation list */
109    struct drm_i915_gem_exec_object2 *validation_list;
110    struct crocus_bo **exec_bos;
111    int exec_count;
112    int exec_array_size;
113 
114    /** Whether INTEL_BLACKHOLE_RENDER is enabled in the batch (aka first
115     * instruction is a MI_BATCH_BUFFER_END).
116     */
117    bool noop_enabled;
118 
119    /**
120     * A list of crocus_syncobjs associated with this batch.
121     *
122     * The first list entry will always be a signalling sync-point, indicating
123     * that this batch has completed.  The others are likely to be sync-points
124     * to wait on before executing the batch.
125     */
126    struct util_dynarray syncobjs;
127 
128    /** A list of drm_i915_exec_fences to have execbuf signal or wait on */
129    struct util_dynarray exec_fences;
130 
131    /** The amount of aperture space (in bytes) used by all exec_bos */
132    int aperture_space;
133 
134    struct {
135       /** Uploader to use for sequence numbers */
136       struct u_upload_mgr *uploader;
137 
138       /** GPU buffer and CPU map where our seqno's will be written. */
139       struct crocus_state_ref ref;
140       uint32_t *map;
141 
142       /** The sequence number to write the next time we add a fence. */
143       uint32_t next;
144    } fine_fences;
145 
146    /** A seqno (and syncobj) for the last batch that was submitted. */
147    struct crocus_fine_fence *last_fence;
148 
149    /** List of other batches which we might need to flush to use a BO */
150    struct crocus_batch *other_batches[CROCUS_BATCH_COUNT - 1];
151 
152    struct {
153       /**
154        * Set of struct brw_bo * that have been rendered to within this
155        * batchbuffer and would need flushing before being used from another
156        * cache domain that isn't coherent with it (i.e. the sampler).
157        */
158       struct hash_table *render;
159 
160       /**
161        * Set of struct brw_bo * that have been used as a depth buffer within
162        * this batchbuffer and would need flushing before being used from
163        * another cache domain that isn't coherent with it (i.e. the sampler).
164        */
165       struct set *depth;
166    } cache;
167 
168    struct intel_batch_decode_ctx decoder;
169    struct hash_table_u64 *state_sizes;
170 
171    /** Have we emitted any draw calls to this batch? */
172    bool contains_draw;
173 
174    /** Batch contains fence signal operation. */
175    bool contains_fence_signal;
176 };
177 
178 static inline bool
batch_has_fine_fence(struct crocus_batch * batch)179 batch_has_fine_fence(struct crocus_batch *batch)
180 {
181    return !!batch->fine_fences.uploader;
182 }
183 
/* Macro form of batch_has_fine_fence(): evaluates to 1 when the batch's
 * fine_fences.uploader pointer is non-NULL and to 0 otherwise.
 */
#define BATCH_HAS_FINE_FENCES(batch) (!!(batch)->fine_fences.uploader)
/* Batch set-up, tear-down and flushing.
 *
 * NOTE(review): the definitions are not part of this header, so the exact
 * semantics of these entry points (e.g. how "estimate" and "priority" are
 * interpreted) should be confirmed against the implementation.
 */
void crocus_init_batch(struct crocus_context *ctx,
                       enum crocus_batch_name name,
                       int priority);
void crocus_batch_free(struct crocus_batch *batch);
void crocus_batch_maybe_flush(struct crocus_batch *batch, unsigned estimate);

/* Callers should use the crocus_batch_flush() wrapper, which forwards the
 * call site's __FILE__ and __LINE__ to _crocus_batch_flush().
 */
void _crocus_batch_flush(struct crocus_batch *batch, const char *file, int line);
#define crocus_batch_flush(batch) _crocus_batch_flush((batch), __FILE__, __LINE__)

bool crocus_batch_references(struct crocus_batch *batch, struct crocus_bo *bo);

bool crocus_batch_prepare_noop(struct crocus_batch *batch, bool noop_enable);
197 
/* Relocation flags, expressed directly as the kernel's EXEC_OBJECT_* bits. */
#define RELOC_WRITE EXEC_OBJECT_WRITE
#define RELOC_NEEDS_GGTT EXEC_OBJECT_NEEDS_GTT
/* Inverted meaning, but using the same bit...emit_reloc will flip it. */
#define RELOC_32BIT EXEC_OBJECT_SUPPORTS_48B_ADDRESS
202 
/* Buffer tracking and relocation entry points.
 *
 * NOTE(review): the definitions are not part of this header; presumably the
 * *_reloc() functions record a relocation at batch_offset in the command or
 * state buffer respectively and return the address to write there - confirm
 * against the implementation.
 */
void crocus_use_pinned_bo(struct crocus_batch *batch, struct crocus_bo *bo,
                          bool writable);
uint64_t crocus_command_reloc(struct crocus_batch *batch, uint32_t batch_offset,
                              struct crocus_bo *target, uint32_t target_offset,
                              unsigned int reloc_flags);
uint64_t crocus_state_reloc(struct crocus_batch *batch, uint32_t batch_offset,
                            struct crocus_bo *target, uint32_t target_offset,
                            unsigned int reloc_flags);

enum pipe_reset_status crocus_batch_check_for_reset(struct crocus_batch *batch);

/* Used by crocus_require_command_space() with grow_state == false to enlarge
 * the command buffer; "used" is the number of bytes already written.
 */
void crocus_grow_buffer(struct crocus_batch *batch, bool grow_state,
                        unsigned used, unsigned new_size);
216 
217 static inline unsigned
crocus_batch_bytes_used(struct crocus_batch * batch)218 crocus_batch_bytes_used(struct crocus_batch *batch)
219 {
220    return batch->command.map_next - batch->command.map;
221 }
222 
223 /**
224  * Ensure the current command buffer has \param size bytes of space
225  * remaining.  If not, this creates a secondary batch buffer and emits
226  * a jump from the primary batch to the start of the secondary.
227  *
228  * Most callers want crocus_get_command_space() instead.
229  */
230 static inline void
crocus_require_command_space(struct crocus_batch * batch,unsigned size)231 crocus_require_command_space(struct crocus_batch *batch, unsigned size)
232 {
233    const unsigned required_bytes = crocus_batch_bytes_used(batch) + size;
234    unsigned used = crocus_batch_bytes_used(batch);
235    if (required_bytes >= BATCH_SZ && !batch->no_wrap) {
236       crocus_batch_flush(batch);
237    } else if (used + size >= batch->command.bo->size) {
238       const unsigned new_size =
239          MIN2(batch->command.bo->size + batch->command.bo->size / 2,
240               MAX_BATCH_SIZE);
241 
242       crocus_grow_buffer(batch, false, used, new_size);
243       batch->command.map_next = (void *)batch->command.map + used;
244       assert(crocus_batch_bytes_used(batch) + size < batch->command.bo->size);
245    }
246 }
247 
248 /**
249  * Allocate space in the current command buffer, and return a pointer
250  * to the mapped area so the caller can write commands there.
251  *
252  * This should be called whenever emitting commands.
253  */
254 static inline void *
crocus_get_command_space(struct crocus_batch * batch,unsigned bytes)255 crocus_get_command_space(struct crocus_batch *batch, unsigned bytes)
256 {
257    crocus_require_command_space(batch, bytes);
258    void *map = batch->command.map_next;
259    batch->command.map_next += bytes;
260    return map;
261 }
262 
/**
 * Helper to emit GPU commands: allocates \p size bytes in the command
 * buffer with crocus_get_command_space() and copies \p data into them.
 */
static inline void
crocus_batch_emit(struct crocus_batch *batch, const void *data, unsigned size)
{
   void *dst = crocus_get_command_space(batch, size);

   memcpy(dst, data, size);
}
272 
273 /**
274  * Get a pointer to the batch's signalling syncobj.  Does not refcount.
275  */
276 static inline struct crocus_syncobj *
crocus_batch_get_signal_syncobj(struct crocus_batch * batch)277 crocus_batch_get_signal_syncobj(struct crocus_batch *batch)
278 {
279    /* The signalling syncobj is the first one in the list. */
280    struct crocus_syncobj *syncobj =
281       ((struct crocus_syncobj **)util_dynarray_begin(&batch->syncobjs))[0];
282    return syncobj;
283 }
284 
285 /**
286  * Take a reference to the batch's signalling syncobj.
287  *
288  * Callers can use this to wait for the the current batch under construction
289  * to complete (after flushing it).
290  */
291 static inline void
crocus_batch_reference_signal_syncobj(struct crocus_batch * batch,struct crocus_syncobj ** out_syncobj)292 crocus_batch_reference_signal_syncobj(struct crocus_batch *batch,
293                                       struct crocus_syncobj **out_syncobj)
294 {
295    struct crocus_syncobj *syncobj = crocus_batch_get_signal_syncobj(batch);
296    crocus_syncobj_reference(batch->screen, out_syncobj, syncobj);
297 }
298 
/**
 * Record the size of a piece of state for use in INTEL_DEBUG=bat printing.
 *
 * Stores \p size in \p ht under the key \p offset_from_base.  Does nothing
 * when \p ht is NULL.
 */
static inline void
crocus_record_state_size(struct hash_table_u64 *ht, uint32_t offset_from_base,
                         uint32_t size)
{
   if (!ht)
      return;

   /* The size itself is stored in the table's pointer-sized value slot. */
   _mesa_hash_table_u64_insert(ht, offset_from_base,
                               (void *)(uintptr_t)size);
}
311 
312 static inline bool
crocus_ptr_in_state_buffer(struct crocus_batch * batch,void * p)313 crocus_ptr_in_state_buffer(struct crocus_batch *batch, void *p)
314 {
315    return (char *)p >= (char *)batch->state.map &&
316           (char *)p < (char *)batch->state.map + batch->state.bo->size;
317 }
318 
319 static inline void
crocus_require_statebuffer_space(struct crocus_batch * batch,int size)320 crocus_require_statebuffer_space(struct crocus_batch *batch, int size)
321 {
322    if (batch->state.used + size >= STATE_SZ)
323       crocus_batch_flush(batch);
324 }
325 #endif
326