/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef IRIS_BATCH_DOT_H
#define IRIS_BATCH_DOT_H

#include <stdint.h>
#include <stdbool.h>
#include <string.h>

#include "util/u_dynarray.h"

#include "drm-uapi/i915_drm.h"
#include "common/intel_decoder.h"

#include "iris_fence.h"
#include "iris_fine_fence.h"

struct iris_context;

/* The kernel assumes batchbuffers are smaller than 256kB. */
#define MAX_BATCH_SIZE (256 * 1024)

/* Terminating the batch takes either 4 bytes for MI_BATCH_BUFFER_END or 12
 * bytes for MI_BATCH_BUFFER_START (when chaining).  Plus another 24 bytes for
 * the seqno write (using PIPE_CONTROL), and another 24 bytes for the ISP
 * invalidation pipe control.
 */
#define BATCH_RESERVED 60

/* Our target batch size - flush approximately at this point. */
#define BATCH_SZ (64 * 1024 - BATCH_RESERVED)
enum iris_batch_name {
   IRIS_BATCH_RENDER,
   IRIS_BATCH_COMPUTE,
};

/* Number of batch names above; used to size other_batches below. */
#define IRIS_BATCH_COUNT 2

struct iris_batch {
   struct iris_context *ice;
   struct iris_screen *screen;
   struct pipe_debug_callback *dbg;
   struct pipe_device_reset_callback *reset;

   /** What batch is this? (e.g. IRIS_BATCH_RENDER/COMPUTE) */
   enum iris_batch_name name;

   /** Current batchbuffer being queued up. */
   struct iris_bo *bo;
   void *map;
   void *map_next;

   /** Size of the primary batch being submitted to execbuf (in bytes). */
   unsigned primary_batch_size;

   /** Total size of all chained batches (in bytes). */
   unsigned total_chained_batch_size;

   /** Last Surface State Base Address set in this hardware context. */
   uint64_t last_surface_base_address;

   uint32_t hw_ctx_id;

   /** A list of all BOs referenced by this batch */
   struct iris_bo **exec_bos;
   int exec_count;
   int exec_array_size;
   /** Bitset of whether this batch writes to BO `i'. */
   BITSET_WORD *bos_written;
   uint32_t max_gem_handle;

   /** Whether INTEL_BLACKHOLE_RENDER is enabled in the batch (aka first
    * instruction is a MI_BATCH_BUFFER_END).
    */
   bool noop_enabled;

   /**
    * A list of iris_syncobjs associated with this batch.
    *
    * The first list entry will always be a signalling sync-point, indicating
    * that this batch has completed.  The others are likely to be sync-points
    * to wait on before executing the batch.
    */
   struct util_dynarray syncobjs;

   /** A list of drm_i915_exec_fences to have execbuf signal or wait on */
   struct util_dynarray exec_fences;

   /** The amount of aperture space (in bytes) used by all exec_bos */
   int aperture_space;

   struct {
      /** Uploader to use for sequence numbers */
      struct u_upload_mgr *uploader;

      /** GPU buffer and CPU map where our seqnos will be written. */
      struct iris_state_ref ref;
      uint32_t *map;

      /** The sequence number to write the next time we add a fence. */
      uint32_t next;
   } fine_fences;

   /** A seqno (and syncobj) for the last batch that was submitted. */
   struct iris_fine_fence *last_fence;

   /** List of other batches which we might need to flush to use a BO */
   struct iris_batch *other_batches[IRIS_BATCH_COUNT - 1];

   struct {
      /**
       * Set of struct iris_bo * that have been rendered to within this
       * batchbuffer and would need flushing before being used from another
       * cache domain that isn't coherent with it (i.e. the sampler).
       */
      struct hash_table *render;
   } cache;

   struct intel_batch_decode_ctx decoder;
   struct hash_table_u64 *state_sizes;

   /**
    * Matrix representation of the cache coherency status of the GPU at the
    * current end point of the batch.  For every i and j,
    * coherent_seqnos[i][j] denotes the seqno of the most recent flush of
    * cache domain j visible to cache domain i (which obviously implies that
    * coherent_seqnos[i][i] is the most recent flush of cache domain i).  This
    * can be used to efficiently determine whether synchronization is
    * necessary before accessing data from cache domain i if it was previously
    * accessed from another cache domain j.
    */
   uint64_t coherent_seqnos[NUM_IRIS_DOMAINS][NUM_IRIS_DOMAINS];
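
   /* Informal usage sketch (the per-BO last_seqnos[] tracking assumed here
    * lives in the buffer manager, not in this header): a barrier before
    * accessing a BO from domain i is only needed when, for some domain j,
    *
    *    bo->last_seqnos[j] > coherent_seqnos[i][j]
    *
    * i.e. the BO was accessed by domain j more recently than the last flush
    * of j that is visible to domain i.
    */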

   /**
    * Sequence number used to track the completion of any subsequent memory
    * operations in the batch until the next sync boundary.
    */
   uint64_t next_seqno;

   /** Have we emitted any draw calls to this batch? */
   bool contains_draw;

   /** Have we emitted any draw calls with next_seqno? */
   bool contains_draw_with_next_seqno;

   /** Batch contains fence signal operation. */
   bool contains_fence_signal;

   /**
    * Number of times iris_batch_sync_region_start() has been called without a
    * matching iris_batch_sync_region_end() on this batch.
    */
   uint32_t sync_region_depth;

   uint32_t last_aux_map_state;
   struct iris_measure_batch *measure;
};

void iris_init_batch(struct iris_context *ice,
                     enum iris_batch_name name,
                     int priority);
void iris_chain_to_new_batch(struct iris_batch *batch);
void iris_batch_free(struct iris_batch *batch);
void iris_batch_maybe_flush(struct iris_batch *batch, unsigned estimate);

void _iris_batch_flush(struct iris_batch *batch, const char *file, int line);
#define iris_batch_flush(batch) _iris_batch_flush((batch), __FILE__, __LINE__)

bool iris_batch_references(struct iris_batch *batch, struct iris_bo *bo);

bool iris_batch_prepare_noop(struct iris_batch *batch, bool noop_enable);

#define RELOC_WRITE EXEC_OBJECT_WRITE

void iris_use_pinned_bo(struct iris_batch *batch, struct iris_bo *bo,
                        bool writable, enum iris_domain access);

enum pipe_reset_status iris_batch_check_for_reset(struct iris_batch *batch);

static inline unsigned
iris_batch_bytes_used(struct iris_batch *batch)
{
   return batch->map_next - batch->map;
}

/**
 * Ensure the current command buffer has \param size bytes of space
 * remaining.  If not, this creates a secondary batch buffer and emits
 * a jump from the primary batch to the start of the secondary.
 *
 * Most callers want iris_get_command_space() instead.
 */
static inline void
iris_require_command_space(struct iris_batch *batch, unsigned size)
{
   const unsigned required_bytes = iris_batch_bytes_used(batch) + size;

   if (required_bytes >= BATCH_SZ) {
      iris_chain_to_new_batch(batch);
   }
}

/**
 * Allocate space in the current command buffer, and return a pointer
 * to the mapped area so the caller can write commands there.
 *
 * This should be called whenever emitting commands.
 */
static inline void *
iris_get_command_space(struct iris_batch *batch, unsigned bytes)
{
   iris_require_command_space(batch, bytes);
   void *map = batch->map_next;
   batch->map_next += bytes;
   return map;
}

/**
 * Helper to emit GPU commands - allocates space, copies them there.
 */
static inline void
iris_batch_emit(struct iris_batch *batch, const void *data, unsigned size)
{
   void *map = iris_get_command_space(batch, size);
   memcpy(map, data, size);
}
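
/* Example usage (illustrative only): emit two pre-packed DWords.  A zero
 * DWord is an MI_NOOP, so this simply pads the batch with two no-ops:
 *
 *    const uint32_t noops[2] = { 0, 0 };
 *    iris_batch_emit(batch, noops, sizeof(noops));
 *
 * In practice, commands are normally packed via the genxml helpers before
 * being handed to iris_batch_emit().
 */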

/**
 * Get a pointer to the batch's signalling syncobj.  Does not refcount.
 */
static inline struct iris_syncobj *
iris_batch_get_signal_syncobj(struct iris_batch *batch)
{
   /* The signalling syncobj is the first one in the list. */
   struct iris_syncobj *syncobj =
      ((struct iris_syncobj **) util_dynarray_begin(&batch->syncobjs))[0];
   return syncobj;
}


/**
 * Take a reference to the batch's signalling syncobj.
 *
 * Callers can use this to wait for the current batch under construction
 * to complete (after flushing it).
 */
static inline void
iris_batch_reference_signal_syncobj(struct iris_batch *batch,
                                    struct iris_syncobj **out_syncobj)
{
   struct iris_syncobj *syncobj = iris_batch_get_signal_syncobj(batch);
   iris_syncobj_reference(batch->screen->bufmgr, out_syncobj, syncobj);
}
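
/* Typical pattern (sketch; the wait itself is handled by the fence code and
 * is not declared in this header):
 *
 *    struct iris_syncobj *syncobj = NULL;
 *    iris_batch_reference_signal_syncobj(batch, &syncobj);
 *    iris_batch_flush(batch);
 *    ...wait on syncobj...
 *    iris_syncobj_reference(batch->screen->bufmgr, &syncobj, NULL);
 */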

/**
 * Record the size of a piece of state for use in INTEL_DEBUG=bat printing.
 */
static inline void
iris_record_state_size(struct hash_table_u64 *ht,
                       uint32_t offset_from_base,
                       uint32_t size)
{
   if (ht) {
      _mesa_hash_table_u64_insert(ht, offset_from_base,
                                  (void *)(uintptr_t) size);
   }
}

/**
 * Mark the start of a region in the batch with stable synchronization
 * sequence number.  Any buffer object accessed by the batch buffer only needs
 * to be marked once (e.g. via iris_bo_bump_seqno()) within a region delimited
 * by iris_batch_sync_region_start() and iris_batch_sync_region_end().
 */
static inline void
iris_batch_sync_region_start(struct iris_batch *batch)
{
   batch->sync_region_depth++;
}

/**
 * Mark the end of a region in the batch with stable synchronization sequence
 * number.  Should be called once after each call to
 * iris_batch_sync_region_start().
 */
static inline void
iris_batch_sync_region_end(struct iris_batch *batch)
{
   assert(batch->sync_region_depth);
   batch->sync_region_depth--;
}
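
/* Typical bracketing pattern (sketch; IRIS_DOMAIN_OTHER_READ is assumed to be
 * one of the enum iris_domain values):
 *
 *    iris_batch_sync_region_start(batch);
 *    iris_use_pinned_bo(batch, bo, false, IRIS_DOMAIN_OTHER_READ);
 *    ...emit the commands that read from bo...
 *    iris_batch_sync_region_end(batch);
 *
 * All BO accesses within the region share a single stable seqno.
 */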

/**
 * Start a new synchronization section at the current point of the batch,
 * unless disallowed by a previous iris_batch_sync_region_start().
 */
static inline void
iris_batch_sync_boundary(struct iris_batch *batch)
{
   if (!batch->sync_region_depth) {
      batch->contains_draw_with_next_seqno = false;
      batch->next_seqno = p_atomic_inc_return(&batch->screen->last_seqno);
      assert(batch->next_seqno > 0);
   }
}

/**
 * Update the cache coherency status of the batch to reflect a flush of the
 * specified caching domain.
 */
static inline void
iris_batch_mark_flush_sync(struct iris_batch *batch,
                           enum iris_domain access)
{
   batch->coherent_seqnos[access][access] = batch->next_seqno - 1;
}

/**
 * Update the cache coherency status of the batch to reflect an invalidation
 * of the specified caching domain.  All prior flushes of other caches will be
 * considered visible to the specified caching domain.
 */
static inline void
iris_batch_mark_invalidate_sync(struct iris_batch *batch,
                                enum iris_domain access)
{
   for (unsigned i = 0; i < NUM_IRIS_DOMAINS; i++)
      batch->coherent_seqnos[access][i] = batch->coherent_seqnos[i][i];
}
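
/* For example (sketch, assuming IRIS_DOMAIN_OTHER_READ is one of the
 * enum iris_domain values): after emitting a PIPE_CONTROL that invalidates
 * the texture and sampler caches, the state code would record it with
 *
 *    iris_batch_mark_invalidate_sync(batch, IRIS_DOMAIN_OTHER_READ);
 *
 * so subsequent reads from that domain are known to see all prior flushes.
 */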

/**
 * Update the cache coherency status of the batch to reflect a reset.  All
 * previously accessed data can be considered visible to every caching domain
 * thanks to the kernel's heavyweight flushing at batch buffer boundaries.
 */
static inline void
iris_batch_mark_reset_sync(struct iris_batch *batch)
{
   for (unsigned i = 0; i < NUM_IRIS_DOMAINS; i++)
      for (unsigned j = 0; j < NUM_IRIS_DOMAINS; j++)
         batch->coherent_seqnos[i][j] = batch->next_seqno - 1;
}

#endif