1 /*
2  * Copyright © 2017 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * on the rights to use, copy, modify, merge, publish, distribute, sub
8  * license, and/or sell copies of the Software, and to permit persons to whom
9  * the Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
22  */
23 #ifndef CROCUS_CONTEXT_H
24 #define CROCUS_CONTEXT_H
25 
26 #include "pipe/p_context.h"
27 #include "pipe/p_state.h"
28 #include "util/u_debug.h"
29 #include "util/u_threaded_context.h"
30 #include "intel/blorp/blorp.h"
31 #include "intel/dev/intel_debug.h"
32 #include "intel/compiler/brw_compiler.h"
33 #include "crocus_batch.h"
34 #include "crocus_fence.h"
35 #include "crocus_resource.h"
36 #include "crocus_screen.h"
37 #include "util/u_blitter.h"
38 
39 struct crocus_bo;
40 struct crocus_context;
41 struct blorp_batch;
42 struct blorp_params;
43 
/* Driver-wide resource limits. */
#define CROCUS_MAX_TEXTURE_BUFFER_SIZE   (1 << 27)
#define CROCUS_MAX_TEXTURE_SAMPLERS      32

/* Keep these two in lock-step: CROCUS_MAX_ABOS must equal CROCUS_MAX_SSBOS. */
#define CROCUS_MAX_ABOS                  16
#define CROCUS_MAX_SSBOS                 16

#define CROCUS_MAX_VIEWPORTS             16
#define CROCUS_MAX_CLIP_PLANES           8
51 
/**
 * Domain tag carried in the top byte of a packed param value
 * (see BRW_PARAM() / BRW_PARAM_DOMAIN()).
 */
enum crocus_param_domain {
   BRW_PARAM_DOMAIN_BUILTIN = 0,
   BRW_PARAM_DOMAIN_IMAGE   = 1,
};
56 
/* Values for the buffer-object reuse driconf setting. */
enum {
   DRI_CONF_BO_REUSE_DISABLED = 0,
   DRI_CONF_BO_REUSE_ALL      = 1
};
61 
/*
 * Packed 32-bit param encoding:
 *
 *   bits 31..24  domain (BRW_PARAM_DOMAIN_*)
 *   bits 23..0   domain-specific value
 *
 * For the IMAGE domain the value is further split into:
 *
 *   bits 23..8   image index
 *   bits  7..0   offset
 *
 * The unpacking macros must mirror the packing done by BRW_PARAM_IMAGE():
 * the offset occupies a full byte, so it is extracted with a 0xff mask.
 * (A narrower mask would silently truncate any offset >= 16.)
 */
#define BRW_PARAM(domain, val)   ((BRW_PARAM_DOMAIN_##domain << 24) | (val))
#define BRW_PARAM_DOMAIN(param)  ((uint32_t)(param) >> 24)
#define BRW_PARAM_VALUE(param)   ((uint32_t)(param) & 0x00ffffff)
#define BRW_PARAM_IMAGE(idx, offset) BRW_PARAM(IMAGE, ((idx) << 8) | (offset))
#define BRW_PARAM_IMAGE_IDX(value)   (BRW_PARAM_VALUE(value) >> 8)
#define BRW_PARAM_IMAGE_OFFSET(value) (BRW_PARAM_VALUE(value) & 0xff)
68 
69 /**
70  * Dirty flags.  When state changes, we flag some combination of these
71  * to indicate that particular GPU commands need to be re-emitted.
72  *
73  * Each bit typically corresponds to a single 3DSTATE_* command packet, but
74  * in rare cases they map to a group of related packets that need to be
75  * emitted together.
76  *
77  * See crocus_upload_render_state().
78  */
79 #define CROCUS_DIRTY_COLOR_CALC_STATE         (1ull <<  0)
80 #define CROCUS_DIRTY_POLYGON_STIPPLE          (1ull <<  1)
81 #define CROCUS_DIRTY_CC_VIEWPORT              (1ull <<  2)
82 #define CROCUS_DIRTY_SF_CL_VIEWPORT           (1ull <<  3)
83 #define CROCUS_DIRTY_RASTER                   (1ull <<  4)
84 #define CROCUS_DIRTY_CLIP                     (1ull <<  5)
85 #define CROCUS_DIRTY_LINE_STIPPLE             (1ull <<  6)
86 #define CROCUS_DIRTY_VERTEX_ELEMENTS          (1ull <<  7)
87 #define CROCUS_DIRTY_VERTEX_BUFFERS           (1ull <<  8)
88 #define CROCUS_DIRTY_DRAWING_RECTANGLE        (1ull <<  9)
89 #define CROCUS_DIRTY_GEN6_URB                 (1ull << 10)
90 #define CROCUS_DIRTY_DEPTH_BUFFER             (1ull << 11)
91 #define CROCUS_DIRTY_WM                       (1ull << 12)
92 #define CROCUS_DIRTY_SO_DECL_LIST             (1ull << 13)
93 #define CROCUS_DIRTY_STREAMOUT                (1ull << 14)
94 #define CROCUS_DIRTY_GEN4_CONSTANT_COLOR      (1ull << 15)
95 #define CROCUS_DIRTY_GEN4_CURBE               (1ull << 16)
96 #define CROCUS_DIRTY_GEN4_URB_FENCE           (1ull << 17)
97 #define CROCUS_DIRTY_GEN5_PIPELINED_POINTERS  (1ull << 18)
98 #define CROCUS_DIRTY_GEN5_BINDING_TABLE_POINTERS  (1ull << 19)
99 #define CROCUS_DIRTY_GEN6_BLEND_STATE         (1ull << 20)
100 #define CROCUS_DIRTY_GEN6_SCISSOR_RECT        (1ull << 21)
101 #define CROCUS_DIRTY_GEN6_WM_DEPTH_STENCIL    (1ull << 22)
102 #define CROCUS_DIRTY_GEN6_MULTISAMPLE         (1ull << 23)
103 #define CROCUS_DIRTY_GEN6_SAMPLE_MASK         (1ull << 24)
104 #define CROCUS_DIRTY_GEN7_SBE                 (1ull << 25)
105 #define CROCUS_DIRTY_GEN7_L3_CONFIG           (1ull << 26)
106 #define CROCUS_DIRTY_GEN7_SO_BUFFERS          (1ull << 27)
107 #define CROCUS_DIRTY_GEN75_VF                 (1ull << 28)
108 #define CROCUS_DIRTY_RENDER_RESOLVES_AND_FLUSHES  (1ull << 29)
109 #define CROCUS_DIRTY_COMPUTE_RESOLVES_AND_FLUSHES (1ull << 30)
110 #define CROCUS_DIRTY_VF_STATISTICS            (1ull << 31)
111 #define CROCUS_DIRTY_GEN4_CLIP_PROG           (1ull << 32)
112 #define CROCUS_DIRTY_GEN4_SF_PROG             (1ull << 33)
113 #define CROCUS_DIRTY_GEN4_FF_GS_PROG          (1ull << 34)
114 #define CROCUS_DIRTY_GEN6_SAMPLER_STATE_POINTERS (1ull << 35)
115 #define CROCUS_DIRTY_GEN6_SVBI                (1ull << 36)
116 #define CROCUS_DIRTY_GEN8_VF_TOPOLOGY         (1ull << 37)
117 #define CROCUS_DIRTY_GEN8_PMA_FIX             (1ull << 38)
118 #define CROCUS_DIRTY_GEN8_VF_SGVS             (1ull << 39)
119 #define CROCUS_DIRTY_GEN8_PS_BLEND            (1ull << 40)
120 
121 #define CROCUS_ALL_DIRTY_FOR_COMPUTE (CROCUS_DIRTY_COMPUTE_RESOLVES_AND_FLUSHES)
122 
123 #define CROCUS_ALL_DIRTY_FOR_RENDER (~CROCUS_ALL_DIRTY_FOR_COMPUTE)
124 
/**
 * Shader-stage dirty bits.
 *
 * Like the CROCUS_DIRTY_* flags, these mark GPU commands that must be
 * re-emitted after a state change.  The difference is that each kind of
 * state here has one bit per shader stage, laid out contiguously, so the
 * bit for a given stage can be obtained by shifting the first bit of the
 * group by the shader stage index.
 *
 * Note the sampler-state group names its fragment bit _PS while the other
 * groups use _FS.
 *
 * See crocus_upload_render_state().
 */

/* Sampler states: bits 0..5 */
#define CROCUS_STAGE_DIRTY_SAMPLER_STATES_VS    (1ull << 0)
#define CROCUS_STAGE_DIRTY_SAMPLER_STATES_TCS   (1ull << 1)
#define CROCUS_STAGE_DIRTY_SAMPLER_STATES_TES   (1ull << 2)
#define CROCUS_STAGE_DIRTY_SAMPLER_STATES_GS    (1ull << 3)
#define CROCUS_STAGE_DIRTY_SAMPLER_STATES_PS    (1ull << 4)
#define CROCUS_STAGE_DIRTY_SAMPLER_STATES_CS    (1ull << 5)

/* Uncompiled shaders: bits 6..11 */
#define CROCUS_STAGE_DIRTY_UNCOMPILED_VS        (1ull << 6)
#define CROCUS_STAGE_DIRTY_UNCOMPILED_TCS       (1ull << 7)
#define CROCUS_STAGE_DIRTY_UNCOMPILED_TES       (1ull << 8)
#define CROCUS_STAGE_DIRTY_UNCOMPILED_GS        (1ull << 9)
#define CROCUS_STAGE_DIRTY_UNCOMPILED_FS        (1ull << 10)
#define CROCUS_STAGE_DIRTY_UNCOMPILED_CS        (1ull << 11)

/* Compiled shaders: bits 12..17 */
#define CROCUS_STAGE_DIRTY_VS                   (1ull << 12)
#define CROCUS_STAGE_DIRTY_TCS                  (1ull << 13)
#define CROCUS_STAGE_DIRTY_TES                  (1ull << 14)
#define CROCUS_STAGE_DIRTY_GS                   (1ull << 15)
#define CROCUS_STAGE_DIRTY_FS                   (1ull << 16)
#define CROCUS_STAGE_DIRTY_CS                   (1ull << 17)

/* Constants: bits 18..23, starting at CROCUS_SHIFT_FOR_STAGE_DIRTY_CONSTANTS */
#define CROCUS_SHIFT_FOR_STAGE_DIRTY_CONSTANTS  18
#define CROCUS_STAGE_DIRTY_CONSTANTS_VS \
   (1ull << (CROCUS_SHIFT_FOR_STAGE_DIRTY_CONSTANTS + 0))
#define CROCUS_STAGE_DIRTY_CONSTANTS_TCS \
   (1ull << (CROCUS_SHIFT_FOR_STAGE_DIRTY_CONSTANTS + 1))
#define CROCUS_STAGE_DIRTY_CONSTANTS_TES \
   (1ull << (CROCUS_SHIFT_FOR_STAGE_DIRTY_CONSTANTS + 2))
#define CROCUS_STAGE_DIRTY_CONSTANTS_GS \
   (1ull << (CROCUS_SHIFT_FOR_STAGE_DIRTY_CONSTANTS + 3))
#define CROCUS_STAGE_DIRTY_CONSTANTS_FS \
   (1ull << (CROCUS_SHIFT_FOR_STAGE_DIRTY_CONSTANTS + 4))
#define CROCUS_STAGE_DIRTY_CONSTANTS_CS \
   (1ull << (CROCUS_SHIFT_FOR_STAGE_DIRTY_CONSTANTS + 5))

/* Bindings: bits 24..29 */
#define CROCUS_STAGE_DIRTY_BINDINGS_VS          (1ull << 24)
#define CROCUS_STAGE_DIRTY_BINDINGS_TCS         (1ull << 25)
#define CROCUS_STAGE_DIRTY_BINDINGS_TES         (1ull << 26)
#define CROCUS_STAGE_DIRTY_BINDINGS_GS          (1ull << 27)
#define CROCUS_STAGE_DIRTY_BINDINGS_FS          (1ull << 28)
#define CROCUS_STAGE_DIRTY_BINDINGS_CS          (1ull << 29)

/* Every per-stage bit that belongs to the compute stage. */
#define CROCUS_ALL_STAGE_DIRTY_FOR_COMPUTE  \
   (CROCUS_STAGE_DIRTY_CS |                 \
    CROCUS_STAGE_DIRTY_SAMPLER_STATES_CS |  \
    CROCUS_STAGE_DIRTY_UNCOMPILED_CS |      \
    CROCUS_STAGE_DIRTY_CONSTANTS_CS |       \
    CROCUS_STAGE_DIRTY_BINDINGS_CS)

/* Everything that is not a compute-stage bit. */
#define CROCUS_ALL_STAGE_DIRTY_FOR_RENDER \
   (~CROCUS_ALL_STAGE_DIRTY_FOR_COMPUTE)

/* Binding bits of all six stages. */
#define CROCUS_ALL_STAGE_DIRTY_BINDINGS  \
   (CROCUS_STAGE_DIRTY_BINDINGS_VS  |    \
    CROCUS_STAGE_DIRTY_BINDINGS_TCS |    \
    CROCUS_STAGE_DIRTY_BINDINGS_TES |    \
    CROCUS_STAGE_DIRTY_BINDINGS_GS  |    \
    CROCUS_STAGE_DIRTY_BINDINGS_FS  |    \
    CROCUS_STAGE_DIRTY_BINDINGS_CS)

/* Constant bits of the five render stages (compute excluded). */
#define CROCUS_RENDER_STAGE_DIRTY_CONSTANTS  \
   (CROCUS_STAGE_DIRTY_CONSTANTS_VS  |       \
    CROCUS_STAGE_DIRTY_CONSTANTS_TCS |       \
    CROCUS_STAGE_DIRTY_CONSTANTS_TES |       \
    CROCUS_STAGE_DIRTY_CONSTANTS_GS  |       \
    CROCUS_STAGE_DIRTY_CONSTANTS_FS)
185 
/**
 * Non-orthogonal state (NOS) dependencies.
 *
 * A shader program's key may depend on state owned by some Gallium CSO.
 * Each value below names one such CSO; when a CSO marked as a dependency
 * changes, the driver re-computes the shader key, which may in turn trigger
 * a shader recompile.
 */
enum crocus_nos_dep {
   CROCUS_NOS_FRAMEBUFFER         = 0,
   CROCUS_NOS_DEPTH_STENCIL_ALPHA = 1,
   CROCUS_NOS_RASTERIZER          = 2,
   CROCUS_NOS_BLEND               = 3,
   CROCUS_NOS_LAST_VUE_MAP        = 4,
   CROCUS_NOS_TEXTURES            = 5,
   CROCUS_NOS_VERTEX_ELEMENTS     = 6,

   /* Number of dependency kinds; not a dependency itself. */
   CROCUS_NOS_COUNT,
};
205 
206 struct crocus_depth_stencil_alpha_state;
207 
208 /**
209  * Cache IDs for the in-memory program cache (ice->shaders.cache).
210  */
211 enum crocus_program_cache_id {
212    CROCUS_CACHE_VS  = MESA_SHADER_VERTEX,
213    CROCUS_CACHE_TCS = MESA_SHADER_TESS_CTRL,
214    CROCUS_CACHE_TES = MESA_SHADER_TESS_EVAL,
215    CROCUS_CACHE_GS  = MESA_SHADER_GEOMETRY,
216    CROCUS_CACHE_FS  = MESA_SHADER_FRAGMENT,
217    CROCUS_CACHE_CS  = MESA_SHADER_COMPUTE,
218    CROCUS_CACHE_BLORP,
219    CROCUS_CACHE_SF,
220    CROCUS_CACHE_CLIP,
221    CROCUS_CACHE_FF_GS,
222 };
223 
/** @{
 *
 * Flags describing PIPE_CONTROL operations: cache flushes,
 * synchronization, pipelined memory writes, and so on.
 *
 * These are driver-level flags, not hardware bit positions.  The hardware
 * fields move around between generations, so each potential operation gets
 * one flag here and genxml is used to encode the actual packet.
 */
enum pipe_control_flags {
   PIPE_CONTROL_FLUSH_LLC                       = 1 << 1,
   PIPE_CONTROL_LRI_POST_SYNC_OP                = 1 << 2,
   PIPE_CONTROL_STORE_DATA_INDEX                = 1 << 3,
   PIPE_CONTROL_CS_STALL                        = 1 << 4,
   PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET     = 1 << 5,
   PIPE_CONTROL_SYNC_GFDT                       = 1 << 6,
   PIPE_CONTROL_TLB_INVALIDATE                  = 1 << 7,
   PIPE_CONTROL_MEDIA_STATE_CLEAR               = 1 << 8,
   PIPE_CONTROL_WRITE_IMMEDIATE                 = 1 << 9,
   PIPE_CONTROL_WRITE_DEPTH_COUNT               = 1 << 10,
   PIPE_CONTROL_WRITE_TIMESTAMP                 = 1 << 11,
   PIPE_CONTROL_DEPTH_STALL                     = 1 << 12,
   PIPE_CONTROL_RENDER_TARGET_FLUSH             = 1 << 13,
   PIPE_CONTROL_INSTRUCTION_INVALIDATE          = 1 << 14,
   PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE        = 1 << 15,
   PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE = 1 << 16,
   PIPE_CONTROL_NOTIFY_ENABLE                   = 1 << 17,
   PIPE_CONTROL_FLUSH_ENABLE                    = 1 << 18,
   PIPE_CONTROL_DATA_CACHE_FLUSH                = 1 << 19,
   PIPE_CONTROL_VF_CACHE_INVALIDATE             = 1 << 20,
   PIPE_CONTROL_CONST_CACHE_INVALIDATE          = 1 << 21,
   PIPE_CONTROL_STATE_CACHE_INVALIDATE          = 1 << 22,
   PIPE_CONTROL_STALL_AT_SCOREBOARD             = 1 << 23,
   PIPE_CONTROL_DEPTH_CACHE_FLUSH               = 1 << 24,
   PIPE_CONTROL_TILE_CACHE_FLUSH                = 1 << 25,
};

/* Union of the depth, data and render-target cache flush flags. */
#define PIPE_CONTROL_CACHE_FLUSH_BITS      \
   (PIPE_CONTROL_DEPTH_CACHE_FLUSH       | \
    PIPE_CONTROL_DATA_CACHE_FLUSH        | \
    PIPE_CONTROL_RENDER_TARGET_FLUSH)

/* Union of the state, const, VF, texture and instruction invalidate flags. */
#define PIPE_CONTROL_CACHE_INVALIDATE_BITS   \
   (PIPE_CONTROL_STATE_CACHE_INVALIDATE    | \
    PIPE_CONTROL_CONST_CACHE_INVALIDATE    | \
    PIPE_CONTROL_VF_CACHE_INVALIDATE       | \
    PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE  | \
    PIPE_CONTROL_INSTRUCTION_INVALIDATE)
273 
/* How conditional rendering is to be resolved for a draw. */
enum crocus_predicate_state {
   /* RENDER and DONT_RENDER apply when the decision can be made without
    * reading the values in the query object buffer: no conditional render
    * is in progress, the query object has already completed, or something
    * else has already added samples to the preliminary result.
    */
   CROCUS_PREDICATE_STATE_RENDER = 0,
   CROCUS_PREDICATE_STATE_DONT_RENDER = 1,

   /* The decision depends on the result of an MI_PREDICATE command, so the
    * predicate enable bit has to be checked.
    */
   CROCUS_PREDICATE_STATE_USE_BIT = 2,

   /* MI_PREDICATE either doesn't exist or the kernel lacks the features
    * needed to use it.  Stall for the query result.
    */
   CROCUS_PREDICATE_STATE_STALL_FOR_QUERY = 3,
};
293 
294 /** @} */
295 
296 /**
297  * An uncompiled, API-facing shader.  This is the Gallium CSO for shaders.
298  * It primarily contains the NIR for the shader.
299  *
300  * Each API-facing shader can be compiled into multiple shader variants,
301  * based on non-orthogonal state dependencies, recorded in the shader key.
302  *
303  * See crocus_compiled_shader, which represents a compiled shader variant.
304  */
305 struct crocus_uncompiled_shader {
306    struct nir_shader *nir;
307 
308    struct pipe_stream_output_info stream_output;
309 
310    /* A SHA1 of the serialized NIR for the disk cache. */
311    unsigned char nir_sha1[20];
312 
313    unsigned program_id;
314 
315    /** Bitfield of (1 << CROCUS_NOS_*) flags. */
316    unsigned nos;
317 
318    /** Have any shader variants been compiled yet? */
319    bool compiled_once;
320 
321    bool needs_edge_flag;
322 
323    /** Constant data scraped from the shader by nir_opt_large_constants */
324    struct pipe_resource *const_data;
325 
326    /** Surface state for const_data */
327    struct crocus_state_ref const_data_state;
328 };
329 
/*
 * Groups of surfaces within a binding table; used to index the per-group
 * arrays of struct crocus_binding_table.
 */
enum crocus_surface_group {
   CROCUS_SURFACE_GROUP_RENDER_TARGET      = 0,
   CROCUS_SURFACE_GROUP_RENDER_TARGET_READ = 1,
   CROCUS_SURFACE_GROUP_SOL                = 2,
   CROCUS_SURFACE_GROUP_CS_WORK_GROUPS     = 3,
   CROCUS_SURFACE_GROUP_TEXTURE            = 4,
   CROCUS_SURFACE_GROUP_TEXTURE_GATHER     = 5,
   CROCUS_SURFACE_GROUP_IMAGE              = 6,
   CROCUS_SURFACE_GROUP_UBO                = 7,
   CROCUS_SURFACE_GROUP_SSBO               = 8,

   /* Number of groups; not a group itself. */
   CROCUS_SURFACE_GROUP_COUNT,
};
343 
/* Sentinel meaning "no valid binding table index". */
enum {
   CROCUS_SURFACE_NOT_USED = 0xa0a0a0a0,
};
348 
349 struct crocus_binding_table {
350    uint32_t size_bytes;
351 
352    /** Number of surfaces in each group, before compacting. */
353    uint32_t sizes[CROCUS_SURFACE_GROUP_COUNT];
354 
355    /** Initial offset of each group. */
356    uint32_t offsets[CROCUS_SURFACE_GROUP_COUNT];
357 
358    /** Mask of surfaces used in each group. */
359    uint64_t used_mask[CROCUS_SURFACE_GROUP_COUNT];
360 };
361 
362 /**
363  * A compiled shader variant, containing a pointer to the GPU assembly,
364  * as well as program data and other packets needed by state upload.
365  *
366  * There can be several crocus_compiled_shader variants per API-level shader
367  * (crocus_uncompiled_shader), due to state-based recompiles (brw_*_prog_key).
368  */
369 struct crocus_compiled_shader {
370    /** Reference to the uploaded assembly. */
371    uint32_t offset;
372 
373    /* asm size in map */
374    uint32_t map_size;
375 
376    /** The program data (owned by the program cache hash table) */
377    struct brw_stage_prog_data *prog_data;
378    uint32_t prog_data_size;
379 
380    /** A list of system values to be uploaded as uniforms. */
381    enum brw_param_builtin *system_values;
382    unsigned num_system_values;
383 
384    /** Number of constbufs expected by the shader. */
385    unsigned num_cbufs;
386 
387    /**
388     * Derived 3DSTATE_STREAMOUT and 3DSTATE_SO_DECL_LIST packets
389     * (the VUE-based information for transform feedback outputs).
390     */
391    uint32_t *streamout;
392 
393    struct crocus_binding_table bt;
394 
395    uint32_t bind_bo_offset;
396    uint32_t surf_offset[128];//TODO
397 };
398 
399 /**
400  * API context state that is replicated per shader stage.
401  */
402 struct crocus_shader_state {
403    /** Uniform Buffers */
404    struct pipe_constant_buffer constbufs[PIPE_MAX_CONSTANT_BUFFERS];
405 
406    bool sysvals_need_upload;
407 
408    /** Shader Storage Buffers */
409    struct pipe_shader_buffer ssbo[PIPE_MAX_SHADER_BUFFERS];
410 
411    /** Shader Storage Images (image load store) */
412    struct crocus_image_view image[PIPE_MAX_SHADER_IMAGES];
413 
414    struct crocus_sampler_state *samplers[CROCUS_MAX_TEXTURE_SAMPLERS];
415    struct crocus_sampler_view *textures[CROCUS_MAX_TEXTURE_SAMPLERS];
416 
417    /** Bitfield of which constant buffers are bound (non-null). */
418    uint32_t bound_cbufs;
419 
420    /** Bitfield of which image views are bound (non-null). */
421    uint32_t bound_image_views;
422 
423    /** Bitfield of which sampler views are bound (non-null). */
424    uint32_t bound_sampler_views;
425 
426    /** Bitfield of which shader storage buffers are bound (non-null). */
427    uint32_t bound_ssbos;
428 
429    /** Bitfield of which shader storage buffers are writable. */
430    uint32_t writable_ssbos;
431 
432    uint32_t sampler_offset;
433 };
434 
435 /**
436  * The API context (derived from pipe_context).
437  *
438  * Most driver state is tracked here.
439  */
440 struct crocus_context {
441    struct pipe_context ctx;
442    struct threaded_context *thrctx;
443 
444    /** A debug callback for KHR_debug output. */
445    struct pipe_debug_callback dbg;
446 
447    /** A device reset status callback for notifying that the GPU is hosed. */
448    struct pipe_device_reset_callback reset;
449 
450    /** Slab allocator for crocus_transfer_map objects. */
451    struct slab_child_pool transfer_pool;
452 
453    /** Slab allocator for threaded_context's crocus_transfer_map objects */
454    struct slab_child_pool transfer_pool_unsync;
455 
456    struct blorp_context blorp;
457 
458    int batch_count;
459    struct crocus_batch batches[CROCUS_BATCH_COUNT];
460 
461    struct u_upload_mgr *query_buffer_uploader;
462 
463    struct blitter_context *blitter;
464 
465    struct {
466       struct {
467          /**
468           * Either the value of BaseVertex for indexed draw calls or the value
469           * of the argument <first> for non-indexed draw calls.
470           */
471          int firstvertex;
472          int baseinstance;
473       } params;
474 
475       /**
476        * Are the above values the ones stored in the draw_params buffer?
477        * If so, we can compare them against new values to see if anything
478        * changed.  If not, we need to assume they changed.
479        */
480       bool params_valid;
481 
482       /**
483        * Resource and offset that stores draw_parameters from the indirect
484        * buffer or to the buffer that stures the previous values for non
485        * indirect draws.
486        */
487       struct crocus_state_ref draw_params;
488 
489       struct {
490          /**
491           * The value of DrawID. This always comes in from it's own vertex
492           * buffer since it's not part of the indirect draw parameters.
493           */
494          int drawid;
495 
496          /**
497           * Stores if an indexed or non-indexed draw (~0/0). Useful to
498           * calculate BaseVertex as an AND of firstvertex and is_indexed_draw.
499           */
500          int is_indexed_draw;
501       } derived_params;
502 
503       /**
504        * Resource and offset used for GL_ARB_shader_draw_parameters which
505        * contains parameters that are not present in the indirect buffer as
506        * drawid and is_indexed_draw. They will go in their own vertex element.
507        */
508       struct crocus_state_ref derived_draw_params;
509    } draw;
510 
511    struct {
512       struct crocus_uncompiled_shader *uncompiled[MESA_SHADER_STAGES];
513       struct crocus_compiled_shader *prog[MESA_SHADER_STAGES];
514       struct brw_vue_map *last_vue_map;
515 
516       struct crocus_bo *cache_bo;
517       uint32_t cache_next_offset;
518       void *cache_bo_map;
519       struct hash_table *cache;
520 
521       unsigned urb_size;
522 
523       /* gen 4/5 clip/sf progs */
524       struct crocus_compiled_shader *clip_prog;
525       struct crocus_compiled_shader *sf_prog;
526       /* gen4/5 prims, gen6 streamout */
527       struct crocus_compiled_shader *ff_gs_prog;
528       uint32_t clip_offset;
529       uint32_t sf_offset;
530       uint32_t wm_offset;
531       uint32_t vs_offset;
532       uint32_t gs_offset;
533       uint32_t cc_offset;
534 
535       /** Is a GS or TES outputting points or lines? */
536       bool output_topology_is_points_or_lines;
537 
538       /* Track last VS URB entry size */
539       unsigned last_vs_entry_size;
540 
541       /**
542        * Scratch buffers for various sizes and stages.
543        *
544        * Indexed by the "Per-Thread Scratch Space" field's 4-bit encoding,
545        * and shader stage.
546        */
547       struct crocus_bo *scratch_bos[1 << 4][MESA_SHADER_STAGES];
548    } shaders;
549 
550    struct {
551       struct crocus_query *query;
552       bool condition;
553       enum pipe_render_cond_flag mode;
554    } condition;
555 
556    struct intel_perf_context *perf_ctx;
557 
558    struct {
559       uint64_t dirty;
560       uint64_t stage_dirty;
561       uint64_t stage_dirty_for_nos[CROCUS_NOS_COUNT];
562 
563       unsigned num_viewports;
564       unsigned sample_mask;
565       struct crocus_blend_state *cso_blend;
566       struct crocus_rasterizer_state *cso_rast;
567       struct crocus_depth_stencil_alpha_state *cso_zsa;
568       struct crocus_vertex_element_state *cso_vertex_elements;
569       struct pipe_blend_color blend_color;
570       struct pipe_poly_stipple poly_stipple;
571       struct pipe_viewport_state viewports[CROCUS_MAX_VIEWPORTS];
572       struct pipe_scissor_state scissors[CROCUS_MAX_VIEWPORTS];
573       struct pipe_stencil_ref stencil_ref;
574       struct pipe_framebuffer_state framebuffer;
575       struct pipe_clip_state clip_planes;
576 
577       float default_outer_level[4];
578       float default_inner_level[2];
579 
580       /** Bitfield of which vertex buffers are bound (non-null). */
581       uint32_t bound_vertex_buffers;
582       struct pipe_vertex_buffer vertex_buffers[16];
583       uint32_t vb_end[16];
584 
585       bool primitive_restart;
586       unsigned cut_index;
587       enum pipe_prim_type reduced_prim_mode:8;
588       enum pipe_prim_type prim_mode:8;
589       bool prim_is_points_or_lines;
590       uint8_t vertices_per_patch;
591       uint8_t patch_vertices;
592 
593       bool window_space_position;
594 
595       /** The last compute group size */
596       uint32_t last_block[3];
597 
598       /** The last compute grid size */
599       uint32_t last_grid[3];
600       /** Reference to the BO containing the compute grid size */
601       struct crocus_state_ref grid_size;
602 
603       /**
604        * Array of aux usages for drawing, altered to account for any
605        * self-dependencies from resources bound for sampling and rendering.
606        */
607       enum isl_aux_usage draw_aux_usage[BRW_MAX_DRAW_BUFFERS];
608 
609       /** Aux usage of the fb's depth buffer (which may or may not exist). */
610       enum isl_aux_usage hiz_usage;
611 
612       /** Bitfield of whether color blending is enabled for RT[i] */
613       uint8_t blend_enables;
614 
615       /** Are depth writes enabled?  (Depth buffer may or may not exist.) */
616       bool depth_writes_enabled;
617 
618       /** Are stencil writes enabled?  (Stencil buffer may or may not exist.) */
619       bool stencil_writes_enabled;
620 
621       /** GenX-specific current state */
622       struct crocus_genx_state *genx;
623 
624       struct crocus_shader_state shaders[MESA_SHADER_STAGES];
625 
626       /* track if geom shader is active for IVB GT2 workaround */
627       bool gs_enabled;
628       /** Do vertex shader uses shader draw parameters ? */
629       bool vs_uses_draw_params;
630       bool vs_uses_derived_draw_params;
631       bool vs_needs_sgvs_element;
632       bool vs_uses_vertexid;
633       bool vs_uses_instanceid;
634 
635       /** Do vertex shader uses edge flag ? */
636       bool vs_needs_edge_flag;
637 
638       struct pipe_stream_output_target *so_target[PIPE_MAX_SO_BUFFERS];
639       bool streamout_active;
640       int so_targets;
641 
642       bool statistics_counters_enabled;
643 
644       /** Current conditional rendering mode */
645       enum crocus_predicate_state predicate;
646       bool predicate_supported;
647 
648       /**
649        * Query BO with a MI_PREDICATE_RESULT snapshot calculated on the
650        * render context that needs to be uploaded to the compute context.
651        */
652       struct crocus_bo *compute_predicate;
653 
654       /** Is a PIPE_QUERY_PRIMITIVES_GENERATED query active? */
655       bool prims_generated_query_active;
656 
657       /** 3DSTATE_STREAMOUT and 3DSTATE_SO_DECL_LIST packets */
658       uint32_t *streamout;
659 
660       /**
661        * Resources containing streamed state which our render context
662        * currently points to.  Used to re-add these to the validation
663        * list when we start a new batch and haven't resubmitted commands.
664        */
665       struct {
666          struct pipe_resource *res;
667          uint32_t offset;
668          uint32_t size;
669          uint32_t index_size;
670          bool prim_restart;
671       } index_buffer;
672 
673       uint32_t sf_vp_address;
674       uint32_t clip_vp_address;
675       uint32_t cc_vp_address;
676 
677       uint32_t stats_wm;
678       float global_depth_offset_clamp;
679 
680       uint32_t last_xfb_verts_per_prim;
681       uint64_t svbi;
682    } state;
683 
684    /* BRW_NEW_URB_ALLOCATIONS:
685     */
686    struct {
687       uint32_t vsize;                /* vertex size plus header in urb registers */
688       uint32_t gsize;                /* GS output size in urb registers */
689       uint32_t hsize;             /* Tessellation control output size in urb registers */
690       uint32_t dsize;             /* Tessellation evaluation output size in urb registers */
691       uint32_t csize;                /* constant buffer size in urb registers */
692       uint32_t sfsize;                /* setup data size in urb registers */
693 
694       bool constrained;
695 
696       uint32_t nr_vs_entries;
697       uint32_t nr_hs_entries;
698       uint32_t nr_ds_entries;
699       uint32_t nr_gs_entries;
700       uint32_t nr_clip_entries;
701       uint32_t nr_sf_entries;
702       uint32_t nr_cs_entries;
703 
704       uint32_t vs_start;
705       uint32_t hs_start;
706       uint32_t ds_start;
707       uint32_t gs_start;
708       uint32_t clip_start;
709       uint32_t sf_start;
710       uint32_t cs_start;
711       /**
712        * URB size in the current configuration.  The units this is expressed
713        * in are somewhat inconsistent, see intel_device_info::urb::size.
714        *
715        * FINISHME: Represent the URB size consistently in KB on all platforms.
716        */
717       uint32_t size;
718 
719       /* True if the most recently sent _3DSTATE_URB message allocated
720        * URB space for the GS.
721        */
722       bool gs_present;
723 
724       /* True if the most recently sent _3DSTATE_URB message allocated
725        * URB space for the HS and DS.
726        */
727       bool tess_present;
728    } urb;
729 
730    /* GEN4/5 curbe */
731    struct {
732       unsigned wm_start;
733       unsigned wm_size;
734       unsigned clip_start;
735       unsigned clip_size;
736       unsigned vs_start;
737       unsigned vs_size;
738       unsigned total_size;
739 
740       struct crocus_resource *curbe_res;
741       unsigned curbe_offset;
742    } curbe;
743 
744    /**
745     * A buffer containing a marker + description of the driver. This buffer is
746     * added to all execbufs syscalls so that we can identify the driver that
747     * generated a hang by looking at the content of the buffer in the error
748     * state. It is also used for hardware workarounds that require scratch
749     * writes or reads from some unimportant memory. To avoid overriding the
750     * debug data, use the workaround_address field for workarounds.
751     */
752    struct crocus_bo *workaround_bo;
753    unsigned workaround_offset;
754 };
755 
/*
 * Emit a performance message.
 *
 * The message is printed with dbg_printf() when INTEL_DEBUG(DEBUG_PERF) is
 * set and, independently, forwarded through pipe_debug_message() as
 * PERF_INFO when dbg is non-NULL.  Note that dbg appears twice in the
 * expansion, so it should be a side-effect-free expression.
 */
#define perf_debug(dbg, ...)                              \
   do {                                                   \
      if (INTEL_DEBUG(DEBUG_PERF)) {                      \
         dbg_printf(__VA_ARGS__);                         \
      }                                                   \
      if (unlikely(dbg)) {                                \
         pipe_debug_message(dbg, PERF_INFO, __VA_ARGS__); \
      }                                                   \
   } while (0)
762 
763 
/* Context creation and teardown of tracked state. */
struct pipe_context *crocus_create_context(struct pipe_screen *screen,
                                           void *priv,
                                           unsigned flags);

void
crocus_lost_context_state(struct crocus_batch *batch);

/* Per-area hooks that take the pipe_context being set up. */
void
crocus_init_blit_functions(struct pipe_context *ctx);
void
crocus_init_clear_functions(struct pipe_context *ctx);
void
crocus_init_program_functions(struct pipe_context *ctx);
void
crocus_init_resource_functions(struct pipe_context *ctx);

/* Shader variant selection for the render and compute pipelines. */
bool
crocus_update_compiled_shaders(struct crocus_context *ice);
void
crocus_update_compiled_compute_shader(struct crocus_context *ice);
void
crocus_fill_cs_push_const_buffer(struct brw_cs_prog_data *cs_prog_data,
                                 unsigned threads,
                                 uint32_t *dst);
777 
778 
/* crocus_blit.c */

/* Bit flags selecting what crocus_blitter_begin() is asked to handle. */
enum crocus_blitter_op {
   CROCUS_SAVE_TEXTURES       = 1 << 0,
   CROCUS_SAVE_FRAMEBUFFER    = 1 << 1,
   CROCUS_SAVE_FRAGMENT_STATE = 1 << 2,
   CROCUS_DISABLE_RENDER_COND = 1 << 3,
};

void
crocus_blitter_begin(struct crocus_context *ice,
                     enum crocus_blitter_op op,
                     bool render_cond);
788 
/* Fill in a blorp_surf for the given resource, level and aux usage. */
void
crocus_blorp_surf_for_resource(struct crocus_vtable *vtbl,
                               struct isl_device *isl_dev,
                               struct blorp_surf *surf,
                               struct pipe_resource *p_res,
                               enum isl_aux_usage aux_usage,
                               unsigned level,
                               bool is_render_target);

/* Copy src_box of src/src_level to (dstx, dsty, dstz) in dst/dst_level. */
void
crocus_copy_region(struct blorp_context *blorp,
                   struct crocus_batch *batch,
                   struct pipe_resource *dst,
                   unsigned dst_level,
                   unsigned dstx,
                   unsigned dsty,
                   unsigned dstz,
                   struct pipe_resource *src,
                   unsigned src_level,
                   const struct pipe_box *src_box);
804 
805 /* crocus_draw.c */
/* Draw entry point taking a pipe_context, the draw info, a draw-id offset,
 * optional-looking indirect info and an array of `num_draws` start/count/bias
 * records.  All pointer arguments other than `ctx` are const.
 * NOTE(review): the parameter list looks like Gallium's pipe_context
 * draw_vbo hook -- confirm where it is installed. */
void crocus_draw_vbo(struct pipe_context *ctx,
                     const struct pipe_draw_info *info,
                     unsigned drawid_offset,
                     const struct pipe_draw_indirect_info *indirect,
                     const struct pipe_draw_start_count_bias *draws,
                     unsigned num_draws);
/* Compute dispatch entry point; the parameters are unnamed in this
 * prototype (a pipe_context and a const pipe_grid_info). */
void crocus_launch_grid(struct pipe_context *, const struct pipe_grid_info *);
813 
814 /* crocus_pipe_control.c */
815 
/*
 * PIPE_CONTROL / flush helpers.  All of them operate on a crocus_batch.
 * Where present, `reason` is a caller-supplied string and `flags` a 32-bit
 * flag word; NOTE(review): the accepted flag bits and how `reason` is used
 * (presumably for debug output) are not visible here -- confirm in
 * crocus_pipe_control.c.
 */
void crocus_emit_pipe_control_flush(struct crocus_batch *batch,
                                    const char *reason, uint32_t flags);
/* Variant that additionally takes a buffer (`bo`), a byte-looking `offset`
 * and a 64-bit immediate (`imm`).  NOTE(review): presumably the immediate
 * is written to bo+offset by the hardware -- confirm. */
void crocus_emit_pipe_control_write(struct crocus_batch *batch,
                                    const char *reason, uint32_t flags,
                                    struct crocus_bo *bo, uint32_t offset,
                                    uint64_t imm);
void crocus_emit_mi_flush(struct crocus_batch *batch);
void crocus_emit_depth_stall_flushes(struct crocus_batch *batch);
void crocus_emit_post_sync_nonzero_flush(struct crocus_batch *batch);
void crocus_emit_end_of_pipe_sync(struct crocus_batch *batch,
                                  const char *reason, uint32_t flags);
/* Called by the crocus_handle_always_flush_cache() macro in this header
 * when the always_flush_cache driconf option is set. */
void crocus_flush_all_caches(struct crocus_batch *batch);
828 
/*
 * Call crocus_flush_all_caches(batch) when the screen's driconf option
 * `always_flush_cache` is set; otherwise do nothing.
 *
 * The body is wrapped in do { } while (0) so the macro expands to exactly
 * one statement: it can be used as the branch of an if/else without
 * capturing the `else` or leaving a stray empty statement behind.  Callers
 * terminate the invocation with a semicolon, as for a function call.
 *
 * `batch` is parenthesized so that any pointer-valued expression may be
 * passed.  It is expanded twice when the option is enabled, so it should be
 * free of side effects.
 */
#define crocus_handle_always_flush_cache(batch)                    \
   do {                                                            \
      if (unlikely((batch)->screen->driconf.always_flush_cache)) { \
         crocus_flush_all_caches(batch);                           \
      }                                                            \
   } while (0)
832 
/* Setup hook taking the pipe_context being initialized.
 * NOTE(review): presumably installs the flush-related pipe_context
 * callbacks -- confirm against the implementation. */
void crocus_init_flush_functions(struct pipe_context *ctx);
834 
835 /* crocus_program.c */
/* Returns a read-only shader_info pointer for `stage` of the context `ice`.
 * The context is taken as const.  NOTE(review): whether the result can be
 * NULL (e.g. no shader bound for the stage) is not visible here -- confirm. */
const struct shader_info *crocus_get_shader_info(const struct crocus_context *ice,
                                                 gl_shader_stage stage);
/* Returns a buffer object for the given per-thread scratch size and shader
 * stage.  NOTE(review): ownership/lifetime of the returned BO and the units
 * of `per_thread_scratch` (presumably bytes) need confirming. */
struct crocus_bo *crocus_get_scratch_space(struct crocus_context *ice,
                                           unsigned per_thread_scratch,
                                           gl_shader_stage stage);
841 /**
842  * Map a <group, index> pair to a binding table index.
843  *
844  * For example: <UBO, 5> => binding table index 12
845  */
crocus_group_index_to_bti(const struct crocus_binding_table * bt,enum crocus_surface_group group,uint32_t index)846 static inline uint32_t crocus_group_index_to_bti(const struct crocus_binding_table *bt,
847                                                  enum crocus_surface_group group,
848                                                  uint32_t index)
849 {
850    assert(index < bt->sizes[group]);
851    uint64_t mask = bt->used_mask[group];
852    uint64_t bit = 1ull << index;
853    if (bit & mask) {
854       return bt->offsets[group] + util_bitcount64((bit - 1) & mask);
855    } else {
856       return CROCUS_SURFACE_NOT_USED;
857    }
858 }
859 
860 /**
861  * Map a binding table index back to a <group, index> pair.
862  *
863  * For example: binding table index 12 => <UBO, 5>
864  */
865 static inline uint32_t
crocus_bti_to_group_index(const struct crocus_binding_table * bt,enum crocus_surface_group group,uint32_t bti)866 crocus_bti_to_group_index(const struct crocus_binding_table *bt,
867                           enum crocus_surface_group group, uint32_t bti)
868 {
869    uint64_t used_mask = bt->used_mask[group];
870    assert(bti >= bt->offsets[group]);
871 
872    uint32_t c = bti - bt->offsets[group];
873    while (used_mask) {
874       int i = u_bit_scan64(&used_mask);
875       if (c == 0)
876          return i;
877       c--;
878    }
879 
880    return CROCUS_SURFACE_NOT_USED;
881 }
882 
883 
884 /* crocus_disk_cache.c */
885 
/* Store entry point for the shader disk cache.  Takes the cache, the
 * uncompiled shader (`ish`), the compiled shader, a `map` pointer and a
 * program key of `prog_key_size` bytes.  NOTE(review): `map` presumably
 * points at the mapped shader assembly -- confirm in crocus_disk_cache.c. */
void crocus_disk_cache_store(struct disk_cache *cache,
                             const struct crocus_uncompiled_shader *ish,
                             const struct crocus_compiled_shader *shader,
                             void *map,
                             const void *prog_key,
                             uint32_t prog_key_size);
/* Lookup counterpart: takes the context, the uncompiled shader and a
 * program key, and returns a compiled shader pointer.
 * NOTE(review): presumably returns NULL on a cache miss -- confirm. */
struct crocus_compiled_shader *
crocus_disk_cache_retrieve(struct crocus_context *ice,
                           const struct crocus_uncompiled_shader *ish,
                           const void *prog_key,
                           uint32_t prog_key_size);
897 
898 /* crocus_program_cache.c */
899 
/* Lifecycle and debugging entry points for the per-context program cache. */
void crocus_init_program_cache(struct crocus_context *ice);
void crocus_destroy_program_cache(struct crocus_context *ice);
void crocus_print_program_cache(struct crocus_context *ice);
/* Look up a compiled shader by cache id and a key of `key_size` bytes.
 * NOTE(review): presumably returns NULL when nothing matches -- confirm. */
struct crocus_compiled_shader *crocus_find_cached_shader(struct crocus_context *ice,
                                                         enum crocus_program_cache_id,
                                                         uint32_t key_size,
                                                         const void *key);
/* Takes a key, the shader assembly (`asm_size` bytes), its prog_data
 * (`prog_data_size` bytes), stream-out data, `num_system_values` system
 * values, a constant-buffer count and a binding table, and returns a
 * compiled shader.  NOTE(review): ownership of `prog_data`, `streamout` and
 * `sysv` after the call is not visible here -- confirm before freeing. */
struct crocus_compiled_shader *crocus_upload_shader(struct crocus_context *ice,
                                                    enum crocus_program_cache_id,
                                                    uint32_t key_size,
                                                    const void *key,
                                                    const void *assembly,
                                                    uint32_t asm_size,
                                                    struct brw_stage_prog_data *,
                                                    uint32_t prog_data_size,
                                                    uint32_t *streamout,
                                                    enum brw_param_builtin *sysv,
                                                    unsigned num_system_values,
                                                    unsigned num_cbufs,
                                                    const struct crocus_binding_table *bt);
/* Returns an untyped read-only pointer selected by cache id and
 * `program_string_id`.  NOTE(review): presumably the key of an earlier
 * compile of the same program -- confirm. */
const void *crocus_find_previous_compile(const struct crocus_context *ice,
                                         enum crocus_program_cache_id cache_id,
                                         unsigned program_string_id);
/* Shader lookup/upload entry points operating on a blorp_batch.  Both
 * report through `kernel_out` and `prog_data_out` and return a bool.
 * NOTE(review): presumably true means success/found -- confirm. */
bool crocus_blorp_lookup_shader(struct blorp_batch *blorp_batch,
                                const void *key,
                                uint32_t key_size,
                                uint32_t *kernel_out,
                                void *prog_data_out);
bool crocus_blorp_upload_shader(struct blorp_batch *blorp_batch,
                                uint32_t stage,
                                const void *key, uint32_t key_size,
                                const void *kernel, uint32_t kernel_size,
                                const struct brw_stage_prog_data *prog_data,
                                uint32_t prog_data_size,
                                uint32_t *kernel_out,
                                void *prog_data_out);
936 
937 /* crocus_resolve.c */
938 
/* Pre-draw resolve entry points.  Both take the context, the batch and a
 * `draw_aux_buffer_disabled` bool pointer; the first additionally takes a
 * shader stage and a `consider_framebuffer` flag.
 * NOTE(review): `draw_aux_buffer_disabled` is presumably an output array
 * indexed by color attachment -- confirm its required length. */
void crocus_predraw_resolve_inputs(struct crocus_context *ice,
                                   struct crocus_batch *batch,
                                   bool *draw_aux_buffer_disabled,
                                   gl_shader_stage stage,
                                   bool consider_framebuffer);
void crocus_predraw_resolve_framebuffer(struct crocus_context *ice,
                                        struct crocus_batch *batch,
                                        bool *draw_aux_buffer_disabled);
/* Post-draw counterpart taking the same context and batch. */
void crocus_postdraw_update_resolve_tracking(struct crocus_context *ice,
                                             struct crocus_batch *batch);
/* Cache-tracking helpers operating on a batch and, where present, a buffer
 * object.  The render variants also take the ISL format and aux usage.
 * NOTE(review): exact flush conditions are not visible from this header. */
void crocus_cache_sets_clear(struct crocus_batch *batch);
void crocus_flush_depth_and_render_caches(struct crocus_batch *batch);
void crocus_cache_flush_for_read(struct crocus_batch *batch, struct crocus_bo *bo);
void crocus_cache_flush_for_render(struct crocus_batch *batch,
                                   struct crocus_bo *bo,
                                   enum isl_format format,
                                   enum isl_aux_usage aux_usage);
void crocus_render_cache_add_bo(struct crocus_batch *batch,
                                struct crocus_bo *bo,
                                enum isl_format format,
                                enum isl_aux_usage aux_usage);
void crocus_cache_flush_for_depth(struct crocus_batch *batch, struct crocus_bo *bo);
void crocus_depth_cache_add_bo(struct crocus_batch *batch, struct crocus_bo *bo);
/* Driver query enumeration for a pipe_screen: `index` selects the entry and
 * `info` receives it.  NOTE(review): the meaning of the int return value
 * and of a NULL `info` is not visible here -- confirm. */
int crocus_get_driver_query_info(struct pipe_screen *pscreen, unsigned index,
                                 struct pipe_driver_query_info *info);
int crocus_get_driver_query_group_info(struct pipe_screen *pscreen,
                                       unsigned index,
                                       struct pipe_driver_query_group_info *info);
967 
/* Returns a pipe_rasterizer_state pointer for the context.
 * NOTE(review): presumably the currently bound rasterizer state -- confirm. */
struct pipe_rasterizer_state *crocus_get_rast_state(struct crocus_context *ctx);

/* Used by crocus_check_conditional_render() when the predicate is
 * CROCUS_PREDICATE_STATE_STALL_FOR_QUERY; its bool result is returned to
 * the caller unchanged. */
bool crocus_sw_check_cond_render(struct crocus_context *ice);
crocus_check_conditional_render(struct crocus_context * ice)971 static inline bool crocus_check_conditional_render(struct crocus_context *ice)
972 {
973    if (ice->state.predicate == CROCUS_PREDICATE_STATE_STALL_FOR_QUERY)
974       return crocus_sw_check_cond_render(ice);
975    return ice->state.predicate != CROCUS_PREDICATE_STATE_DONT_RENDER;
976 }
977 
/*
 * Pull in the declarations from crocus_genx_protos.h.
 *
 * When the including file has already defined genX, the header is included
 * once with that definition.  Otherwise it is included once per prefix
 * below, with genX(x) temporarily defined to paste gfx4_, gfx45_, gfx5_,
 * gfx6_, gfx7_, gfx75_ or gfx8_ in front of x, so that every prefixed
 * variant of each prototype is declared.  genX is undefined again after
 * each inclusion, so it is not left defined by this branch.
 */
#ifdef genX
#  include "crocus_genx_protos.h"
#else
#  define genX(x) gfx4_##x
#  include "crocus_genx_protos.h"
#  undef genX
#  define genX(x) gfx45_##x
#  include "crocus_genx_protos.h"
#  undef genX
#  define genX(x) gfx5_##x
#  include "crocus_genx_protos.h"
#  undef genX
#  define genX(x) gfx6_##x
#  include "crocus_genx_protos.h"
#  undef genX
#  define genX(x) gfx7_##x
#  include "crocus_genx_protos.h"
#  undef genX
#  define genX(x) gfx75_##x
#  include "crocus_genx_protos.h"
#  undef genX
#  define genX(x) gfx8_##x
#  include "crocus_genx_protos.h"
#  undef genX
#endif
1003 
1004 #endif
1005