/*
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
 /*
  * Authors:
  *   Keith Whitwell <keithw@vmware.com>
  */



#include "brw_context.h"
#include "brw_defines.h"
#include "brw_state.h"
#include "brw_program.h"
#include "drivers/common/meta.h"
#include "brw_batch.h"
#include "brw_buffers.h"
#include "brw_vs.h"
#include "brw_ff_gs.h"
#include "brw_gs.h"
#include "brw_wm.h"
#include "brw_cs.h"
#include "genxml/genX_bits.h"
#include "main/framebuffer.h"

void
brw_enable_obj_preemption(struct brw_context *brw, bool enable)
{
   ASSERTED const struct intel_device_info *devinfo = &brw->screen->devinfo;
   assert(devinfo->ver >= 9);

   if (enable == brw->object_preemption)
      return;

   /* A fixed function pipe flush is required before modifying this field */
   brw_emit_end_of_pipe_sync(brw, PIPE_CONTROL_RENDER_TARGET_FLUSH);

   bool replay_mode = enable ?
      GFX9_REPLAY_MODE_MIDOBJECT : GFX9_REPLAY_MODE_MIDBUFFER;

   /* Enable (or disable) object-level preemption.  CS_CHICKEN1 is a masked
    * register: the high 16 bits select which of the low 16 bits actually
    * get written, so OR'ing in GFX9_REPLAY_MODE_MASK updates only the
    * replay mode field.
    */
   brw_load_register_imm32(brw, CS_CHICKEN1,
                           replay_mode | GFX9_REPLAY_MODE_MASK);

   brw->object_preemption = enable;
}

static void
brw_upload_gfx11_slice_hashing_state(struct brw_context *brw)
{
   const struct intel_device_info *devinfo = &brw->screen->devinfo;
   int subslices_delta =
      devinfo->ppipe_subslices[0] - devinfo->ppipe_subslices[1];
   if (subslices_delta == 0)
      return;

   unsigned size = GFX11_SLICE_HASH_TABLE_length * 4;
   uint32_t hash_address;

   uint32_t *map = brw_state_batch(brw, size, 64, &hash_address);

   unsigned idx = 0;

   unsigned sl_small = 0;
   unsigned sl_big = 1;
   if (subslices_delta > 0) {
      sl_small = 1;
      sl_big = 0;
   }

   /**
    * Create a 16x16 slice hashing table like the following one:
    *
    * [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ]
    * [ 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1 ]
    * [ 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0 ]
    * [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ]
    * [ 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1 ]
    * [ 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0 ]
    * [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ]
    * [ 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1 ]
    * [ 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0 ]
    * [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ]
    * [ 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1 ]
    * [ 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0 ]
    * [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ]
    * [ 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1 ]
    * [ 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0 ]
    * [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ]
    *
    * The table above is used when pixel pipe 0 has fewer subslices than
    * pixel pipe 1. When pixel pipe 0 has more subslices, a similar table
    * with the 0's and 1's inverted is used.
    */
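   /* Each DWORD packs eight 4-bit subslice-select entries; "idx++ % 3"
    * sends every third entry to the pipe with fewer subslices, producing
    * the repeating 2:1 pattern shown above.
    */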
   for (int i = 0; i < GFX11_SLICE_HASH_TABLE_length; i++) {
      uint32_t dw = 0;

      for (int j = 0; j < 8; j++) {
         unsigned slice = idx++ % 3 ? sl_big : sl_small;
         dw |= slice << (j * 4);
      }
      map[i] = dw;
   }

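   /* The "| 1" below sets bit 0 of the state pointer, which is the "Slice
    * Hash State Pointer Valid" bit of this packet.
    */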
   BEGIN_BATCH(2);
   OUT_BATCH(_3DSTATE_SLICE_TABLE_STATE_POINTERS << 16 | (2 - 2));
   OUT_RELOC(brw->batch.state.bo, 0, hash_address | 1);
   ADVANCE_BATCH();

   /* From the gfx10/gfx11 workaround table in the h/w specs:
    *
    *    "On 3DSTATE_3D_MODE, driver must always program bits 31:16 of DW1
    *     a value of 0xFFFF"
    *
    * This means that whenever we update a field with this instruction, we
    * need to update all the others.
    *
    * Since this is the first time we emit this instruction, we are only
    * setting the SLICE_HASHING_TABLE_ENABLE flag, and leaving everything
    * else at its default state (0).
    */
   BEGIN_BATCH(2);
   OUT_BATCH(_3DSTATE_3D_MODE << 16 | (2 - 2));
   OUT_BATCH(0xffff0000 | SLICE_HASHING_TABLE_ENABLE);
   ADVANCE_BATCH();
}

static void
brw_upload_initial_gpu_state(struct brw_context *brw)
{
   const struct intel_device_info *devinfo = &brw->screen->devinfo;
   const struct brw_compiler *compiler = brw->screen->compiler;

   /* On platforms with hardware contexts, we can set our initial GPU state
    * right away rather than doing it via state atoms.  This saves a small
    * amount of overhead on every draw call.
    */
   if (!brw->hw_ctx)
      return;

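   /* Sandybridge requires a workaround flush (a PIPE_CONTROL with a
    * non-zero post-sync operation) before most subsequent 3D state is
    * emitted.
    */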
   if (devinfo->ver == 6)
      brw_emit_post_sync_nonzero_flush(brw);

   brw_upload_invariant_state(brw);

   if (devinfo->ver == 11) {
      /* Bit 5 "Headerless Message for Pre-emptable Contexts" in the SAMPLER
       * MODE register defaults to 0, which means headerless sampler
       * messages are not allowed for pre-emptable contexts.  Set bit 5 to 1
       * to allow them.
       */
      brw_load_register_imm32(brw, GFX11_SAMPLER_MODE,
                              HEADERLESS_MESSAGE_FOR_PREEMPTABLE_CONTEXTS_MASK |
                              HEADERLESS_MESSAGE_FOR_PREEMPTABLE_CONTEXTS);

      /* Bit 1 "Enabled Texel Offset Precision Fix" must be set in the
       * HALF_SLICE_CHICKEN7 register.
       */
      brw_load_register_imm32(brw, HALF_SLICE_CHICKEN7,
                              TEXEL_OFFSET_FIX_MASK |
                              TEXEL_OFFSET_FIX_ENABLE);

      /* Wa_1406697149: Bit 9 "Error Detection Behavior Control" must be set
       * in the L3CNTLREG register.  The default setting of the bit is not
       * the desired behavior.
       */
      brw_load_register_imm32(brw, GFX8_L3CNTLREG,
                              GFX8_L3CNTLREG_EDBC_NO_HANG);
   }

   /* The hardware specification recommends disabling repacking for
    * compatibility with the decompression mechanism in the display
    * controller.
    */
   if (devinfo->disable_ccs_repack) {
      brw_load_register_imm32(brw, GFX7_CACHE_MODE_0,
                              GFX11_DISABLE_REPACKING_FOR_COMPRESSION |
                              REG_MASK(GFX11_DISABLE_REPACKING_FOR_COMPRESSION));
   }

   if (devinfo->ver == 9) {
      /* Recommended optimizations for Victim Cache eviction and floating
       * point blending.
       */
      brw_load_register_imm32(brw, GFX7_CACHE_MODE_1,
                              REG_MASK(GFX9_FLOAT_BLEND_OPTIMIZATION_ENABLE) |
                              REG_MASK(GFX9_MSC_RAW_HAZARD_AVOIDANCE_BIT) |
                              REG_MASK(GFX9_PARTIAL_RESOLVE_DISABLE_IN_VC) |
                              GFX9_FLOAT_BLEND_OPTIMIZATION_ENABLE |
                              GFX9_MSC_RAW_HAZARD_AVOIDANCE_BIT |
                              GFX9_PARTIAL_RESOLVE_DISABLE_IN_VC);
   }

   if (devinfo->ver >= 8) {
      gfx8_emit_3dstate_sample_pattern(brw);

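      /* Emit zeroed 3DSTATE_WM_HZ_OP and 3DSTATE_WM_CHROMAKEY packets once
       * here, so subsequent rendering starts with both in a known-disabled
       * state.
       */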
      BEGIN_BATCH(5);
      OUT_BATCH(_3DSTATE_WM_HZ_OP << 16 | (5 - 2));
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();

      BEGIN_BATCH(2);
      OUT_BATCH(_3DSTATE_WM_CHROMAKEY << 16 | (2 - 2));
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }

   /* Set the "CONSTANT_BUFFER Address Offset Disable" bit, so
    * 3DSTATE_CONSTANT_XS buffer 0 is an absolute address.
    *
    * This is only safe on kernels with context isolation support.
    */
   if (!compiler->constant_buffer_0_is_relative) {
      if (devinfo->ver >= 9) {
         BEGIN_BATCH(3);
         OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
         OUT_BATCH(CS_DEBUG_MODE2);
         OUT_BATCH(REG_MASK(CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE) |
                   CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE);
         ADVANCE_BATCH();
      } else if (devinfo->ver == 8) {
         BEGIN_BATCH(3);
         OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
         OUT_BATCH(INSTPM);
         OUT_BATCH(REG_MASK(INSTPM_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE) |
                   INSTPM_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE);
         ADVANCE_BATCH();
      }
   }

   brw->object_preemption = false;

   if (devinfo->ver >= 10)
      brw_enable_obj_preemption(brw, true);

   if (devinfo->ver == 11)
      brw_upload_gfx11_slice_hashing_state(brw);
}

static inline const struct brw_tracked_state *
brw_get_pipeline_atoms(struct brw_context *brw,
                       enum brw_pipeline pipeline)
{
   switch (pipeline) {
   case BRW_RENDER_PIPELINE:
      return brw->render_atoms;
   case BRW_COMPUTE_PIPELINE:
      return brw->compute_atoms;
   default:
      STATIC_ASSERT(BRW_NUM_PIPELINES == 2);
      unreachable("Unsupported pipeline");
      return NULL;
   }
}

void
brw_copy_pipeline_atoms(struct brw_context *brw,
                        enum brw_pipeline pipeline,
                        const struct brw_tracked_state **atoms,
                        int num_atoms)
{
   /* This is to work around brw_context::atoms being declared const.  We want
    * it to be const, but it needs to be initialized somehow!
    */
   struct brw_tracked_state *context_atoms =
      (struct brw_tracked_state *) brw_get_pipeline_atoms(brw, pipeline);

   for (int i = 0; i < num_atoms; i++) {
      context_atoms[i] = *atoms[i];
      assert(context_atoms[i].dirty.mesa | context_atoms[i].dirty.brw);
      assert(context_atoms[i].emit);
   }

   brw->num_atoms[pipeline] = num_atoms;
}

void brw_init_state( struct brw_context *brw )
{
   struct gl_context *ctx = &brw->ctx;
   const struct intel_device_info *devinfo = &brw->screen->devinfo;

   /* Force the first brw_select_pipeline to emit pipeline select */
   brw->last_pipeline = BRW_NUM_PIPELINES;

   brw_init_caches(brw);

   if (devinfo->ver >= 11)
      gfx11_init_atoms(brw);
   else if (devinfo->ver >= 10)
      unreachable("Gfx10 support dropped.");
   else if (devinfo->ver >= 9)
      gfx9_init_atoms(brw);
   else if (devinfo->ver >= 8)
      gfx8_init_atoms(brw);
   else if (devinfo->is_haswell)
      gfx75_init_atoms(brw);
   else if (devinfo->ver >= 7)
      gfx7_init_atoms(brw);
   else if (devinfo->ver >= 6)
      gfx6_init_atoms(brw);
   else if (devinfo->ver >= 5)
      gfx5_init_atoms(brw);
   else if (devinfo->is_g4x)
      gfx45_init_atoms(brw);
   else
      gfx4_init_atoms(brw);

   brw_upload_initial_gpu_state(brw);

   brw->NewGLState = ~0;
   brw->ctx.NewDriverState = ~0ull;

   /* ~0 is a nonsensical value which won't match anything we program, so
    * the programming will take effect on the first time around.
    */
   brw->pma_stall_bits = ~0;

   /* Make sure that brw->ctx.NewDriverState has enough bits to hold all
    * possible dirty flags.
    */
   STATIC_ASSERT(BRW_NUM_STATE_BITS <= 8 * sizeof(brw->ctx.NewDriverState));

   ctx->DriverFlags.NewTransformFeedback = BRW_NEW_TRANSFORM_FEEDBACK;
   ctx->DriverFlags.NewTransformFeedbackProg = BRW_NEW_TRANSFORM_FEEDBACK;
   ctx->DriverFlags.NewRasterizerDiscard = BRW_NEW_RASTERIZER_DISCARD;
   ctx->DriverFlags.NewUniformBuffer = BRW_NEW_UNIFORM_BUFFER;
   ctx->DriverFlags.NewShaderStorageBuffer = BRW_NEW_UNIFORM_BUFFER;
   ctx->DriverFlags.NewTextureBuffer = BRW_NEW_TEXTURE_BUFFER;
   ctx->DriverFlags.NewAtomicBuffer = BRW_NEW_UNIFORM_BUFFER;
   ctx->DriverFlags.NewImageUnits = BRW_NEW_IMAGE_UNITS;
   ctx->DriverFlags.NewTessState = BRW_NEW_DEFAULT_TESS_LEVELS;
   ctx->DriverFlags.NewIntelConservativeRasterization = BRW_NEW_CONSERVATIVE_RASTERIZATION;
}


void brw_destroy_state( struct brw_context *brw )
{
   brw_destroy_caches(brw);
}

/***********************************************************************
 */

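/* Return true if any dirty flag set in "a" is also set in "b". */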
static bool
check_state(const struct brw_state_flags *a, const struct brw_state_flags *b)
{
   return ((a->mesa & b->mesa) | (a->brw & b->brw)) != 0;
}

static void
accumulate_state(struct brw_state_flags *a, const struct brw_state_flags *b)
{
   a->mesa |= b->mesa;
   a->brw |= b->brw;
}


static void
xor_states(struct brw_state_flags *result,
           const struct brw_state_flags *a,
           const struct brw_state_flags *b)
{
   result->mesa = a->mesa ^ b->mesa;
   result->brw = a->brw ^ b->brw;
}

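/* Bookkeeping for INTEL_DEBUG=state: one entry per dirty bit, counting how
 * often that bit triggers state emission (see brw_update_dirty_count() and
 * brw_print_dirty_count() below).
 */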
struct dirty_bit_map {
   uint64_t bit;
   char *name;
   uint32_t count;
};

#define DEFINE_BIT(name) {name, #name, 0}

static struct dirty_bit_map mesa_bits[] = {
   DEFINE_BIT(_NEW_MODELVIEW),
   DEFINE_BIT(_NEW_PROJECTION),
   DEFINE_BIT(_NEW_TEXTURE_MATRIX),
   DEFINE_BIT(_NEW_COLOR),
   DEFINE_BIT(_NEW_DEPTH),
   DEFINE_BIT(_NEW_FOG),
   DEFINE_BIT(_NEW_HINT),
   DEFINE_BIT(_NEW_LIGHT),
   DEFINE_BIT(_NEW_LINE),
   DEFINE_BIT(_NEW_PIXEL),
   DEFINE_BIT(_NEW_POINT),
   DEFINE_BIT(_NEW_POLYGON),
   DEFINE_BIT(_NEW_POLYGONSTIPPLE),
   DEFINE_BIT(_NEW_SCISSOR),
   DEFINE_BIT(_NEW_STENCIL),
   DEFINE_BIT(_NEW_TEXTURE_OBJECT),
   DEFINE_BIT(_NEW_TRANSFORM),
   DEFINE_BIT(_NEW_VIEWPORT),
   DEFINE_BIT(_NEW_TEXTURE_STATE),
   DEFINE_BIT(_NEW_RENDERMODE),
   DEFINE_BIT(_NEW_BUFFERS),
   DEFINE_BIT(_NEW_CURRENT_ATTRIB),
   DEFINE_BIT(_NEW_MULTISAMPLE),
   DEFINE_BIT(_NEW_TRACK_MATRIX),
   DEFINE_BIT(_NEW_PROGRAM),
   DEFINE_BIT(_NEW_PROGRAM_CONSTANTS),
   DEFINE_BIT(_NEW_FRAG_CLAMP),
   {0, 0, 0}
};

static struct dirty_bit_map brw_bits[] = {
   DEFINE_BIT(BRW_NEW_FS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_BLORP_BLIT_PROG_DATA),
   DEFINE_BIT(BRW_NEW_SF_PROG_DATA),
   DEFINE_BIT(BRW_NEW_VS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_FF_GS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_GS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_TCS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_TES_PROG_DATA),
   DEFINE_BIT(BRW_NEW_CLIP_PROG_DATA),
   DEFINE_BIT(BRW_NEW_CS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_URB_FENCE),
   DEFINE_BIT(BRW_NEW_FRAGMENT_PROGRAM),
   DEFINE_BIT(BRW_NEW_GEOMETRY_PROGRAM),
   DEFINE_BIT(BRW_NEW_TESS_PROGRAMS),
   DEFINE_BIT(BRW_NEW_VERTEX_PROGRAM),
   DEFINE_BIT(BRW_NEW_REDUCED_PRIMITIVE),
   DEFINE_BIT(BRW_NEW_PATCH_PRIMITIVE),
   DEFINE_BIT(BRW_NEW_PRIMITIVE),
   DEFINE_BIT(BRW_NEW_CONTEXT),
   DEFINE_BIT(BRW_NEW_PSP),
   DEFINE_BIT(BRW_NEW_SURFACES),
   DEFINE_BIT(BRW_NEW_BINDING_TABLE_POINTERS),
   DEFINE_BIT(BRW_NEW_INDICES),
   DEFINE_BIT(BRW_NEW_VERTICES),
   DEFINE_BIT(BRW_NEW_DEFAULT_TESS_LEVELS),
   DEFINE_BIT(BRW_NEW_BATCH),
   DEFINE_BIT(BRW_NEW_INDEX_BUFFER),
   DEFINE_BIT(BRW_NEW_VS_CONSTBUF),
   DEFINE_BIT(BRW_NEW_TCS_CONSTBUF),
   DEFINE_BIT(BRW_NEW_TES_CONSTBUF),
   DEFINE_BIT(BRW_NEW_GS_CONSTBUF),
   DEFINE_BIT(BRW_NEW_PROGRAM_CACHE),
   DEFINE_BIT(BRW_NEW_STATE_BASE_ADDRESS),
   DEFINE_BIT(BRW_NEW_VUE_MAP_GEOM_OUT),
   DEFINE_BIT(BRW_NEW_TRANSFORM_FEEDBACK),
   DEFINE_BIT(BRW_NEW_RASTERIZER_DISCARD),
   DEFINE_BIT(BRW_NEW_STATS_WM),
   DEFINE_BIT(BRW_NEW_UNIFORM_BUFFER),
   DEFINE_BIT(BRW_NEW_IMAGE_UNITS),
   DEFINE_BIT(BRW_NEW_META_IN_PROGRESS),
   DEFINE_BIT(BRW_NEW_PUSH_CONSTANT_ALLOCATION),
   DEFINE_BIT(BRW_NEW_NUM_SAMPLES),
   DEFINE_BIT(BRW_NEW_TEXTURE_BUFFER),
   DEFINE_BIT(BRW_NEW_GFX4_UNIT_STATE),
   DEFINE_BIT(BRW_NEW_CC_VP),
   DEFINE_BIT(BRW_NEW_SF_VP),
   DEFINE_BIT(BRW_NEW_CLIP_VP),
   DEFINE_BIT(BRW_NEW_SAMPLER_STATE_TABLE),
   DEFINE_BIT(BRW_NEW_VS_ATTRIB_WORKAROUNDS),
   DEFINE_BIT(BRW_NEW_COMPUTE_PROGRAM),
   DEFINE_BIT(BRW_NEW_CS_WORK_GROUPS),
   DEFINE_BIT(BRW_NEW_URB_SIZE),
   DEFINE_BIT(BRW_NEW_CC_STATE),
   DEFINE_BIT(BRW_NEW_BLORP),
   DEFINE_BIT(BRW_NEW_VIEWPORT_COUNT),
   DEFINE_BIT(BRW_NEW_CONSERVATIVE_RASTERIZATION),
   DEFINE_BIT(BRW_NEW_DRAW_CALL),
   DEFINE_BIT(BRW_NEW_AUX_STATE),
   {0, 0, 0}
};

static void
brw_update_dirty_count(struct dirty_bit_map *bit_map, uint64_t bits)
{
   for (int i = 0; bit_map[i].bit != 0; i++) {
      if (bit_map[i].bit & bits)
         bit_map[i].count++;
   }
}

static void
brw_print_dirty_count(struct dirty_bit_map *bit_map)
{
   for (int i = 0; bit_map[i].bit != 0; i++) {
      if (bit_map[i].count > 1) {
         fprintf(stderr, "0x%016"PRIx64": %12d (%s)\n",
                 bit_map[i].bit, bit_map[i].count, bit_map[i].name);
      }
   }
}

static inline void
brw_upload_tess_programs(struct brw_context *brw)
{
   if (brw->programs[MESA_SHADER_TESS_EVAL]) {
      brw_upload_tcs_prog(brw);
      brw_upload_tes_prog(brw);
   } else {
      brw->tcs.base.prog_data = NULL;
      brw->tes.base.prog_data = NULL;
   }
}

static inline void
brw_upload_programs(struct brw_context *brw,
                    enum brw_pipeline pipeline)
{
   struct gl_context *ctx = &brw->ctx;
   const struct intel_device_info *devinfo = &brw->screen->devinfo;

   if (pipeline == BRW_RENDER_PIPELINE) {
      brw_upload_vs_prog(brw);
      brw_upload_tess_programs(brw);

      if (brw->programs[MESA_SHADER_GEOMETRY]) {
         brw_upload_gs_prog(brw);
      } else {
         brw->gs.base.prog_data = NULL;
         if (devinfo->ver < 7)
            brw_upload_ff_gs_prog(brw);
      }

      /* Update the VUE map for data exiting the GS stage of the pipeline.
       * This comes from the last enabled shader stage.
       */
      GLbitfield64 old_slots = brw->vue_map_geom_out.slots_valid;
      bool old_separate = brw->vue_map_geom_out.separate;
      struct brw_vue_prog_data *vue_prog_data;
      if (brw->programs[MESA_SHADER_GEOMETRY])
         vue_prog_data = brw_vue_prog_data(brw->gs.base.prog_data);
      else if (brw->programs[MESA_SHADER_TESS_EVAL])
         vue_prog_data = brw_vue_prog_data(brw->tes.base.prog_data);
      else
         vue_prog_data = brw_vue_prog_data(brw->vs.base.prog_data);

      brw->vue_map_geom_out = vue_prog_data->vue_map;

      /* If the layout has changed, signal BRW_NEW_VUE_MAP_GEOM_OUT. */
      if (old_slots != brw->vue_map_geom_out.slots_valid ||
          old_separate != brw->vue_map_geom_out.separate)
         brw->ctx.NewDriverState |= BRW_NEW_VUE_MAP_GEOM_OUT;

      if ((old_slots ^ brw->vue_map_geom_out.slots_valid) &
          VARYING_BIT_VIEWPORT) {
         ctx->NewDriverState |= BRW_NEW_VIEWPORT_COUNT;
         brw->clip.viewport_count =
            (brw->vue_map_geom_out.slots_valid & VARYING_BIT_VIEWPORT) ?
            ctx->Const.MaxViewports : 1;
      }

      brw_upload_wm_prog(brw);

      if (devinfo->ver < 6) {
         brw_upload_clip_prog(brw);
         brw_upload_sf_prog(brw);
      }

      brw_disk_cache_write_render_programs(brw);
   } else if (pipeline == BRW_COMPUTE_PIPELINE) {
      brw_upload_cs_prog(brw);
      brw_disk_cache_write_compute_program(brw);
   }
}

static inline void
merge_ctx_state(struct brw_context *brw,
                struct brw_state_flags *state)
{
   state->mesa |= brw->NewGLState;
   state->brw |= brw->ctx.NewDriverState;
}

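/* Emit an atom if any of the dirty bits it subscribes to are set.  The
 * atom's emit function may flag additional state, so fold the context's
 * dirty bits back into "state" afterwards.
 */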
static ALWAYS_INLINE void
check_and_emit_atom(struct brw_context *brw,
                    struct brw_state_flags *state,
                    const struct brw_tracked_state *atom)
{
   if (check_state(state, &atom->dirty)) {
      atom->emit(brw);
      merge_ctx_state(brw, state);
   }
}

static inline void
brw_upload_pipeline_state(struct brw_context *brw,
                          enum brw_pipeline pipeline)
{
   const struct intel_device_info *devinfo = &brw->screen->devinfo;
   struct gl_context *ctx = &brw->ctx;
   int i;
   static int dirty_count = 0;
   struct brw_state_flags state = brw->state.pipelines[pipeline];
   const unsigned fb_samples =
      MAX2(_mesa_geometric_samples(ctx->DrawBuffer), 1);

   brw_select_pipeline(brw, pipeline);

   if (pipeline == BRW_RENDER_PIPELINE && brw->current_hash_scale != 1)
      brw_emit_hashing_mode(brw, UINT_MAX, UINT_MAX, 1);

   if (INTEL_DEBUG(DEBUG_REEMIT)) {
      /* Always re-emit all state. */
      brw->NewGLState = ~0;
      ctx->NewDriverState = ~0ull;
   }

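   /* Resolve the current GL program object for each stage into
    * brw->programs[], flagging the matching BRW_NEW_*_PROGRAM bit whenever
    * a stage changed.
    */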
   if (pipeline == BRW_RENDER_PIPELINE) {
      if (brw->programs[MESA_SHADER_FRAGMENT] !=
          ctx->FragmentProgram._Current) {
         brw->programs[MESA_SHADER_FRAGMENT] = ctx->FragmentProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM;
      }

      if (brw->programs[MESA_SHADER_TESS_EVAL] !=
          ctx->TessEvalProgram._Current) {
         brw->programs[MESA_SHADER_TESS_EVAL] = ctx->TessEvalProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_TESS_PROGRAMS;
      }

      if (brw->programs[MESA_SHADER_TESS_CTRL] !=
          ctx->TessCtrlProgram._Current) {
         brw->programs[MESA_SHADER_TESS_CTRL] = ctx->TessCtrlProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_TESS_PROGRAMS;
      }

      if (brw->programs[MESA_SHADER_GEOMETRY] !=
          ctx->GeometryProgram._Current) {
         brw->programs[MESA_SHADER_GEOMETRY] = ctx->GeometryProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_GEOMETRY_PROGRAM;
      }

      if (brw->programs[MESA_SHADER_VERTEX] != ctx->VertexProgram._Current) {
         brw->programs[MESA_SHADER_VERTEX] = ctx->VertexProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_VERTEX_PROGRAM;
      }
   }

   if (brw->programs[MESA_SHADER_COMPUTE] != ctx->ComputeProgram._Current) {
      brw->programs[MESA_SHADER_COMPUTE] = ctx->ComputeProgram._Current;
      brw->ctx.NewDriverState |= BRW_NEW_COMPUTE_PROGRAM;
   }

   if (brw->meta_in_progress != _mesa_meta_in_progress(ctx)) {
      brw->meta_in_progress = _mesa_meta_in_progress(ctx);
      brw->ctx.NewDriverState |= BRW_NEW_META_IN_PROGRESS;
   }

   if (brw->num_samples != fb_samples) {
      brw->num_samples = fb_samples;
      brw->ctx.NewDriverState |= BRW_NEW_NUM_SAMPLES;
   }

   /* Exit early if no state is flagged as dirty */
   merge_ctx_state(brw, &state);
   if ((state.mesa | state.brw) == 0)
      return;

   /* Emit Sandybridge workaround flushes on every primitive, for safety. */
   if (devinfo->ver == 6)
      brw_emit_post_sync_nonzero_flush(brw);

   brw_upload_programs(brw, pipeline);
   merge_ctx_state(brw, &state);

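   /* Program the base addresses before the atoms below emit any packets
    * that point into the dynamic or surface state buffers.
    */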
   brw_upload_state_base_address(brw);

   const struct brw_tracked_state *atoms =
      brw_get_pipeline_atoms(brw, pipeline);
   const int num_atoms = brw->num_atoms[pipeline];

   if (INTEL_DEBUG(DEBUG_ANY)) {
      /* Debug version which enforces various sanity checks on the state
       * flags that are generated and checked, to help ensure state atoms
       * are ordered correctly in the list.
       */
      struct brw_state_flags examined, prev;
      memset(&examined, 0, sizeof(examined));
      prev = state;

      for (i = 0; i < num_atoms; i++) {
         const struct brw_tracked_state *atom = &atoms[i];
         struct brw_state_flags generated;

         check_and_emit_atom(brw, &state, atom);

         accumulate_state(&examined, &atom->dirty);

         /* generated = (prev ^ state)
          * if (examined & generated)
          *     fail;
          */
         xor_states(&generated, &prev, &state);
         assert(!check_state(&examined, &generated));
         prev = state;
      }
   }
   else {
      for (i = 0; i < num_atoms; i++) {
         const struct brw_tracked_state *atom = &atoms[i];

         check_and_emit_atom(brw, &state, atom);
      }
   }

   if (INTEL_DEBUG(DEBUG_STATE)) {
      STATIC_ASSERT(ARRAY_SIZE(brw_bits) == BRW_NUM_STATE_BITS + 1);

      brw_update_dirty_count(mesa_bits, state.mesa);
      brw_update_dirty_count(brw_bits, state.brw);
      if (dirty_count++ % 1000 == 0) {
         brw_print_dirty_count(mesa_bits);
         brw_print_dirty_count(brw_bits);
         fprintf(stderr, "\n");
      }
   }
}

/***********************************************************************
 * Emit all state:
 */
void brw_upload_render_state(struct brw_context *brw)
{
   brw_upload_pipeline_state(brw, BRW_RENDER_PIPELINE);
}

static inline void
brw_pipeline_state_finished(struct brw_context *brw,
                            enum brw_pipeline pipeline)
{
   /* Save all dirty state into the other pipelines */
   for (unsigned i = 0; i < BRW_NUM_PIPELINES; i++) {
      if (i != pipeline) {
         brw->state.pipelines[i].mesa |= brw->NewGLState;
         brw->state.pipelines[i].brw |= brw->ctx.NewDriverState;
      } else {
         memset(&brw->state.pipelines[i], 0, sizeof(struct brw_state_flags));
      }
   }

   brw->NewGLState = 0;
   brw->ctx.NewDriverState = 0ull;
}

/**
 * Clear dirty bits to account for the fact that the state emitted by
 * brw_upload_render_state() has been committed to the hardware. This is a
 * separate call from brw_upload_render_state() because it's possible that
 * after the call to brw_upload_render_state(), we will discover that we've
 * run out of aperture space, and need to rewind the batch buffer to the state
 * it had before the brw_upload_render_state() call.
 */
void
brw_render_state_finished(struct brw_context *brw)
{
   brw_pipeline_state_finished(brw, BRW_RENDER_PIPELINE);
}

void
brw_upload_compute_state(struct brw_context *brw)
{
   brw_upload_pipeline_state(brw, BRW_COMPUTE_PIPELINE);
}

void
brw_compute_state_finished(struct brw_context *brw)
{
   brw_pipeline_state_finished(brw, BRW_COMPUTE_PIPELINE);
}