1 /*
2  * Copyright © 2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *
26  */
27 
28 #ifndef BRW_FS_H
29 #define BRW_FS_H
30 
31 #include "brw_shader.h"
32 #include "brw_ir_fs.h"
33 #include "brw_fs_builder.h"
34 #include "brw_fs_live_variables.h"
35 #include "brw_ir_performance.h"
36 #include "compiler/nir/nir.h"
37 
38 struct bblock_t;
39 namespace {
40    struct acp_entry;
41 }
42 
43 class fs_visitor;
44 
45 namespace brw {
46    /**
47     * Register pressure analysis of a shader.  Estimates how many registers
48     * are live at any point of the program in GRF units.
49     */
50    struct register_pressure {
51       register_pressure(const fs_visitor *v);
52       ~register_pressure();
53 
54       analysis_dependency_class
dependency_classregister_pressure55       dependency_class() const
56       {
57          return (DEPENDENCY_INSTRUCTION_IDENTITY |
58                  DEPENDENCY_INSTRUCTION_DATA_FLOW |
59                  DEPENDENCY_VARIABLES);
60       }
61 
62       bool
validateregister_pressure63       validate(const fs_visitor *) const
64       {
65          /* FINISHME */
66          return true;
67       }
68 
69       unsigned *regs_live_at_ip;
70    };
71 }
72 
73 struct brw_gs_compile;
74 
75 static inline fs_reg
offset(const fs_reg & reg,const brw::fs_builder & bld,unsigned delta)76 offset(const fs_reg &reg, const brw::fs_builder &bld, unsigned delta)
77 {
78    return offset(reg, bld.dispatch_width(), delta);
79 }
80 
81 struct shader_stats {
82    const char *scheduler_mode;
83    unsigned promoted_constants;
84 };
85 
86 /**
87  * The fragment shader front-end.
88  *
89  * Translates either GLSL IR or Mesa IR (for ARB_fragment_program) into FS IR.
90  */
91 class fs_visitor : public backend_shader
92 {
93 public:
94    fs_visitor(const struct brw_compiler *compiler, void *log_data,
95               void *mem_ctx,
96               const brw_base_prog_key *key,
97               struct brw_stage_prog_data *prog_data,
98               const nir_shader *shader,
99               unsigned dispatch_width,
100               int shader_time_index,
101               bool debug_enabled);
102    fs_visitor(const struct brw_compiler *compiler, void *log_data,
103               void *mem_ctx,
104               struct brw_gs_compile *gs_compile,
105               struct brw_gs_prog_data *prog_data,
106               const nir_shader *shader,
107               int shader_time_index,
108               bool debug_enabled);
109    void init();
110    ~fs_visitor();
111 
112    fs_reg vgrf(const glsl_type *const type);
113    void import_uniforms(fs_visitor *v);
114 
115    void VARYING_PULL_CONSTANT_LOAD(const brw::fs_builder &bld,
116                                    const fs_reg &dst,
117                                    const fs_reg &surf_index,
118                                    const fs_reg &varying_offset,
119                                    uint32_t const_offset,
120                                    uint8_t alignment);
121    void DEP_RESOLVE_MOV(const brw::fs_builder &bld, int grf);
122 
123    bool run_fs(bool allow_spilling, bool do_rep_send);
124    bool run_vs();
125    bool run_tcs();
126    bool run_tes();
127    bool run_gs();
128    bool run_cs(bool allow_spilling);
129    bool run_bs(bool allow_spilling);
130    void optimize();
131    void allocate_registers(bool allow_spilling);
132    void setup_fs_payload_gfx4();
133    void setup_fs_payload_gfx6();
134    void setup_vs_payload();
135    void setup_gs_payload();
136    void setup_cs_payload();
137    bool fixup_sends_duplicate_payload();
138    void fixup_3src_null_dest();
139    bool fixup_nomask_control_flow();
140    void assign_curb_setup();
141    void assign_urb_setup();
142    void convert_attr_sources_to_hw_regs(fs_inst *inst);
143    void assign_vs_urb_setup();
144    void assign_tcs_urb_setup();
145    void assign_tes_urb_setup();
146    void assign_gs_urb_setup();
147    bool assign_regs(bool allow_spilling, bool spill_all);
148    void assign_regs_trivial();
149    void calculate_payload_ranges(int payload_node_count,
150                                  int *payload_last_use_ip) const;
151    void split_virtual_grfs();
152    bool compact_virtual_grfs();
153    void assign_constant_locations();
154    bool get_pull_locs(const fs_reg &src, unsigned *out_surf_index,
155                       unsigned *out_pull_index);
156    void lower_constant_loads();
157    virtual void invalidate_analysis(brw::analysis_dependency_class c);
158    void validate();
159    bool opt_algebraic();
160    bool opt_redundant_halt();
161    bool opt_cse();
162    bool opt_cse_local(const brw::fs_live_variables &live, bblock_t *block, int &ip);
163 
164    bool opt_copy_propagation();
165    bool try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry);
166    bool try_constant_propagate(fs_inst *inst, acp_entry *entry);
167    bool opt_copy_propagation_local(void *mem_ctx, bblock_t *block,
168                                    exec_list *acp);
169    bool opt_drop_redundant_mov_to_flags();
170    bool opt_register_renaming();
171    bool opt_bank_conflicts();
172    bool register_coalesce();
173    bool compute_to_mrf();
174    bool eliminate_find_live_channel();
175    bool dead_code_eliminate();
176    bool remove_duplicate_mrf_writes();
177    bool remove_extra_rounding_modes();
178 
179    void schedule_instructions(instruction_scheduler_mode mode);
180    void insert_gfx4_send_dependency_workarounds();
181    void insert_gfx4_pre_send_dependency_workarounds(bblock_t *block,
182                                                     fs_inst *inst);
183    void insert_gfx4_post_send_dependency_workarounds(bblock_t *block,
184                                                      fs_inst *inst);
185    void vfail(const char *msg, va_list args);
186    void fail(const char *msg, ...);
187    void limit_dispatch_width(unsigned n, const char *msg);
188    void lower_uniform_pull_constant_loads();
189    bool lower_load_payload();
190    bool lower_pack();
191    bool lower_regioning();
192    bool lower_logical_sends();
193    bool lower_integer_multiplication();
194    bool lower_minmax();
195    bool lower_simd_width();
196    bool lower_barycentrics();
197    bool lower_derivatives();
198    bool lower_scoreboard();
199    bool lower_sub_sat();
200    bool opt_combine_constants();
201 
202    void emit_dummy_fs();
203    void emit_repclear_shader();
204    void emit_fragcoord_interpolation(fs_reg wpos);
205    fs_reg *emit_frontfacing_interpolation();
206    fs_reg *emit_samplepos_setup();
207    fs_reg *emit_sampleid_setup();
208    fs_reg *emit_samplemaskin_setup();
209    fs_reg *emit_shading_rate_setup();
210    void emit_interpolation_setup_gfx4();
211    void emit_interpolation_setup_gfx6();
212    void compute_sample_position(fs_reg dst, fs_reg int_sample_pos);
213    fs_reg emit_mcs_fetch(const fs_reg &coordinate, unsigned components,
214                          const fs_reg &texture,
215                          const fs_reg &texture_handle);
216    void emit_gfx6_gather_wa(uint8_t wa, fs_reg dst);
217    fs_reg resolve_source_modifiers(const fs_reg &src);
218    void emit_fsign(const class brw::fs_builder &, const nir_alu_instr *instr,
219                    fs_reg result, fs_reg *op, unsigned fsign_src);
220    void emit_shader_float_controls_execution_mode();
221    bool opt_peephole_sel();
222    bool opt_peephole_predicated_break();
223    bool opt_saturate_propagation();
224    bool opt_cmod_propagation();
225    bool opt_zero_samples();
226 
227    void set_tcs_invocation_id();
228 
229    void emit_nir_code();
230    void nir_setup_outputs();
231    void nir_setup_uniforms();
232    void nir_emit_system_values();
233    void nir_emit_impl(nir_function_impl *impl);
234    void nir_emit_cf_list(exec_list *list);
235    void nir_emit_if(nir_if *if_stmt);
236    void nir_emit_loop(nir_loop *loop);
237    void nir_emit_block(nir_block *block);
238    void nir_emit_instr(nir_instr *instr);
239    void nir_emit_alu(const brw::fs_builder &bld, nir_alu_instr *instr,
240                      bool need_dest);
241    bool try_emit_b2fi_of_inot(const brw::fs_builder &bld, fs_reg result,
242                               nir_alu_instr *instr);
243    void nir_emit_load_const(const brw::fs_builder &bld,
244                             nir_load_const_instr *instr);
245    void nir_emit_vs_intrinsic(const brw::fs_builder &bld,
246                               nir_intrinsic_instr *instr);
247    void nir_emit_tcs_intrinsic(const brw::fs_builder &bld,
248                                nir_intrinsic_instr *instr);
249    void nir_emit_gs_intrinsic(const brw::fs_builder &bld,
250                               nir_intrinsic_instr *instr);
251    void nir_emit_fs_intrinsic(const brw::fs_builder &bld,
252                               nir_intrinsic_instr *instr);
253    void nir_emit_cs_intrinsic(const brw::fs_builder &bld,
254                               nir_intrinsic_instr *instr);
255    void nir_emit_bs_intrinsic(const brw::fs_builder &bld,
256                               nir_intrinsic_instr *instr);
257    fs_reg get_nir_image_intrinsic_image(const brw::fs_builder &bld,
258                                         nir_intrinsic_instr *instr);
259    fs_reg get_nir_ssbo_intrinsic_index(const brw::fs_builder &bld,
260                                        nir_intrinsic_instr *instr);
261    fs_reg swizzle_nir_scratch_addr(const brw::fs_builder &bld,
262                                    const fs_reg &addr,
263                                    bool in_dwords);
264    void nir_emit_intrinsic(const brw::fs_builder &bld,
265                            nir_intrinsic_instr *instr);
266    void nir_emit_tes_intrinsic(const brw::fs_builder &bld,
267                                nir_intrinsic_instr *instr);
268    void nir_emit_ssbo_atomic(const brw::fs_builder &bld,
269                              int op, nir_intrinsic_instr *instr);
270    void nir_emit_ssbo_atomic_float(const brw::fs_builder &bld,
271                                    int op, nir_intrinsic_instr *instr);
272    void nir_emit_shared_atomic(const brw::fs_builder &bld,
273                                int op, nir_intrinsic_instr *instr);
274    void nir_emit_shared_atomic_float(const brw::fs_builder &bld,
275                                      int op, nir_intrinsic_instr *instr);
276    void nir_emit_global_atomic(const brw::fs_builder &bld,
277                                int op, nir_intrinsic_instr *instr);
278    void nir_emit_global_atomic_float(const brw::fs_builder &bld,
279                                      int op, nir_intrinsic_instr *instr);
280    void nir_emit_texture(const brw::fs_builder &bld,
281                          nir_tex_instr *instr);
282    void nir_emit_jump(const brw::fs_builder &bld,
283                       nir_jump_instr *instr);
284    fs_reg get_nir_src(const nir_src &src);
285    fs_reg get_nir_src_imm(const nir_src &src);
286    fs_reg get_nir_dest(const nir_dest &dest);
287    fs_reg get_indirect_offset(nir_intrinsic_instr *instr);
288    fs_reg get_tcs_single_patch_icp_handle(const brw::fs_builder &bld,
289                                           nir_intrinsic_instr *instr);
290    fs_reg get_tcs_eight_patch_icp_handle(const brw::fs_builder &bld,
291                                          nir_intrinsic_instr *instr);
292    struct brw_reg get_tcs_output_urb_handle();
293 
294    void emit_percomp(const brw::fs_builder &bld, const fs_inst &inst,
295                      unsigned wr_mask);
296 
297    bool optimize_extract_to_float(nir_alu_instr *instr,
298                                   const fs_reg &result);
299    bool optimize_frontfacing_ternary(nir_alu_instr *instr,
300                                      const fs_reg &result);
301 
302    void emit_alpha_test();
303    fs_inst *emit_single_fb_write(const brw::fs_builder &bld,
304                                  fs_reg color1, fs_reg color2,
305                                  fs_reg src0_alpha, unsigned components);
306    void emit_alpha_to_coverage_workaround(const fs_reg &src0_alpha);
307    void emit_fb_writes();
308    fs_inst *emit_non_coherent_fb_read(const brw::fs_builder &bld,
309                                       const fs_reg &dst, unsigned target);
310    void emit_urb_writes(const fs_reg &gs_vertex_count = fs_reg());
311    void set_gs_stream_control_data_bits(const fs_reg &vertex_count,
312                                         unsigned stream_id);
313    void emit_gs_control_data_bits(const fs_reg &vertex_count);
314    void emit_gs_end_primitive(const nir_src &vertex_count_nir_src);
315    void emit_gs_vertex(const nir_src &vertex_count_nir_src,
316                        unsigned stream_id);
317    void emit_gs_thread_end();
318    void emit_gs_input_load(const fs_reg &dst, const nir_src &vertex_src,
319                            unsigned base_offset, const nir_src &offset_src,
320                            unsigned num_components, unsigned first_component);
321    void emit_cs_terminate();
322    fs_reg *emit_cs_work_group_id_setup();
323 
324    void emit_barrier();
325 
326    void emit_shader_time_begin();
327    void emit_shader_time_end();
328    void SHADER_TIME_ADD(const brw::fs_builder &bld,
329                         int shader_time_subindex,
330                         fs_reg value);
331 
332    fs_reg get_timestamp(const brw::fs_builder &bld);
333 
334    fs_reg interp_reg(int location, int channel);
335 
336    virtual void dump_instructions() const;
337    virtual void dump_instructions(const char *name) const;
338    void dump_instruction(const backend_instruction *inst) const;
339    void dump_instruction(const backend_instruction *inst, FILE *file) const;
340 
341    const brw_base_prog_key *const key;
342    const struct brw_sampler_prog_key_data *key_tex;
343 
344    struct brw_gs_compile *gs_compile;
345 
346    struct brw_stage_prog_data *prog_data;
347 
348    brw_analysis<brw::fs_live_variables, backend_shader> live_analysis;
349    brw_analysis<brw::register_pressure, fs_visitor> regpressure_analysis;
350    brw_analysis<brw::performance, fs_visitor> performance_analysis;
351 
352    /** Number of uniform variable components visited. */
353    unsigned uniforms;
354 
355    /** Byte-offset for the next available spot in the scratch space buffer. */
356    unsigned last_scratch;
357 
358    /**
359     * Array mapping UNIFORM register numbers to the pull parameter index,
360     * or -1 if this uniform register isn't being uploaded as a pull constant.
361     */
362    int *pull_constant_loc;
363 
364    /**
365     * Array mapping UNIFORM register numbers to the push parameter index,
366     * or -1 if this uniform register isn't being uploaded as a push constant.
367     */
368    int *push_constant_loc;
369 
370    fs_reg subgroup_id;
371    fs_reg group_size[3];
372    fs_reg scratch_base;
373    fs_reg frag_depth;
374    fs_reg frag_stencil;
375    fs_reg sample_mask;
376    fs_reg outputs[VARYING_SLOT_MAX];
377    fs_reg dual_src_output;
378    int first_non_payload_grf;
379    /** Either BRW_MAX_GRF or GFX7_MRF_HACK_START */
380    unsigned max_grf;
381 
382    fs_reg *nir_locals;
383    fs_reg *nir_ssa_values;
384    fs_reg *nir_system_values;
385 
386    bool failed;
387    char *fail_msg;
388 
389    /** Register numbers for thread payload fields. */
390    struct thread_payload {
391       uint8_t subspan_coord_reg[2];
392       uint8_t source_depth_reg[2];
393       uint8_t source_w_reg[2];
394       uint8_t aa_dest_stencil_reg[2];
395       uint8_t dest_depth_reg[2];
396       uint8_t sample_pos_reg[2];
397       uint8_t sample_mask_in_reg[2];
398       uint8_t depth_w_coef_reg[2];
399       uint8_t barycentric_coord_reg[BRW_BARYCENTRIC_MODE_COUNT][2];
400       uint8_t local_invocation_id_reg[2];
401 
402       /** The number of thread payload registers the hardware will supply. */
403       uint8_t num_regs;
404    } payload;
405 
406    bool source_depth_to_render_target;
407    bool runtime_check_aads_emit;
408 
409    fs_reg pixel_x;
410    fs_reg pixel_y;
411    fs_reg pixel_z;
412    fs_reg wpos_w;
413    fs_reg pixel_w;
414    fs_reg delta_xy[BRW_BARYCENTRIC_MODE_COUNT];
415    fs_reg shader_start_time;
416    fs_reg final_gs_vertex_count;
417    fs_reg control_data_bits;
418    fs_reg invocation_id;
419 
420    unsigned grf_used;
421    bool spilled_any_registers;
422 
423    const unsigned dispatch_width; /**< 8, 16 or 32 */
424    unsigned max_dispatch_width;
425 
426    int shader_time_index;
427 
428    struct shader_stats shader_stats;
429 
430    brw::fs_builder bld;
431 
432 private:
433    fs_reg prepare_alu_destination_and_sources(const brw::fs_builder &bld,
434                                               nir_alu_instr *instr,
435                                               fs_reg *op,
436                                               bool need_dest);
437 
438    void resolve_inot_sources(const brw::fs_builder &bld, nir_alu_instr *instr,
439                              fs_reg *op);
440    void lower_mul_dword_inst(fs_inst *inst, bblock_t *block);
441    void lower_mul_qword_inst(fs_inst *inst, bblock_t *block);
442    void lower_mulh_inst(fs_inst *inst, bblock_t *block);
443 
444    unsigned workgroup_size() const;
445 };
446 
447 /**
448  * Return the flag register used in fragment shaders to keep track of live
449  * samples.  On Gfx7+ we use f1.0-f1.1 to allow discard jumps in SIMD32
450  * dispatch mode, while earlier generations are constrained to f0.1, which
451  * limits the dispatch width to SIMD16 for fragment shaders that use discard.
452  */
453 static inline unsigned
sample_mask_flag_subreg(const fs_visitor * shader)454 sample_mask_flag_subreg(const fs_visitor *shader)
455 {
456    assert(shader->stage == MESA_SHADER_FRAGMENT);
457    return shader->devinfo->ver >= 7 ? 2 : 1;
458 }
459 
460 /**
461  * The fragment shader code generator.
462  *
463  * Translates FS IR to actual i965 assembly code.
464  */
465 class fs_generator
466 {
467 public:
468    fs_generator(const struct brw_compiler *compiler, void *log_data,
469                 void *mem_ctx,
470                 struct brw_stage_prog_data *prog_data,
471                 bool runtime_check_aads_emit,
472                 gl_shader_stage stage);
473    ~fs_generator();
474 
475    void enable_debug(const char *shader_name);
476    int generate_code(const cfg_t *cfg, int dispatch_width,
477                      struct shader_stats shader_stats,
478                      const brw::performance &perf,
479                      struct brw_compile_stats *stats);
480    void add_const_data(void *data, unsigned size);
481    void add_resume_sbt(unsigned num_resume_shaders, uint64_t *sbt);
482    const unsigned *get_assembly();
483 
484 private:
485    void fire_fb_write(fs_inst *inst,
486                       struct brw_reg payload,
487                       struct brw_reg implied_header,
488                       GLuint nr);
489    void generate_send(fs_inst *inst,
490                       struct brw_reg dst,
491                       struct brw_reg desc,
492                       struct brw_reg ex_desc,
493                       struct brw_reg payload,
494                       struct brw_reg payload2);
495    void generate_fb_write(fs_inst *inst, struct brw_reg payload);
496    void generate_fb_read(fs_inst *inst, struct brw_reg dst,
497                          struct brw_reg payload);
498    void generate_urb_read(fs_inst *inst, struct brw_reg dst, struct brw_reg payload);
499    void generate_urb_write(fs_inst *inst, struct brw_reg payload);
500    void generate_cs_terminate(fs_inst *inst, struct brw_reg payload);
501    void generate_barrier(fs_inst *inst, struct brw_reg src);
502    bool generate_linterp(fs_inst *inst, struct brw_reg dst,
503 			 struct brw_reg *src);
504    void generate_tex(fs_inst *inst, struct brw_reg dst,
505                      struct brw_reg surface_index,
506                      struct brw_reg sampler_index);
507    void generate_get_buffer_size(fs_inst *inst, struct brw_reg dst,
508                                  struct brw_reg src,
509                                  struct brw_reg surf_index);
510    void generate_ddx(const fs_inst *inst,
511                      struct brw_reg dst, struct brw_reg src);
512    void generate_ddy(const fs_inst *inst,
513                      struct brw_reg dst, struct brw_reg src);
514    void generate_scratch_write(fs_inst *inst, struct brw_reg src);
515    void generate_scratch_read(fs_inst *inst, struct brw_reg dst);
516    void generate_scratch_read_gfx7(fs_inst *inst, struct brw_reg dst);
517    void generate_scratch_header(fs_inst *inst, struct brw_reg dst);
518    void generate_uniform_pull_constant_load(fs_inst *inst, struct brw_reg dst,
519                                             struct brw_reg index,
520                                             struct brw_reg offset);
521    void generate_uniform_pull_constant_load_gfx7(fs_inst *inst,
522                                                  struct brw_reg dst,
523                                                  struct brw_reg surf_index,
524                                                  struct brw_reg payload);
525    void generate_varying_pull_constant_load_gfx4(fs_inst *inst,
526                                                  struct brw_reg dst,
527                                                  struct brw_reg index);
528    void generate_mov_dispatch_to_flags(fs_inst *inst);
529 
530    void generate_pixel_interpolator_query(fs_inst *inst,
531                                           struct brw_reg dst,
532                                           struct brw_reg src,
533                                           struct brw_reg msg_data,
534                                           unsigned msg_type);
535 
536    void generate_set_sample_id(fs_inst *inst,
537                                struct brw_reg dst,
538                                struct brw_reg src0,
539                                struct brw_reg src1);
540 
541    void generate_halt(fs_inst *inst);
542 
543    void generate_pack_half_2x16_split(fs_inst *inst,
544                                       struct brw_reg dst,
545                                       struct brw_reg x,
546                                       struct brw_reg y);
547 
548    void generate_shader_time_add(fs_inst *inst,
549                                  struct brw_reg payload,
550                                  struct brw_reg offset,
551                                  struct brw_reg value);
552 
553    void generate_mov_indirect(fs_inst *inst,
554                               struct brw_reg dst,
555                               struct brw_reg reg,
556                               struct brw_reg indirect_byte_offset);
557 
558    void generate_shuffle(fs_inst *inst,
559                          struct brw_reg dst,
560                          struct brw_reg src,
561                          struct brw_reg idx);
562 
563    void generate_quad_swizzle(const fs_inst *inst,
564                               struct brw_reg dst, struct brw_reg src,
565                               unsigned swiz);
566 
567    bool patch_halt_jumps();
568 
569    const struct brw_compiler *compiler;
570    void *log_data; /* Passed to compiler->*_log functions */
571 
572    const struct intel_device_info *devinfo;
573 
574    struct brw_codegen *p;
575    struct brw_stage_prog_data * const prog_data;
576 
577    unsigned dispatch_width; /**< 8, 16 or 32 */
578 
579    exec_list discard_halt_patches;
580    bool runtime_check_aads_emit;
581    bool debug_flag;
582    const char *shader_name;
583    gl_shader_stage stage;
584    void *mem_ctx;
585 };
586 
587 namespace brw {
588    inline fs_reg
589    fetch_payload_reg(const brw::fs_builder &bld, uint8_t regs[2],
590                      brw_reg_type type = BRW_REGISTER_TYPE_F)
591    {
592       if (!regs[0])
593          return fs_reg();
594 
595       if (bld.dispatch_width() > 16) {
596          const fs_reg tmp = bld.vgrf(type);
597          const brw::fs_builder hbld = bld.exec_all().group(16, 0);
598          const unsigned m = bld.dispatch_width() / hbld.dispatch_width();
599          fs_reg components[2];
600          assert(m <= 2);
601 
602          for (unsigned g = 0; g < m; g++)
603                components[g] = retype(brw_vec8_grf(regs[g], 0), type);
604 
605          hbld.LOAD_PAYLOAD(tmp, components, m, 0);
606 
607          return tmp;
608 
609       } else {
610          return fs_reg(retype(brw_vec8_grf(regs[0], 0), type));
611       }
612    }
613 
614    inline fs_reg
fetch_barycentric_reg(const brw::fs_builder & bld,uint8_t regs[2])615    fetch_barycentric_reg(const brw::fs_builder &bld, uint8_t regs[2])
616    {
617       if (!regs[0])
618          return fs_reg();
619 
620       const fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_F, 2);
621       const brw::fs_builder hbld = bld.exec_all().group(8, 0);
622       const unsigned m = bld.dispatch_width() / hbld.dispatch_width();
623       fs_reg *const components = new fs_reg[2 * m];
624 
625       for (unsigned c = 0; c < 2; c++) {
626          for (unsigned g = 0; g < m; g++)
627             components[c * m + g] = offset(brw_vec8_grf(regs[g / 2], 0),
628                                            hbld, c + 2 * (g % 2));
629       }
630 
631       hbld.LOAD_PAYLOAD(tmp, components, 2 * m, 0);
632 
633       delete[] components;
634       return tmp;
635    }
636 
637    bool
638    lower_src_modifiers(fs_visitor *v, bblock_t *block, fs_inst *inst, unsigned i);
639 }
640 
641 void shuffle_from_32bit_read(const brw::fs_builder &bld,
642                              const fs_reg &dst,
643                              const fs_reg &src,
644                              uint32_t first_component,
645                              uint32_t components);
646 
647 fs_reg setup_imm_df(const brw::fs_builder &bld,
648                     double v);
649 
650 fs_reg setup_imm_b(const brw::fs_builder &bld,
651                    int8_t v);
652 
653 fs_reg setup_imm_ub(const brw::fs_builder &bld,
654                    uint8_t v);
655 
656 enum brw_barycentric_mode brw_barycentric_mode(enum glsl_interp_mode mode,
657                                                nir_intrinsic_op op);
658 
659 uint32_t brw_fb_write_msg_control(const fs_inst *inst,
660                                   const struct brw_wm_prog_data *prog_data);
661 
662 void brw_compute_urb_setup_index(struct brw_wm_prog_data *wm_prog_data);
663 
664 #endif /* BRW_FS_H */
665