1 /*
2  * Copyright © 2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *
26  */
27 
28 #ifndef BRW_FS_H
29 #define BRW_FS_H
30 
31 #include "brw_shader.h"
32 #include "brw_ir_fs.h"
33 #include "brw_fs_builder.h"
34 #include "brw_fs_live_variables.h"
35 #include "brw_ir_performance.h"
36 #include "compiler/nir/nir.h"
37 
38 struct bblock_t;
39 namespace {
40    struct acp_entry;
41 }
42 
43 class fs_visitor;
44 
45 namespace brw {
46    /**
47     * Register pressure analysis of a shader.  Estimates how many registers
48     * are live at any point of the program in GRF units.
49     */
50    struct register_pressure {
51       register_pressure(const fs_visitor *v);
52       ~register_pressure();
53 
54       analysis_dependency_class
dependency_classregister_pressure55       dependency_class() const
56       {
57          return (DEPENDENCY_INSTRUCTION_IDENTITY |
58                  DEPENDENCY_INSTRUCTION_DATA_FLOW |
59                  DEPENDENCY_VARIABLES);
60       }
61 
62       bool
validateregister_pressure63       validate(const fs_visitor *) const
64       {
65          /* FINISHME */
66          return true;
67       }
68 
69       unsigned *regs_live_at_ip;
70    };
71 }
72 
73 struct brw_gs_compile;
74 
75 static inline fs_reg
offset(const fs_reg & reg,const brw::fs_builder & bld,unsigned delta)76 offset(const fs_reg &reg, const brw::fs_builder &bld, unsigned delta)
77 {
78    return offset(reg, bld.dispatch_width(), delta);
79 }
80 
81 #define UBO_START ((1 << 16) - 4)
82 
83 struct shader_stats {
84    const char *scheduler_mode;
85    unsigned promoted_constants;
86 };
87 
88 /**
89  * The fragment shader front-end.
90  *
91  * Translates either GLSL IR or Mesa IR (for ARB_fragment_program) into FS IR.
92  */
93 class fs_visitor : public backend_shader
94 {
95 public:
96    fs_visitor(const struct brw_compiler *compiler, void *log_data,
97               void *mem_ctx,
98               const brw_base_prog_key *key,
99               struct brw_stage_prog_data *prog_data,
100               const nir_shader *shader,
101               unsigned dispatch_width,
102               int shader_time_index,
103               const struct brw_vue_map *input_vue_map = NULL);
104    fs_visitor(const struct brw_compiler *compiler, void *log_data,
105               void *mem_ctx,
106               struct brw_gs_compile *gs_compile,
107               struct brw_gs_prog_data *prog_data,
108               const nir_shader *shader,
109               int shader_time_index);
110    void init();
111    ~fs_visitor();
112 
113    fs_reg vgrf(const glsl_type *const type);
114    void import_uniforms(fs_visitor *v);
115 
116    void VARYING_PULL_CONSTANT_LOAD(const brw::fs_builder &bld,
117                                    const fs_reg &dst,
118                                    const fs_reg &surf_index,
119                                    const fs_reg &varying_offset,
120                                    uint32_t const_offset);
121    void DEP_RESOLVE_MOV(const brw::fs_builder &bld, int grf);
122 
123    bool run_fs(bool allow_spilling, bool do_rep_send);
124    bool run_vs();
125    bool run_tcs();
126    bool run_tes();
127    bool run_gs();
128    bool run_cs(bool allow_spilling);
129    void optimize();
130    void allocate_registers(bool allow_spilling);
131    void setup_fs_payload_gen4();
132    void setup_fs_payload_gen6();
133    void setup_vs_payload();
134    void setup_gs_payload();
135    void setup_cs_payload();
136    bool fixup_sends_duplicate_payload();
137    void fixup_3src_null_dest();
138    bool fixup_nomask_control_flow();
139    void assign_curb_setup();
140    void assign_urb_setup();
141    void convert_attr_sources_to_hw_regs(fs_inst *inst);
142    void assign_vs_urb_setup();
143    void assign_tcs_urb_setup();
144    void assign_tes_urb_setup();
145    void assign_gs_urb_setup();
146    bool assign_regs(bool allow_spilling, bool spill_all);
147    void assign_regs_trivial();
148    void calculate_payload_ranges(int payload_node_count,
149                                  int *payload_last_use_ip) const;
150    void split_virtual_grfs();
151    bool compact_virtual_grfs();
152    void assign_constant_locations();
153    bool get_pull_locs(const fs_reg &src, unsigned *out_surf_index,
154                       unsigned *out_pull_index);
155    void lower_constant_loads();
156    virtual void invalidate_analysis(brw::analysis_dependency_class c);
157    void validate();
158    bool opt_algebraic();
159    bool opt_redundant_discard_jumps();
160    bool opt_cse();
161    bool opt_cse_local(const brw::fs_live_variables &live, bblock_t *block, int &ip);
162 
163    bool opt_copy_propagation();
164    bool try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry);
165    bool try_constant_propagate(fs_inst *inst, acp_entry *entry);
166    bool opt_copy_propagation_local(void *mem_ctx, bblock_t *block,
167                                    exec_list *acp);
168    bool opt_drop_redundant_mov_to_flags();
169    bool opt_register_renaming();
170    bool opt_bank_conflicts();
171    bool register_coalesce();
172    bool compute_to_mrf();
173    bool eliminate_find_live_channel();
174    bool dead_code_eliminate();
175    bool remove_duplicate_mrf_writes();
176    bool remove_extra_rounding_modes();
177 
178    void schedule_instructions(instruction_scheduler_mode mode);
179    void insert_gen4_send_dependency_workarounds();
180    void insert_gen4_pre_send_dependency_workarounds(bblock_t *block,
181                                                     fs_inst *inst);
182    void insert_gen4_post_send_dependency_workarounds(bblock_t *block,
183                                                      fs_inst *inst);
184    void vfail(const char *msg, va_list args);
185    void fail(const char *msg, ...);
186    void limit_dispatch_width(unsigned n, const char *msg);
187    void lower_uniform_pull_constant_loads();
188    bool lower_load_payload();
189    bool lower_pack();
190    bool lower_regioning();
191    bool lower_logical_sends();
192    bool lower_integer_multiplication();
193    bool lower_minmax();
194    bool lower_simd_width();
195    bool lower_barycentrics();
196    bool lower_scoreboard();
197    bool lower_sub_sat();
198    bool opt_combine_constants();
199 
200    void emit_dummy_fs();
201    void emit_repclear_shader();
202    void emit_fragcoord_interpolation(fs_reg wpos);
203    fs_reg *emit_frontfacing_interpolation();
204    fs_reg *emit_samplepos_setup();
205    fs_reg *emit_sampleid_setup();
206    fs_reg *emit_samplemaskin_setup();
207    void emit_interpolation_setup_gen4();
208    void emit_interpolation_setup_gen6();
209    void compute_sample_position(fs_reg dst, fs_reg int_sample_pos);
210    fs_reg emit_mcs_fetch(const fs_reg &coordinate, unsigned components,
211                          const fs_reg &texture,
212                          const fs_reg &texture_handle);
213    void emit_gen6_gather_wa(uint8_t wa, fs_reg dst);
214    fs_reg resolve_source_modifiers(const fs_reg &src);
215    void emit_discard_jump();
216    void emit_fsign(const class brw::fs_builder &, const nir_alu_instr *instr,
217                    fs_reg result, fs_reg *op, unsigned fsign_src);
218    void emit_shader_float_controls_execution_mode();
219    bool opt_peephole_sel();
220    bool opt_peephole_predicated_break();
221    bool opt_saturate_propagation();
222    bool opt_cmod_propagation();
223    bool opt_zero_samples();
224 
225    void set_tcs_invocation_id();
226 
227    void emit_nir_code();
228    void nir_setup_outputs();
229    void nir_setup_uniforms();
230    void nir_emit_system_values();
231    void nir_emit_impl(nir_function_impl *impl);
232    void nir_emit_cf_list(exec_list *list);
233    void nir_emit_if(nir_if *if_stmt);
234    void nir_emit_loop(nir_loop *loop);
235    void nir_emit_block(nir_block *block);
236    void nir_emit_instr(nir_instr *instr);
237    void nir_emit_alu(const brw::fs_builder &bld, nir_alu_instr *instr,
238                      bool need_dest);
239    bool try_emit_b2fi_of_inot(const brw::fs_builder &bld, fs_reg result,
240                               nir_alu_instr *instr);
241    void nir_emit_load_const(const brw::fs_builder &bld,
242                             nir_load_const_instr *instr);
243    void nir_emit_vs_intrinsic(const brw::fs_builder &bld,
244                               nir_intrinsic_instr *instr);
245    void nir_emit_tcs_intrinsic(const brw::fs_builder &bld,
246                                nir_intrinsic_instr *instr);
247    void nir_emit_gs_intrinsic(const brw::fs_builder &bld,
248                               nir_intrinsic_instr *instr);
249    void nir_emit_fs_intrinsic(const brw::fs_builder &bld,
250                               nir_intrinsic_instr *instr);
251    void nir_emit_cs_intrinsic(const brw::fs_builder &bld,
252                               nir_intrinsic_instr *instr);
253    fs_reg get_nir_image_intrinsic_image(const brw::fs_builder &bld,
254                                         nir_intrinsic_instr *instr);
255    fs_reg get_nir_ssbo_intrinsic_index(const brw::fs_builder &bld,
256                                        nir_intrinsic_instr *instr);
257    fs_reg swizzle_nir_scratch_addr(const brw::fs_builder &bld,
258                                    const fs_reg &addr,
259                                    bool in_dwords);
260    void nir_emit_intrinsic(const brw::fs_builder &bld,
261                            nir_intrinsic_instr *instr);
262    void nir_emit_tes_intrinsic(const brw::fs_builder &bld,
263                                nir_intrinsic_instr *instr);
264    void nir_emit_ssbo_atomic(const brw::fs_builder &bld,
265                              int op, nir_intrinsic_instr *instr);
266    void nir_emit_ssbo_atomic_float(const brw::fs_builder &bld,
267                                    int op, nir_intrinsic_instr *instr);
268    void nir_emit_shared_atomic(const brw::fs_builder &bld,
269                                int op, nir_intrinsic_instr *instr);
270    void nir_emit_shared_atomic_float(const brw::fs_builder &bld,
271                                      int op, nir_intrinsic_instr *instr);
272    void nir_emit_global_atomic(const brw::fs_builder &bld,
273                                int op, nir_intrinsic_instr *instr);
274    void nir_emit_global_atomic_float(const brw::fs_builder &bld,
275                                      int op, nir_intrinsic_instr *instr);
276    void nir_emit_texture(const brw::fs_builder &bld,
277                          nir_tex_instr *instr);
278    void nir_emit_jump(const brw::fs_builder &bld,
279                       nir_jump_instr *instr);
280    fs_reg get_nir_src(const nir_src &src);
281    fs_reg get_nir_src_imm(const nir_src &src);
282    fs_reg get_nir_dest(const nir_dest &dest);
283    fs_reg get_indirect_offset(nir_intrinsic_instr *instr);
284    fs_reg get_tcs_single_patch_icp_handle(const brw::fs_builder &bld,
285                                           nir_intrinsic_instr *instr);
286    fs_reg get_tcs_eight_patch_icp_handle(const brw::fs_builder &bld,
287                                          nir_intrinsic_instr *instr);
288    struct brw_reg get_tcs_output_urb_handle();
289 
290    void emit_percomp(const brw::fs_builder &bld, const fs_inst &inst,
291                      unsigned wr_mask);
292 
293    bool optimize_extract_to_float(nir_alu_instr *instr,
294                                   const fs_reg &result);
295    bool optimize_frontfacing_ternary(nir_alu_instr *instr,
296                                      const fs_reg &result);
297 
298    void emit_alpha_test();
299    fs_inst *emit_single_fb_write(const brw::fs_builder &bld,
300                                  fs_reg color1, fs_reg color2,
301                                  fs_reg src0_alpha, unsigned components);
302    void emit_alpha_to_coverage_workaround(const fs_reg &src0_alpha);
303    void emit_fb_writes();
304    fs_inst *emit_non_coherent_fb_read(const brw::fs_builder &bld,
305                                       const fs_reg &dst, unsigned target);
306    void emit_urb_writes(const fs_reg &gs_vertex_count = fs_reg());
307    void set_gs_stream_control_data_bits(const fs_reg &vertex_count,
308                                         unsigned stream_id);
309    void emit_gs_control_data_bits(const fs_reg &vertex_count);
310    void emit_gs_end_primitive(const nir_src &vertex_count_nir_src);
311    void emit_gs_vertex(const nir_src &vertex_count_nir_src,
312                        unsigned stream_id);
313    void emit_gs_thread_end();
314    void emit_gs_input_load(const fs_reg &dst, const nir_src &vertex_src,
315                            unsigned base_offset, const nir_src &offset_src,
316                            unsigned num_components, unsigned first_component);
317    void emit_cs_terminate();
318    fs_reg *emit_cs_work_group_id_setup();
319 
320    void emit_barrier();
321 
322    void emit_shader_time_begin();
323    void emit_shader_time_end();
324    void SHADER_TIME_ADD(const brw::fs_builder &bld,
325                         int shader_time_subindex,
326                         fs_reg value);
327 
328    fs_reg get_timestamp(const brw::fs_builder &bld);
329 
330    fs_reg interp_reg(int location, int channel);
331 
332    virtual void dump_instructions() const;
333    virtual void dump_instructions(const char *name) const;
334    void dump_instruction(const backend_instruction *inst) const;
335    void dump_instruction(const backend_instruction *inst, FILE *file) const;
336 
337    const brw_base_prog_key *const key;
338    const struct brw_sampler_prog_key_data *key_tex;
339 
340    struct brw_gs_compile *gs_compile;
341 
342    struct brw_stage_prog_data *prog_data;
343 
344    const struct brw_vue_map *input_vue_map;
345 
346    int *param_size;
347 
348    BRW_ANALYSIS(live_analysis, brw::fs_live_variables,
349                 backend_shader *) live_analysis;
350    BRW_ANALYSIS(regpressure_analysis, brw::register_pressure,
351                 fs_visitor *) regpressure_analysis;
352    BRW_ANALYSIS(performance_analysis, brw::performance,
353                 fs_visitor *) performance_analysis;
354 
355    /** Number of uniform variable components visited. */
356    unsigned uniforms;
357 
358    /** Byte-offset for the next available spot in the scratch space buffer. */
359    unsigned last_scratch;
360 
361    /**
362     * Array mapping UNIFORM register numbers to the pull parameter index,
363     * or -1 if this uniform register isn't being uploaded as a pull constant.
364     */
365    int *pull_constant_loc;
366 
367    /**
368     * Array mapping UNIFORM register numbers to the push parameter index,
369     * or -1 if this uniform register isn't being uploaded as a push constant.
370     */
371    int *push_constant_loc;
372 
373    fs_reg subgroup_id;
374    fs_reg group_size[3];
375    fs_reg scratch_base;
376    fs_reg frag_depth;
377    fs_reg frag_stencil;
378    fs_reg sample_mask;
379    fs_reg outputs[VARYING_SLOT_MAX];
380    fs_reg dual_src_output;
381    int first_non_payload_grf;
382    /** Either BRW_MAX_GRF or GEN7_MRF_HACK_START */
383    unsigned max_grf;
384 
385    fs_reg *nir_locals;
386    fs_reg *nir_ssa_values;
387    fs_reg *nir_system_values;
388 
389    bool failed;
390    char *fail_msg;
391 
392    /** Register numbers for thread payload fields. */
393    struct thread_payload {
394       uint8_t subspan_coord_reg[2];
395       uint8_t source_depth_reg[2];
396       uint8_t source_w_reg[2];
397       uint8_t aa_dest_stencil_reg[2];
398       uint8_t dest_depth_reg[2];
399       uint8_t sample_pos_reg[2];
400       uint8_t sample_mask_in_reg[2];
401       uint8_t barycentric_coord_reg[BRW_BARYCENTRIC_MODE_COUNT][2];
402       uint8_t local_invocation_id_reg[2];
403 
404       /** The number of thread payload registers the hardware will supply. */
405       uint8_t num_regs;
406    } payload;
407 
408    bool source_depth_to_render_target;
409    bool runtime_check_aads_emit;
410 
411    fs_reg pixel_x;
412    fs_reg pixel_y;
413    fs_reg wpos_w;
414    fs_reg pixel_w;
415    fs_reg delta_xy[BRW_BARYCENTRIC_MODE_COUNT];
416    fs_reg shader_start_time;
417    fs_reg final_gs_vertex_count;
418    fs_reg control_data_bits;
419    fs_reg invocation_id;
420 
421    unsigned grf_used;
422    bool spilled_any_registers;
423 
424    const unsigned dispatch_width; /**< 8, 16 or 32 */
425    unsigned max_dispatch_width;
426 
427    int shader_time_index;
428 
429    struct shader_stats shader_stats;
430 
431    brw::fs_builder bld;
432 
433 private:
434    fs_reg prepare_alu_destination_and_sources(const brw::fs_builder &bld,
435                                               nir_alu_instr *instr,
436                                               fs_reg *op,
437                                               bool need_dest);
438 
439    void resolve_inot_sources(const brw::fs_builder &bld, nir_alu_instr *instr,
440                              fs_reg *op);
441    void lower_mul_dword_inst(fs_inst *inst, bblock_t *block);
442    void lower_mul_qword_inst(fs_inst *inst, bblock_t *block);
443    void lower_mulh_inst(fs_inst *inst, bblock_t *block);
444 
445    unsigned workgroup_size() const;
446 };
447 
448 /**
449  * Return the flag register used in fragment shaders to keep track of live
450  * samples.  On Gen7+ we use f1.0-f1.1 to allow discard jumps in SIMD32
451  * dispatch mode, while earlier generations are constrained to f0.1, which
452  * limits the dispatch width to SIMD16 for fragment shaders that use discard.
453  */
454 static inline unsigned
sample_mask_flag_subreg(const fs_visitor * shader)455 sample_mask_flag_subreg(const fs_visitor *shader)
456 {
457    assert(shader->stage == MESA_SHADER_FRAGMENT);
458    return shader->devinfo->gen >= 7 ? 2 : 1;
459 }
460 
461 /**
462  * The fragment shader code generator.
463  *
464  * Translates FS IR to actual i965 assembly code.
465  */
466 class fs_generator
467 {
468 public:
469    fs_generator(const struct brw_compiler *compiler, void *log_data,
470                 void *mem_ctx,
471                 struct brw_stage_prog_data *prog_data,
472                 bool runtime_check_aads_emit,
473                 gl_shader_stage stage);
474    ~fs_generator();
475 
476    void enable_debug(const char *shader_name);
477    int generate_code(const cfg_t *cfg, int dispatch_width,
478                      struct shader_stats shader_stats,
479                      const brw::performance &perf,
480                      struct brw_compile_stats *stats);
481    const unsigned *get_assembly();
482 
483 private:
484    void fire_fb_write(fs_inst *inst,
485                       struct brw_reg payload,
486                       struct brw_reg implied_header,
487                       GLuint nr);
488    void generate_send(fs_inst *inst,
489                       struct brw_reg dst,
490                       struct brw_reg desc,
491                       struct brw_reg ex_desc,
492                       struct brw_reg payload,
493                       struct brw_reg payload2);
494    void generate_fb_write(fs_inst *inst, struct brw_reg payload);
495    void generate_fb_read(fs_inst *inst, struct brw_reg dst,
496                          struct brw_reg payload);
497    void generate_urb_read(fs_inst *inst, struct brw_reg dst, struct brw_reg payload);
498    void generate_urb_write(fs_inst *inst, struct brw_reg payload);
499    void generate_cs_terminate(fs_inst *inst, struct brw_reg payload);
500    void generate_barrier(fs_inst *inst, struct brw_reg src);
501    bool generate_linterp(fs_inst *inst, struct brw_reg dst,
502 			 struct brw_reg *src);
503    void generate_tex(fs_inst *inst, struct brw_reg dst,
504                      struct brw_reg surface_index,
505                      struct brw_reg sampler_index);
506    void generate_get_buffer_size(fs_inst *inst, struct brw_reg dst,
507                                  struct brw_reg src,
508                                  struct brw_reg surf_index);
509    void generate_ddx(const fs_inst *inst,
510                      struct brw_reg dst, struct brw_reg src);
511    void generate_ddy(const fs_inst *inst,
512                      struct brw_reg dst, struct brw_reg src);
513    void generate_scratch_write(fs_inst *inst, struct brw_reg src);
514    void generate_scratch_read(fs_inst *inst, struct brw_reg dst);
515    void generate_scratch_read_gen7(fs_inst *inst, struct brw_reg dst);
516    void generate_uniform_pull_constant_load(fs_inst *inst, struct brw_reg dst,
517                                             struct brw_reg index,
518                                             struct brw_reg offset);
519    void generate_uniform_pull_constant_load_gen7(fs_inst *inst,
520                                                  struct brw_reg dst,
521                                                  struct brw_reg surf_index,
522                                                  struct brw_reg payload);
523    void generate_varying_pull_constant_load_gen4(fs_inst *inst,
524                                                  struct brw_reg dst,
525                                                  struct brw_reg index);
526    void generate_mov_dispatch_to_flags(fs_inst *inst);
527 
528    void generate_pixel_interpolator_query(fs_inst *inst,
529                                           struct brw_reg dst,
530                                           struct brw_reg src,
531                                           struct brw_reg msg_data,
532                                           unsigned msg_type);
533 
534    void generate_set_sample_id(fs_inst *inst,
535                                struct brw_reg dst,
536                                struct brw_reg src0,
537                                struct brw_reg src1);
538 
539    void generate_discard_jump(fs_inst *inst);
540 
541    void generate_pack_half_2x16_split(fs_inst *inst,
542                                       struct brw_reg dst,
543                                       struct brw_reg x,
544                                       struct brw_reg y);
545 
546    void generate_shader_time_add(fs_inst *inst,
547                                  struct brw_reg payload,
548                                  struct brw_reg offset,
549                                  struct brw_reg value);
550 
551    void generate_mov_indirect(fs_inst *inst,
552                               struct brw_reg dst,
553                               struct brw_reg reg,
554                               struct brw_reg indirect_byte_offset);
555 
556    void generate_shuffle(fs_inst *inst,
557                          struct brw_reg dst,
558                          struct brw_reg src,
559                          struct brw_reg idx);
560 
561    void generate_quad_swizzle(const fs_inst *inst,
562                               struct brw_reg dst, struct brw_reg src,
563                               unsigned swiz);
564 
565    bool patch_discard_jumps_to_fb_writes();
566 
567    const struct brw_compiler *compiler;
568    void *log_data; /* Passed to compiler->*_log functions */
569 
570    const struct gen_device_info *devinfo;
571 
572    struct brw_codegen *p;
573    struct brw_stage_prog_data * const prog_data;
574 
575    unsigned dispatch_width; /**< 8, 16 or 32 */
576 
577    exec_list discard_halt_patches;
578    bool runtime_check_aads_emit;
579    bool debug_flag;
580    const char *shader_name;
581    gl_shader_stage stage;
582    void *mem_ctx;
583 };
584 
585 namespace brw {
586    inline fs_reg
587    fetch_payload_reg(const brw::fs_builder &bld, uint8_t regs[2],
588                      brw_reg_type type = BRW_REGISTER_TYPE_F)
589    {
590       if (!regs[0])
591          return fs_reg();
592 
593       if (bld.dispatch_width() > 16) {
594          const fs_reg tmp = bld.vgrf(type);
595          const brw::fs_builder hbld = bld.exec_all().group(16, 0);
596          const unsigned m = bld.dispatch_width() / hbld.dispatch_width();
597          fs_reg *const components = new fs_reg[m];
598 
599          for (unsigned g = 0; g < m; g++)
600                components[g] = retype(brw_vec8_grf(regs[g], 0), type);
601 
602          hbld.LOAD_PAYLOAD(tmp, components, m, 0);
603 
604          delete[] components;
605          return tmp;
606 
607       } else {
608          return fs_reg(retype(brw_vec8_grf(regs[0], 0), type));
609       }
610    }
611 
612    inline fs_reg
fetch_barycentric_reg(const brw::fs_builder & bld,uint8_t regs[2])613    fetch_barycentric_reg(const brw::fs_builder &bld, uint8_t regs[2])
614    {
615       if (!regs[0])
616          return fs_reg();
617 
618       const fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_F, 2);
619       const brw::fs_builder hbld = bld.exec_all().group(8, 0);
620       const unsigned m = bld.dispatch_width() / hbld.dispatch_width();
621       fs_reg *const components = new fs_reg[2 * m];
622 
623       for (unsigned c = 0; c < 2; c++) {
624          for (unsigned g = 0; g < m; g++)
625             components[c * m + g] = offset(brw_vec8_grf(regs[g / 2], 0),
626                                            hbld, c + 2 * (g % 2));
627       }
628 
629       hbld.LOAD_PAYLOAD(tmp, components, 2 * m, 0);
630 
631       delete[] components;
632       return tmp;
633    }
634 
635    bool
636    lower_src_modifiers(fs_visitor *v, bblock_t *block, fs_inst *inst, unsigned i);
637 }
638 
639 void shuffle_from_32bit_read(const brw::fs_builder &bld,
640                              const fs_reg &dst,
641                              const fs_reg &src,
642                              uint32_t first_component,
643                              uint32_t components);
644 
645 fs_reg setup_imm_df(const brw::fs_builder &bld,
646                     double v);
647 
648 fs_reg setup_imm_b(const brw::fs_builder &bld,
649                    int8_t v);
650 
651 fs_reg setup_imm_ub(const brw::fs_builder &bld,
652                    uint8_t v);
653 
654 enum brw_barycentric_mode brw_barycentric_mode(enum glsl_interp_mode mode,
655                                                nir_intrinsic_op op);
656 
657 uint32_t brw_fb_write_msg_control(const fs_inst *inst,
658                                   const struct brw_wm_prog_data *prog_data);
659 
660 void brw_compute_urb_setup_index(struct brw_wm_prog_data *wm_prog_data);
661 
662 #endif /* BRW_FS_H */
663