1 /*
2  * Copyright © 2011 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #ifndef BRW_VEC4_H
25 #define BRW_VEC4_H
26 
27 #include "brw_shader.h"
28 
29 #ifdef __cplusplus
30 #include "brw_ir_vec4.h"
31 #include "brw_ir_performance.h"
32 #include "brw_vec4_builder.h"
33 #include "brw_vec4_live_variables.h"
34 #endif
35 
36 #include "compiler/glsl/ir.h"
37 #include "compiler/nir/nir.h"
38 
39 
40 #ifdef __cplusplus
41 extern "C" {
42 #endif
43 
/**
 * Code generation entry point for the vec4 back-end.
 *
 * The prototype sits inside the extern "C" wrapper opened just above it when
 * the header is compiled as C++.  Judging by its name and parameters it
 * presumably turns the program described by \p cfg into native instructions
 * -- confirm against the definition, which is not part of this header.
 *
 * \param compiler       compiler object the shader is built with
 * \param log_data       opaque pointer, presumably handed back to the
 *                       logging callbacks (confirm)
 * \param mem_ctx        opaque allocation context
 * \param nir            the NIR shader being compiled (read-only)
 * \param prog_data      VUE program data for the shader
 * \param cfg            control flow graph to generate code from (read-only)
 * \param perf           performance analysis result, passed by const reference
 * \param stats          compile statistics structure
 * \param debug_enabled  debug flag
 *
 * \return A pointer to const unsigned data.  Its length and ownership are not
 *         visible from this declaration (presumably tied to \p mem_ctx --
 *         TODO confirm).
 *
 * NOTE(review): \p perf is a C++ reference to brw::performance, so this
 * prototype can only be parsed by a C++ compiler, yet it is not protected by
 * the __cplusplus guard that surrounds the C++-only includes and the class
 * declaration in this header.
 */
44 const unsigned *
45 brw_vec4_generate_assembly(const struct brw_compiler *compiler,
46                            void *log_data,
47                            void *mem_ctx,
48                            const nir_shader *nir,
49                            struct brw_vue_prog_data *prog_data,
50                            const struct cfg_t *cfg,
51                            const brw::performance &perf,
52                            struct brw_compile_stats *stats,
53                            bool debug_enabled);
54 
55 #ifdef __cplusplus
56 } /* extern "C" */
57 
58 namespace brw {
59 /**
60  * The vertex shader front-end.
61  *
62  * Translates either GLSL IR or Mesa IR (for ARB_vertex_program and
63  * fixed-function) into VS IR.
    *
    * NOTE(review): the description above looks stale.  The constructor takes
    * a nir_shader and the only translation entry points declared in this
    * class are the nir_emit_*() methods, so the input appears to be NIR.
    * Confirm and update the wording.
    *
    * The class is abstract: setup_payload(), emit_prolog(),
    * emit_thread_end(), emit_urb_write_header() and emit_urb_write_opcode()
    * are pure virtual and have to be supplied by a derived class.
64  */
65 class vec4_visitor : public backend_shader
66 {
67 public:
      /**
       * Construct a visitor for \p shader.
       *
       * The parameter names mirror members declared further down
       * (key_tex, prog_data, no_spills, shader_time_index); presumably they
       * initialise them, but the definition is not part of this header.
       */
68    vec4_visitor(const struct brw_compiler *compiler,
69                 void *log_data,
70                 const struct brw_sampler_prog_key_data *key,
71                 struct brw_vue_prog_data *prog_data,
72                 const nir_shader *shader,
73 		void *mem_ctx,
74                 bool no_spills,
75                 int shader_time_index,
76                 bool debug_enabled);
77 
      /**
       * Null destination register helpers.
       *
       * Each one wraps brw_null_reg() in a dst_reg.  dst_null_f() uses the
       * register exactly as brw_null_reg() returns it; the _df, _d and _ud
       * variants retype it to DF, D and UD respectively.
       */
dst_null_f()78    dst_reg dst_null_f()
79    {
80       return dst_reg(brw_null_reg());
81    }
82 
      /** Null destination retyped to BRW_REGISTER_TYPE_DF. */
dst_null_df()83    dst_reg dst_null_df()
84    {
85       return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_DF));
86    }
87 
      /** Null destination retyped to BRW_REGISTER_TYPE_D. */
dst_null_d()88    dst_reg dst_null_d()
89    {
90       return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
91    }
92 
      /** Null destination retyped to BRW_REGISTER_TYPE_UD. */
dst_null_ud()93    dst_reg dst_null_ud()
94    {
95       return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
96    }
97 
      /* Both pointers are themselves const, so they cannot be re-seated once
       * the object has been constructed.  The sampler key is read-only, the
       * program data is writable through the pointer.
       */
98    const struct brw_sampler_prog_key_data * const key_tex;
99    struct brw_vue_prog_data * const prog_data;
       /* Failure state.  Presumably filled in by fail() -- confirm in the
        * definition.
        */
100    char *fail_msg;
101    bool failed;
102 
103    /**
104     * GLSL IR currently being processed, which is associated with our
105     * driver IR instructions for debugging purposes.
106     */
107    const void *base_ir;
108    const char *current_annotation;
109 
       /* Register file and push-constant bookkeeping.  The exact units of
        * these counters are not visible from this header.
        */
110    int first_non_payload_grf;
111    unsigned ubo_push_start[4];
112    unsigned push_length;
113    unsigned int max_grf;
       /* Analysis holders.  Note the different second template argument:
        * the live-variable analysis is parameterised on backend_shader, the
        * performance analysis on vec4_visitor itself.
        */
114    brw_analysis<brw::vec4_live_variables, backend_shader> live_analysis;
115    brw_analysis<brw::performance, vec4_visitor> performance_analysis;
116 
117    bool need_all_constants_in_pull_buffer;
118 
119    /* Regs for vertex results.  Generated at ir_variable visiting time
120     * for the ir->location's used.
        *
        * output_reg and output_num_components hold four entries per varying
        * slot; output_reg_annotation holds one string per slot.
121     */
122    dst_reg output_reg[VARYING_SLOT_TESS_MAX][4];
123    unsigned output_num_components[VARYING_SLOT_TESS_MAX][4];
124    const char *output_reg_annotation[VARYING_SLOT_TESS_MAX];
125    int uniforms;
126 
       /* Presumably the timestamp captured by emit_shader_time_begin() --
        * TODO confirm.
        */
127    src_reg shader_start_time;
128 
       /* Top-level driver and failure reporting.  fail() is variadic; the
        * format is presumably printf-style (confirm in the definition).
        */
129    bool run();
130    void fail(const char *msg, ...);
131 
132    int setup_uniforms(int payload_reg);
133 
       /* Register allocation, spilling, uniform handling and optimization
        * passes.  NOTE(review): the bool-returning passes presumably report
        * whether they changed the program -- confirm before relying on it.
        */
134    bool reg_allocate_trivial();
135    bool reg_allocate();
136    void evaluate_spill_costs(float *spill_costs, bool *no_spill);
137    int choose_spill_reg(struct ra_graph *g);
138    void spill_reg(unsigned spill_reg);
139    void move_grf_array_access_to_scratch();
140    void move_uniform_array_access_to_pull_constants();
141    void move_push_constants_to_pull_constants();
142    void split_uniform_registers();
143    void pack_uniform_registers();
144    void setup_push_ranges();
       /* Virtual.  NOTE(review): presumably overrides a virtual declared in
        * backend_shader; the base class is not visible here.
        */
145    virtual void invalidate_analysis(brw::analysis_dependency_class c);
146    void split_virtual_grfs();
147    bool opt_vector_float();
148    bool opt_reduce_swizzle();
149    bool dead_code_eliminate();
150    bool opt_cmod_propagation();
       /* do_constant_prop defaults to true. */
151    bool opt_copy_propagation(bool do_constant_prop = true);
152    bool opt_cse_local(bblock_t *block, const vec4_live_variables &live);
153    bool opt_cse();
154    bool opt_algebraic();
155    bool opt_register_coalesce();
156    bool eliminate_find_live_channel();
157    bool is_dep_ctrl_unsafe(const vec4_instruction *inst);
158    void opt_set_dependency_control();
159    void opt_schedule_instructions();
160    void convert_to_hw_regs();
161    void fixup_3src_null_dest();
162 
       /* 64-bit / SIMD width lowering helpers. */
163    bool is_supported_64bit_region(vec4_instruction *inst, unsigned arg);
164    bool lower_simd_width();
165    bool scalarize_df();
166    bool lower_64bit_mad_to_mul_add();
167    void apply_logical_swizzle(struct brw_reg *hw_reg,
168                               vec4_instruction *inst, int arg);
169 
       /* Instruction emission.  The first overload takes an already built
        * instruction; the remaining ones take an opcode plus an optional
        * destination and up to three sources.  All return a
        * vec4_instruction pointer.
        */
170    vec4_instruction *emit(vec4_instruction *inst);
171 
172    vec4_instruction *emit(enum opcode opcode);
173    vec4_instruction *emit(enum opcode opcode, const dst_reg &dst);
174    vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
175                           const src_reg &src0);
176    vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
177                           const src_reg &src0, const src_reg &src1);
178    vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
179                           const src_reg &src0, const src_reg &src1,
180                           const src_reg &src2);
181 
182    vec4_instruction *emit_before(bblock_t *block,
183                                  vec4_instruction *inst,
184 				 vec4_instruction *new_inst);
185 
       /* EMITn(op) declares a member function named after the opcode that
        * takes a destination and n sources and returns a vec4_instruction
        * pointer.  The three macros are undefined again right after the
        * list, so they do not leak out of this header.
        */
186 #define EMIT1(op) vec4_instruction *op(const dst_reg &, const src_reg &);
187 #define EMIT2(op) vec4_instruction *op(const dst_reg &, const src_reg &, const src_reg &);
188 #define EMIT3(op) vec4_instruction *op(const dst_reg &, const src_reg &, const src_reg &, const src_reg &);
189    EMIT1(MOV)
190    EMIT1(NOT)
191    EMIT1(RNDD)
192    EMIT1(RNDE)
193    EMIT1(RNDZ)
194    EMIT1(FRC)
195    EMIT1(F32TO16)
196    EMIT1(F16TO32)
197    EMIT2(ADD)
198    EMIT2(MUL)
199    EMIT2(MACH)
200    EMIT2(MAC)
201    EMIT2(AND)
202    EMIT2(OR)
203    EMIT2(XOR)
204    EMIT2(DP3)
205    EMIT2(DP4)
206    EMIT2(DPH)
207    EMIT2(SHL)
208    EMIT2(SHR)
209    EMIT2(ASR)
       /* CMP and IF are declared by hand because they carry a conditional
        * modifier or predicate argument that the EMITn signatures lack.
        */
210    vec4_instruction *CMP(dst_reg dst, src_reg src0, src_reg src1,
211 			 enum brw_conditional_mod condition);
212    vec4_instruction *IF(src_reg src0, src_reg src1,
213                         enum brw_conditional_mod condition);
214    vec4_instruction *IF(enum brw_predicate predicate);
215    EMIT1(SCRATCH_READ)
216    EMIT2(SCRATCH_WRITE)
217    EMIT3(LRP)
218    EMIT1(BFREV)
219    EMIT3(BFE)
220    EMIT2(BFI1)
221    EMIT3(BFI2)
222    EMIT1(FBH)
223    EMIT1(FBL)
224    EMIT1(CBIT)
225    EMIT3(MAD)
226    EMIT2(ADDC)
227    EMIT2(SUBB)
228    EMIT1(DIM)
229 
230 #undef EMIT1
231 #undef EMIT2
232 #undef EMIT3
233 
234    vec4_instruction *emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst,
235                                  src_reg src0, src_reg src1);
236 
237    /**
238     * Copy any live channel from \p src to the first channel of the
239     * result.
240     */
241    src_reg emit_uniformize(const src_reg &src);
242 
243    /** Fix all float operands of a 3-source instruction. */
244    void fix_float_operands(src_reg op[3], nir_alu_instr *instr);
245 
246    src_reg fix_3src_operand(const src_reg &src);
247 
       /* src1 defaults to a default-constructed src_reg, so the same
        * function can be called with one or two sources.
        */
248    vec4_instruction *emit_math(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
249                                const src_reg &src1 = src_reg());
250 
251    src_reg fix_math_operand(const src_reg &src);
252 
       /* Pack / unpack helpers for half-float and 8-bit normalized data. */
253    void emit_pack_half_2x16(dst_reg dst, src_reg src0);
254    void emit_unpack_half_2x16(dst_reg dst, src_reg src0);
255    void emit_unpack_unorm_4x8(const dst_reg &dst, src_reg src0);
256    void emit_unpack_snorm_4x8(const dst_reg &dst, src_reg src0);
257    void emit_pack_unorm_4x8(const dst_reg &dst, const src_reg &src0);
258    void emit_pack_snorm_4x8(const dst_reg &dst, const src_reg &src0);
259 
       /* Texturing. */
260    void emit_texture(ir_texture_opcode op,
261                      dst_reg dest,
262                      int dest_components,
263                      src_reg coordinate,
264                      int coord_components,
265                      src_reg shadow_comparator,
266                      src_reg lod, src_reg lod2,
267                      src_reg sample_index,
268                      uint32_t constant_offset,
269                      src_reg offset_value,
270                      src_reg mcs,
271                      uint32_t surface, src_reg surface_reg,
272                      src_reg sampler_reg);
273 
274    src_reg emit_mcs_fetch(const glsl_type *coordinate_type, src_reg coordinate,
275                           src_reg surface);
276    void emit_gfx6_gather_wa(uint8_t wa, dst_reg dst);
277 
       /* Vertex output (URB) helpers.  emit_urb_slot() is virtual so a
        * derived class can replace it.
        */
278    void emit_ndc_computation();
279    void emit_psiz_and_flags(dst_reg reg);
280    vec4_instruction *emit_generic_urb_slot(dst_reg reg, int varying, int comp);
281    virtual void emit_urb_slot(dst_reg reg, int varying);
282 
       /* Shader-time instrumentation. */
283    void emit_shader_time_begin();
284    void emit_shader_time_end();
285    void emit_shader_time_write(int shader_time_subindex, src_reg value);
286 
       /* Scratch space and pull constant access.  The block / inst pairs
        * presumably name the insertion point for the new instructions --
        * confirm in the definitions.
        */
287    src_reg get_scratch_offset(bblock_t *block, vec4_instruction *inst,
288 			      src_reg *reladdr, int reg_offset);
289    void emit_scratch_read(bblock_t *block, vec4_instruction *inst,
290 			  dst_reg dst,
291 			  src_reg orig_src,
292 			  int base_offset);
293    void emit_scratch_write(bblock_t *block, vec4_instruction *inst,
294 			   int base_offset);
295    void emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
296 				dst_reg dst,
297 				src_reg orig_src,
298                                 int base_offset,
299                                 src_reg indirect);
300    void emit_pull_constant_load_reg(dst_reg dst,
301                                     src_reg surf_index,
302                                     src_reg offset,
303                                     bblock_t *before_block,
304                                     vec4_instruction *before_inst);
305    src_reg emit_resolve_reladdr(int scratch_loc[], bblock_t *block,
306                                 vec4_instruction *inst, src_reg src);
307 
308    void resolve_ud_negate(src_reg *reg);
309 
310    bool lower_minmax();
311 
312    src_reg get_timestamp();
313 
       /* Debug dump, with and without an explicit output stream. */
314    void dump_instruction(const backend_instruction *inst) const;
315    void dump_instruction(const backend_instruction *inst, FILE *file) const;
316 
317    bool is_high_sampler(src_reg sampler);
318 
319    bool optimize_predicate(nir_alu_instr *instr, enum brw_predicate *predicate);
320 
321    void emit_conversion_from_double(dst_reg dst, src_reg src);
322    void emit_conversion_to_double(dst_reg dst, src_reg src);
323 
       /* for_scratch defaults to false; block and ref default to NULL. */
324    vec4_instruction *shuffle_64bit_data(dst_reg dst, src_reg src,
325                                         bool for_write,
326                                         bool for_scratch = false,
327                                         bblock_t *block = NULL,
328                                         vec4_instruction *ref = NULL);
329 
       /* NIR translation.  Every hook except get_nir_ssbo_intrinsic_index()
        * is virtual, so a derived class can replace the handling of an
        * individual NIR construct.
        */
330    virtual void emit_nir_code();
331    virtual void nir_setup_uniforms();
332    virtual void nir_emit_impl(nir_function_impl *impl);
333    virtual void nir_emit_cf_list(exec_list *list);
334    virtual void nir_emit_if(nir_if *if_stmt);
335    virtual void nir_emit_loop(nir_loop *loop);
336    virtual void nir_emit_block(nir_block *block);
337    virtual void nir_emit_instr(nir_instr *instr);
338    virtual void nir_emit_load_const(nir_load_const_instr *instr);
339    src_reg get_nir_ssbo_intrinsic_index(nir_intrinsic_instr *instr);
340    virtual void nir_emit_intrinsic(nir_intrinsic_instr *instr);
341    virtual void nir_emit_alu(nir_alu_instr *instr);
342    virtual void nir_emit_jump(nir_jump_instr *instr);
343    virtual void nir_emit_texture(nir_tex_instr *instr);
344    virtual void nir_emit_undef(nir_ssa_undef_instr *instr);
345    virtual void nir_emit_ssbo_atomic(int op, nir_intrinsic_instr *instr);
346 
       /* NIR operand lookup.  Overloads exist for an explicit hardware
        * register type, a NIR ALU type, or no type at all; for sources
        * num_components defaults to 4.
        */
347    dst_reg get_nir_dest(const nir_dest &dest, enum brw_reg_type type);
348    dst_reg get_nir_dest(const nir_dest &dest, nir_alu_type type);
349    dst_reg get_nir_dest(const nir_dest &dest);
350    src_reg get_nir_src(const nir_src &src, enum brw_reg_type type,
351                        unsigned num_components = 4);
352    src_reg get_nir_src(const nir_src &src, nir_alu_type type,
353                        unsigned num_components = 4);
354    src_reg get_nir_src(const nir_src &src,
355                        unsigned num_components = 4);
356    src_reg get_nir_src_imm(const nir_src &src);
357    src_reg get_indirect_offset(nir_intrinsic_instr *instr);
358 
       /* Register tables for NIR locals and SSA values.  Presumably indexed
        * by the NIR register / SSA index -- TODO confirm; allocation and
        * ownership are not visible from this header.
        */
359    dst_reg *nir_locals;
360    dst_reg *nir_ssa_values;
361 
362 protected:
363    void emit_vertex();
364    void setup_payload_interference(struct ra_graph *g, int first_payload_node,
365                                    int reg_node_count);
       /* Hooks that every concrete derived class must implement (pure
        * virtual).
        */
366    virtual void setup_payload() = 0;
367    virtual void emit_prolog() = 0;
368    virtual void emit_thread_end() = 0;
369    virtual void emit_urb_write_header(int mrf) = 0;
370    virtual vec4_instruction *emit_urb_write_opcode(bool complete) = 0;
       /* Optional hooks: virtual but not pure, so a definition exists in
        * this class and overriding is up to the derived class.
        */
371    virtual void gs_emit_vertex(int stream_id);
372    virtual void gs_end_primitive();
373 
374 private:
375    /**
376     * If true, then register allocation should fail instead of spilling.
377     */
378    const bool no_spills;
379 
       /* Same name as the constructor's shader_time_index parameter;
        * presumably stores it (confirm in the definition).
        */
380    int shader_time_index;
381 
382    unsigned last_scratch; /**< measured in 32-byte (register size) units */
383 };
384 
385 } /* namespace brw */
386 #endif /* __cplusplus */
387 
388 #endif /* BRW_VEC4_H */
389