1 /*
2  * Copyright © 2013 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
/**
 * \file brw_vec4_tes.cpp
 *
 * Tessellation evaluation shader specific code derived from the vec4_visitor
 * class.
 */
29 
30 #include "brw_vec4_tes.h"
31 #include "brw_cfg.h"
32 #include "dev/intel_debug.h"
33 
34 namespace brw {
35 
vec4_tes_visitor(const struct brw_compiler * compiler,void * log_data,const struct brw_tes_prog_key * key,struct brw_tes_prog_data * prog_data,const nir_shader * shader,void * mem_ctx,int shader_time_index,bool debug_enabled)36 vec4_tes_visitor::vec4_tes_visitor(const struct brw_compiler *compiler,
37                                   void *log_data,
38                                   const struct brw_tes_prog_key *key,
39                                   struct brw_tes_prog_data *prog_data,
40                                   const nir_shader *shader,
41                                   void *mem_ctx,
42                                   int shader_time_index,
43                                   bool debug_enabled)
44    : vec4_visitor(compiler, log_data, &key->base.tex, &prog_data->base,
45                   shader, mem_ctx, false, shader_time_index, debug_enabled)
46 {
47 }
48 
49 void
setup_payload()50 vec4_tes_visitor::setup_payload()
51 {
52    int reg = 0;
53 
54    /* The payload always contains important data in r0 and r1, which contains
55     * the URB handles that are passed on to the URB write at the end
56     * of the thread.
57     */
58    reg += 2;
59 
60    reg = setup_uniforms(reg);
61 
62    foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
63       for (int i = 0; i < 3; i++) {
64          if (inst->src[i].file != ATTR)
65             continue;
66 
67          unsigned slot = inst->src[i].nr + inst->src[i].offset / 16;
68          struct brw_reg grf = brw_vec4_grf(reg + slot / 2, 4 * (slot % 2));
69          grf = stride(grf, 0, 4, 1);
70          grf.swizzle = inst->src[i].swizzle;
71          grf.type = inst->src[i].type;
72          grf.abs = inst->src[i].abs;
73          grf.negate = inst->src[i].negate;
74          inst->src[i] = grf;
75       }
76    }
77 
78    reg += 8 * prog_data->urb_read_length;
79 
80    this->first_non_payload_grf = reg;
81 }
82 
83 
84 void
emit_prolog()85 vec4_tes_visitor::emit_prolog()
86 {
87    input_read_header = src_reg(this, glsl_type::uvec4_type);
88    emit(TES_OPCODE_CREATE_INPUT_READ_HEADER, dst_reg(input_read_header));
89 
90    this->current_annotation = NULL;
91 }
92 
93 
94 void
emit_urb_write_header(int mrf)95 vec4_tes_visitor::emit_urb_write_header(int mrf)
96 {
97    /* No need to do anything for DS; an implied write to this MRF will be
98     * performed by VS_OPCODE_URB_WRITE.
99     */
100    (void) mrf;
101 }
102 
103 
104 vec4_instruction *
emit_urb_write_opcode(bool complete)105 vec4_tes_visitor::emit_urb_write_opcode(bool complete)
106 {
107    /* For DS, the URB writes end the thread. */
108    if (complete) {
109       if (INTEL_DEBUG(DEBUG_SHADER_TIME))
110          emit_shader_time_end();
111    }
112 
113    vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE);
114    inst->urb_write_flags = complete ?
115       BRW_URB_WRITE_EOT_COMPLETE : BRW_URB_WRITE_NO_FLAGS;
116 
117    return inst;
118 }
119 
120 void
nir_emit_intrinsic(nir_intrinsic_instr * instr)121 vec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
122 {
123    const struct brw_tes_prog_data *tes_prog_data =
124       (const struct brw_tes_prog_data *) prog_data;
125 
126    switch (instr->intrinsic) {
127    case nir_intrinsic_load_tess_coord:
128       /* gl_TessCoord is part of the payload in g1 channels 0-2 and 4-6. */
129       emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
130                src_reg(brw_vec8_grf(1, 0))));
131       break;
132    case nir_intrinsic_load_tess_level_outer:
133       if (tes_prog_data->domain == BRW_TESS_DOMAIN_ISOLINE) {
134          emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
135                   swizzle(src_reg(ATTR, 1, glsl_type::vec4_type),
136                           BRW_SWIZZLE_ZWZW)));
137       } else {
138          emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
139                   swizzle(src_reg(ATTR, 1, glsl_type::vec4_type),
140                           BRW_SWIZZLE_WZYX)));
141       }
142       break;
143    case nir_intrinsic_load_tess_level_inner:
144       if (tes_prog_data->domain == BRW_TESS_DOMAIN_QUAD) {
145          emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
146                   swizzle(src_reg(ATTR, 0, glsl_type::vec4_type),
147                           BRW_SWIZZLE_WZYX)));
148       } else {
149          emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
150                   src_reg(ATTR, 1, glsl_type::float_type)));
151       }
152       break;
153    case nir_intrinsic_load_primitive_id:
154       emit(TES_OPCODE_GET_PRIMITIVE_ID,
155            get_nir_dest(instr->dest, BRW_REGISTER_TYPE_UD));
156       break;
157 
158    case nir_intrinsic_load_input:
159    case nir_intrinsic_load_per_vertex_input: {
160       assert(nir_dest_bit_size(instr->dest) == 32);
161       src_reg indirect_offset = get_indirect_offset(instr);
162       unsigned imm_offset = instr->const_index[0];
163       src_reg header = input_read_header;
164       unsigned first_component = nir_intrinsic_component(instr);
165 
166       if (indirect_offset.file != BAD_FILE) {
167          src_reg clamped_indirect_offset = src_reg(this, glsl_type::uvec4_type);
168 
169          /* Page 190 of "Volume 7: 3D Media GPGPU Engine (Haswell)" says the
170           * valid range of the offset is [0, 0FFFFFFFh].
171           */
172          emit_minmax(BRW_CONDITIONAL_L,
173                      dst_reg(clamped_indirect_offset),
174                      retype(indirect_offset, BRW_REGISTER_TYPE_UD),
175                      brw_imm_ud(0x0fffffffu));
176 
177          header = src_reg(this, glsl_type::uvec4_type);
178          emit(TES_OPCODE_ADD_INDIRECT_URB_OFFSET, dst_reg(header),
179               input_read_header, clamped_indirect_offset);
180       } else {
181          /* Arbitrarily only push up to 24 vec4 slots worth of data,
182           * which is 12 registers (since each holds 2 vec4 slots).
183           */
184          const unsigned max_push_slots = 24;
185          if (imm_offset < max_push_slots) {
186             src_reg src = src_reg(ATTR, imm_offset, glsl_type::ivec4_type);
187             src.swizzle = BRW_SWZ_COMP_INPUT(first_component);
188 
189             emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D), src));
190 
191             prog_data->urb_read_length =
192                MAX2(prog_data->urb_read_length,
193                     DIV_ROUND_UP(imm_offset + 1, 2));
194             break;
195          }
196       }
197 
198       dst_reg temp(this, glsl_type::ivec4_type);
199       vec4_instruction *read =
200          emit(VEC4_OPCODE_URB_READ, temp, src_reg(header));
201       read->offset = imm_offset;
202       read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
203 
204       src_reg src = src_reg(temp);
205       src.swizzle = BRW_SWZ_COMP_INPUT(first_component);
206 
207       /* Copy to target.  We might end up with some funky writemasks landing
208        * in here, but we really don't want them in the above pseudo-ops.
209        */
210       dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
211       dst.writemask = brw_writemask_for_size(instr->num_components);
212       emit(MOV(dst, src));
213       break;
214    }
215    default:
216       vec4_visitor::nir_emit_intrinsic(instr);
217    }
218 }
219 
220 
221 void
emit_thread_end()222 vec4_tes_visitor::emit_thread_end()
223 {
224    /* For DS, we always end the thread by emitting a single vertex.
225     * emit_urb_write_opcode() will take care of setting the eot flag on the
226     * SEND instruction.
227     */
228    emit_vertex();
229 }
230 
231 } /* namespace brw */
232