1 /* -*- mesa-c++  -*-
2  *
3  * Copyright (c) 2019 Collabora LTD
4  *
5  * Author: Gert Wollny <gert.wollny@collabora.com>
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a
8  * copy of this software and associated documentation files (the "Software"),
9  * to deal in the Software without restriction, including without limitation
10  * on the rights to use, copy, modify, merge, publish, distribute, sub
11  * license, and/or sell copies of the Software, and to permit persons to whom
12  * the Software is furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the next
15  * paragraph) shall be included in all copies or substantial portions of the
16  * Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24  * USE OR OTHER DEALINGS IN THE SOFTWARE.
25  */
26 
27 #include "sfn_instruction_tex.h"
28 #include "nir_builder.h"
29 #include "nir_builtin_builder.h"
30 
31 namespace r600 {
32 
/* Construct a TEX instruction.
 *
 * op             texture opcode (see Opcode enum)
 * dest           destination register vector
 * src            source (coordinate) register vector
 * sid            sampler id
 * rid            resource (texture) id
 * sampler_offset optional dynamic sampler index offset (may be empty)
 *
 * Flags, instruction mode and the per-coordinate offsets start out
 * zeroed; the destination swizzle defaults to identity (x,y,z,w).
 */
TexInstruction::TexInstruction(Opcode op, const GPRVector &dest, const GPRVector &src,
                               unsigned sid, unsigned rid, PValue sampler_offset):
   Instruction(tex),
   m_opcode(op),
   m_dst(dest),
   m_src(src),
   m_sampler_id(sid),
   m_resource_id(rid),
   m_flags(0),
   m_inst_mode(0),
   m_dest_swizzle{0,1,2,3},
   m_sampler_offset(sampler_offset)

{
   memset(m_offset, 0, sizeof (m_offset));

   /* Register the value groups so the register remapping passes can
    * rename them later */
   add_remappable_src_value(&m_src);
   add_remappable_src_value(&m_sampler_offset);
   add_remappable_dst_value(&m_dst);
}
53 
set_gather_comp(int cmp)54 void TexInstruction::set_gather_comp(int cmp)
55 {
56    m_inst_mode = cmp;
57 }
58 
replace_values(const ValueSet & candidates,PValue new_value)59 void TexInstruction::replace_values(const ValueSet& candidates, PValue new_value)
60 {
61    // I wonder whether we can actually end up here ...
62    for (auto c: candidates) {
63       if (*c == *m_src.reg_i(c->chan()))
64          m_src.set_reg_i(c->chan(), new_value);
65       if (*c == *m_dst.reg_i(c->chan()))
66          m_dst.set_reg_i(c->chan(), new_value);
67    }
68 }
69 
/* Set the texel offset for coordinate @index (0 = x, 1 = y, 2 = z). */
void TexInstruction::set_offset(unsigned index, int32_t val)
{
   assert(index < 3);
   m_offset[index] = val;
}
75 
get_offset(unsigned index) const76 int TexInstruction::get_offset(unsigned index) const
77 {
78    assert(index < 3);
79    return (m_offset[index] << 1 & 0x1f);
80 }
81 
/* Equality check used through the Instruction base interface.
 *
 * NOTE(review): only opcode, destination, source and the sampler and
 * resource ids take part in the comparison; m_flags, m_inst_mode,
 * m_offset, m_dest_swizzle and m_sampler_offset are ignored.  Confirm
 * that instructions differing only in those fields really should
 * compare equal.
 */
bool TexInstruction::is_equal_to(const Instruction& rhs) const
{
   assert(rhs.type() == tex);
   const auto& r = static_cast<const TexInstruction&>(rhs);
   return (m_opcode == r.m_opcode &&
           m_dst == r.m_dst &&
           m_src == r.m_src &&
           m_sampler_id == r.m_sampler_id &&
           m_resource_id == r.m_resource_id);
}
92 
do_print(std::ostream & os) const93 void TexInstruction::do_print(std::ostream& os) const
94 {
95    const char *map_swz = "xyzw01?_";
96    os << opname(m_opcode) << " R" << m_dst.sel() << ".";
97    for (int i = 0; i < 4; ++i)
98       os << map_swz[m_dest_swizzle[i]];
99 
100    os << " " << m_src
101       << " RESID:"  << m_resource_id << " SAMPLER:"
102       << m_sampler_id;
103 }
104 
opname(Opcode op)105 const char *TexInstruction::opname(Opcode op)
106 {
107    switch (op) {
108    case ld: return "LD";
109    case get_resinfo: return "GET_TEXTURE_RESINFO";
110    case get_nsampled: return "GET_NUMBER_OF_SAMPLES";
111    case get_tex_lod: return "GET_LOD";
112    case get_gradient_h: return "GET_GRADIENTS_H";
113    case get_gradient_v: return "GET_GRADIENTS_V";
114    case set_offsets: return "SET_TEXTURE_OFFSETS";
115    case keep_gradients: return "KEEP_GRADIENTS";
116    case set_gradient_h: return "SET_GRADIENTS_H";
117    case set_gradient_v: return "SET_GRADIENTS_V";
118    case sample: return "SAMPLE";
119    case sample_l: return "SAMPLE_L";
120    case sample_lb: return "SAMPLE_LB";
121    case sample_lz: return "SAMPLE_LZ";
122    case sample_g: return "SAMPLE_G";
123    case sample_g_lb: return "SAMPLE_G_L";
124    case gather4: return "GATHER4";
125    case gather4_o: return "GATHER4_O";
126    case sample_c: return "SAMPLE_C";
127    case sample_c_l: return "SAMPLE_C_L";
128    case sample_c_lb: return "SAMPLE_C_LB";
129    case sample_c_lz: return "SAMPLE_C_LZ";
130    case sample_c_g: return "SAMPLE_C_G";
131    case sample_c_g_lb: return "SAMPLE_C_G_L";
132    case gather4_c: return "GATHER4_C";
133    case gather4_c_o: return "OP_GATHER4_C_O";
134    }
135    return "ERROR";
136 }
137 
138 
139 
/* Shift normalized texture coordinates back by half a texel,
 * corr = coord - 0.5/size, to compensate for the hardware forcing
 * nearest filtering on integer formats (see the pass comment below). */
static bool lower_coord_shift_normalized(nir_builder *b, nir_tex_instr *tex)
{
   b->cursor = nir_before_instr(&tex->instr);

   /* scale = 1/texture_size, i.e. one texel in normalized coords */
   nir_ssa_def * size = nir_i2f32(b, nir_get_texture_size(b, tex));
   nir_ssa_def *scale = nir_frcp(b, size);

   int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);
   nir_ssa_def *corr = nullptr;
   if (unlikely(tex->array_is_lowered_cube)) {
      /* Only shift x and y (channel mask 3); the third component is
       * the layer/face produced by the cube-to-2darray lowering and
       * must stay untouched */
      auto corr2 = nir_fadd(b, nir_channels(b, tex->src[coord_index].src.ssa, 3),
                            nir_fmul(b, nir_imm_float(b, -0.5f), scale));
      corr = nir_vec3(b, nir_channel(b, corr2, 0), nir_channel(b, corr2, 1),
                      nir_channel(
                         b, tex->src[coord_index].src.ssa, 2));
   } else {
      corr = nir_fadd(b,
                      nir_fmul(b, nir_imm_float(b, -0.5f), scale),
                      tex->src[coord_index].src.ssa);
   }

   nir_instr_rewrite_src(&tex->instr, &tex->src[coord_index].src,
                         nir_src_for_ssa(corr));
   return true;
}
165 
/* Same half-texel correction as above, but for unnormalized (RECT)
 * coordinates where one texel has size 1: corr = coord - 0.5. */
static bool lower_coord_shift_unnormalized(nir_builder *b, nir_tex_instr *tex)
{
   b->cursor = nir_before_instr(&tex->instr);
   int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);
   nir_ssa_def *corr = nullptr;
   if (unlikely(tex->array_is_lowered_cube)) {
      /* Shift only x and y; keep the layer/face component as-is */
      auto corr2 = nir_fadd(b, nir_channels(b, tex->src[coord_index].src.ssa, 3),
                            nir_imm_float(b, -0.5f));
      corr = nir_vec3(b, nir_channel(b, corr2, 0), nir_channel(b, corr2, 1),
                      nir_channel(b, tex->src[coord_index].src.ssa, 2));
   } else {
      corr = nir_fadd(b, tex->src[coord_index].src.ssa,
                      nir_imm_float(b, -0.5f));
   }
   nir_instr_rewrite_src(&tex->instr, &tex->src[coord_index].src,
                         nir_src_for_ssa(corr));
   return true;
}
184 
185 static bool
r600_nir_lower_int_tg4_impl(nir_function_impl * impl)186 r600_nir_lower_int_tg4_impl(nir_function_impl *impl)
187 {
188    nir_builder b;
189    nir_builder_init(&b, impl);
190 
191    bool progress = false;
192    nir_foreach_block(block, impl) {
193       nir_foreach_instr_safe(instr, block) {
194          if (instr->type == nir_instr_type_tex) {
195             nir_tex_instr *tex = nir_instr_as_tex(instr);
196             if (tex->op == nir_texop_tg4 &&
197                 tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE) {
198                if (nir_alu_type_get_base_type(tex->dest_type) != nir_type_float) {
199                   if (tex->sampler_dim != GLSL_SAMPLER_DIM_RECT)
200                      lower_coord_shift_normalized(&b, tex);
201                   else
202                      lower_coord_shift_unnormalized(&b, tex);
203                   progress = true;
204                }
205             }
206          }
207       }
208    }
209    return progress;
210 }
211 
/*
 * This lowering pass works around a bug in r600 when doing TG4 from
 * integral valued samplers.
 *
 * Gather4 should follow the same rules as bilinear filtering, but the hardware
 * incorrectly forces nearest filtering if the texture format is integer.
 * The only effect this has on Gather4, which always returns the 4 texels that
 * bilinear filtering would use, is that the final coordinates are off by 0.5
 * of the texel size.
 */
222 
r600_nir_lower_int_tg4(nir_shader * shader)223 bool r600_nir_lower_int_tg4(nir_shader *shader)
224 {
225    bool progress = false;
226    bool need_lowering = false;
227 
228    nir_foreach_uniform_variable(var, shader) {
229       if (var->type->is_sampler()) {
230          if (glsl_base_type_is_integer(var->type->sampled_type)) {
231             need_lowering = true;
232          }
233       }
234    }
235 
236    if (need_lowering) {
237       nir_foreach_function(function, shader) {
238          if (function->impl && r600_nir_lower_int_tg4_impl(function->impl))
239             progress = true;
240       }
241    }
242 
243    return progress;
244 }
245 
/* Rewrite a (shadow) TXL/TXB lookup into a TXD (explicit gradients)
 * lookup.  The scalar LOD lambda is converted back into gradient
 * vectors as grad = exp2(lod) / texture_size; presumably the hardware
 * then derives the same LOD from the gradients - TODO confirm against
 * the hardware docs.  Always returns true (progress). */
static
bool lower_txl_txf_array_or_cube(nir_builder *b, nir_tex_instr *tex)
{
   assert(tex->op == nir_texop_txb || tex->op == nir_texop_txl);
   /* Gradients must not be present yet; they are added below */
   assert(nir_tex_instr_src_index(tex, nir_tex_src_ddx) < 0);
   assert(nir_tex_instr_src_index(tex, nir_tex_src_ddy) < 0);

   b->cursor = nir_before_instr(&tex->instr);

   int lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_lod);
   int bias_idx = nir_tex_instr_src_index(tex, nir_tex_src_bias);
   int min_lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_min_lod);
   assert (lod_idx >= 0 || bias_idx >= 0);

   nir_ssa_def *size = nir_i2f32(b, nir_get_texture_size(b, tex));
   /* Use the explicit LOD when present, otherwise query the implicit
    * one the hardware would have computed */
   nir_ssa_def *lod = (lod_idx >= 0) ?
                         nir_ssa_for_src(b, tex->src[lod_idx].src, 1) :
                         nir_get_texture_lod(b, tex);

   if (bias_idx >= 0)
      lod = nir_fadd(b, lod,nir_ssa_for_src(b, tex->src[bias_idx].src, 1));

   if (min_lod_idx >= 0)
      lod = nir_fmax(b, lod, nir_ssa_for_src(b, tex->src[min_lod_idx].src, 1));

   /* max lod? */

   nir_ssa_def *lambda_exp =  nir_fexp2(b, lod);
   nir_ssa_def *scale = NULL;

   if  (tex->is_array) {
      /* Drop the last (array slice) component from the size vector */
      int cmp_mask = (1 << (size->num_components - 1)) - 1;
      scale = nir_frcp(b, nir_channels(b, size,
                                       (nir_component_mask_t)cmp_mask));
   } else if (tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
      /* Cube faces are square, so replicate 1/size.x to 3 components */
      unsigned int swizzle[NIR_MAX_VEC_COMPONENTS] = {0,0,0,0};
      scale = nir_frcp(b, nir_channels(b, size, 1));
      scale = nir_swizzle(b, scale, swizzle, 3);
   }
   /* NOTE(review): scale stays NULL for non-array, non-cube lookups;
    * the caller only passes array or cube instructions - confirm. */

   nir_ssa_def *grad = nir_fmul(b, lambda_exp, scale);

   /* Replace the LOD-related sources with identical ddx/ddy gradients */
   if (lod_idx >= 0)
      nir_tex_instr_remove_src(tex, lod_idx);
   if (bias_idx >= 0)
      nir_tex_instr_remove_src(tex, bias_idx);
   if (min_lod_idx >= 0)
      nir_tex_instr_remove_src(tex, min_lod_idx);
   nir_tex_instr_add_src(tex, nir_tex_src_ddx, nir_src_for_ssa(grad));
   nir_tex_instr_add_src(tex, nir_tex_src_ddy, nir_src_for_ssa(grad));

   tex->op = nir_texop_txd;
   return true;
}
300 
301 
302 static bool
r600_nir_lower_txl_txf_array_or_cube_impl(nir_function_impl * impl)303 r600_nir_lower_txl_txf_array_or_cube_impl(nir_function_impl *impl)
304 {
305    nir_builder b;
306    nir_builder_init(&b, impl);
307 
308    bool progress = false;
309    nir_foreach_block(block, impl) {
310       nir_foreach_instr_safe(instr, block) {
311          if (instr->type == nir_instr_type_tex) {
312             nir_tex_instr *tex = nir_instr_as_tex(instr);
313 
314             if (tex->is_shadow &&
315                 (tex->op == nir_texop_txl || tex->op == nir_texop_txb) &&
316                 (tex->is_array || tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE))
317                progress |= lower_txl_txf_array_or_cube(&b, tex);
318          }
319       }
320    }
321    return progress;
322 }
323 
324 bool
r600_nir_lower_txl_txf_array_or_cube(nir_shader * shader)325 r600_nir_lower_txl_txf_array_or_cube(nir_shader *shader)
326 {
327    bool progress = false;
328    nir_foreach_function(function, shader) {
329       if (function->impl && r600_nir_lower_txl_txf_array_or_cube_impl(function->impl))
330          progress = true;
331    }
332    return progress;
333 }
334 
335 static bool
r600_nir_lower_cube_to_2darray_filer(const nir_instr * instr,const void * _options)336 r600_nir_lower_cube_to_2darray_filer(const nir_instr *instr, const void *_options)
337 {
338    if (instr->type != nir_instr_type_tex)
339       return false;
340 
341    auto tex = nir_instr_as_tex(instr);
342    if (tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE)
343       return false;
344 
345    switch (tex->op) {
346    case nir_texop_tex:
347    case nir_texop_txb:
348    case nir_texop_txf:
349    case nir_texop_txl:
350    case nir_texop_lod:
351    case nir_texop_tg4:
352    case nir_texop_txd:
353       return true;
354    default:
355       return false;
356    }
357 }
358 
/* Lowering callback: rewrite a cube (or cube-array) lookup into a
 * 2D-array lookup.
 *
 * nir_cube_r600 presumably returns the per-face coordinate numerators,
 * the major-axis magnitude and the face index - TODO confirm against
 * the builtin builder; here channels 1/0 are used as x/y numerators,
 * channel 2 as the divisor and channel 3 as the face.
 */
static nir_ssa_def *
r600_nir_lower_cube_to_2darray_impl(nir_builder *b, nir_instr *instr, void *_options)
{
   b->cursor = nir_before_instr(instr);

   auto tex = nir_instr_as_tex(instr);
   int coord_idx = nir_tex_instr_src_index(tex, nir_tex_src_coord);
   assert(coord_idx >= 0);

   /* Project onto the selected face: xy = cubed.yx / |cubed.z| + 1.5 */
   auto cubed = nir_cube_r600(b, nir_channels(b, tex->src[coord_idx].src.ssa, 0x7));
   auto xy = nir_fmad(b,
                      nir_vec2(b, nir_channel(b, cubed, 1), nir_channel(b, cubed, 0)),
                      nir_frcp(b, nir_fabs(b, nir_channel(b, cubed, 2))),
                      nir_imm_float(b, 1.5));

   nir_ssa_def *z = nir_channel(b, cubed, 3);
   if (tex->is_array) {
      /* Cube arrays: z = max(round(slice), 0) * 8 + face.
       * NOTE(review): the stride of 8 rather than 6 presumably matches
       * the hardware layer layout - confirm. */
      auto slice = nir_fround_even(b, nir_channel(b, tex->src[coord_idx].src.ssa, 3));
      z = nir_fmad(b, nir_fmax(b, slice, nir_imm_float(b, 0.0)), nir_imm_float(b, 8.0),
                   z);
   }

   if (tex->op == nir_texop_txd) {
      /* Explicit gradients are halved; presumably this compensates for
       * the coordinate transform above - TODO confirm */
      int ddx_idx = nir_tex_instr_src_index(tex, nir_tex_src_ddx);
      auto zero_dot_5 = nir_imm_float(b, 0.5);
      nir_instr_rewrite_src(&tex->instr, &tex->src[ddx_idx].src,
                            nir_src_for_ssa(nir_fmul(b, nir_ssa_for_src(b, tex->src[ddx_idx].src, 3), zero_dot_5)));

      int ddy_idx = nir_tex_instr_src_index(tex, nir_tex_src_ddy);
      nir_instr_rewrite_src(&tex->instr, &tex->src[ddy_idx].src,
                            nir_src_for_ssa(nir_fmul(b, nir_ssa_for_src(b, tex->src[ddy_idx].src, 3), zero_dot_5)));
   }

   /* Install the new (x, y, layer) coordinate and retag the instruction
    * as a 2D-array access; array_is_lowered_cube lets later passes
    * (e.g. the int-TG4 workaround above) treat the layer specially */
   auto new_coord = nir_vec3(b, nir_channel(b, xy, 0), nir_channel(b, xy, 1), z);
   nir_instr_rewrite_src(&tex->instr, &tex->src[coord_idx].src,
                         nir_src_for_ssa(new_coord));
   tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
   tex->is_array = true;
   tex->array_is_lowered_cube = true;

   tex->coord_components = 3;

   return NIR_LOWER_INSTR_PROGRESS;
}
403 
/* Shader-level entry point: lower all cube-map sampling operations
 * (as selected by the filter callback above) to 2D-array accesses. */
bool
r600_nir_lower_cube_to_2darray(nir_shader *shader)
{
   return nir_shader_lower_instructions(shader,
                                        r600_nir_lower_cube_to_2darray_filer,
                                        r600_nir_lower_cube_to_2darray_impl, nullptr);
}
411 
412 
413 
414 }
415