/**************************************************************************
 *
 * Copyright 2010-2021 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 **************************************************************************/


#include "util/u_memory.h"
#include "util/u_math.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_text.h"
#include "tgsi/tgsi_util.h"
#include "tgsi/tgsi_dump.h"
#include "lp_debug.h"
#include "lp_state.h"
#include "nir.h"

/*
 * Detect Aero minification shaders.
 *
 * Aero does not use texture mipmaps when a window gets animated and its shape
 * is bent. Instead it uses the average of 4 nearby texels. This is the
 * simplest such shader, but there are several variations:
 *
 *   FRAG
 *   DCL IN[0], GENERIC[1], PERSPECTIVE
 *   DCL IN[1], GENERIC[2], PERSPECTIVE
 *   DCL IN[2], GENERIC[3], PERSPECTIVE
 *   DCL OUT[0], COLOR
 *   DCL SAMP[0]
 *   DCL TEMP[0..3]
 *   IMM FLT32 {     0.2500,     0.0000,     0.0000,     0.0000 }
 *   MOV TEMP[0].x, IN[0].zzzz
 *   MOV TEMP[0].y, IN[0].wwww
 *   MOV TEMP[1].x, IN[1].zzzz
 *   MOV TEMP[1].y, IN[1].wwww
 *   TEX TEMP[0], TEMP[0], SAMP[0], 2D
 *   TEX TEMP[2], IN[0], SAMP[0], 2D
 *   TEX TEMP[3], IN[1], SAMP[0], 2D
 *   TEX TEMP[1], TEMP[1], SAMP[0], 2D
 *   ADD TEMP[0], TEMP[0], TEMP[2]
 *   ADD TEMP[0], TEMP[3], TEMP[0]
 *   ADD TEMP[0], TEMP[1], TEMP[0]
 *   MUL TEMP[0], TEMP[0], IN[2]
 *   MUL TEMP[0], TEMP[0], IMM[0].xxxx
 *   MOV OUT[0], TEMP[0]
 *   END
 *
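 * In effect the shader computes (an illustrative C-like sketch, not taken
 * verbatim from any application):
 *
 *   OUT[0] = (tex(IN[0].zw) + tex(IN[0].xy) +
 *             tex(IN[1].xy) + tex(IN[1].zw)) * IN[2] * 0.25;
 *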
 * Texture coordinates are interleaved as in the Gaussian blur shaders, but
 * unlike the latter there is no structure in the sub-pixel positioning of the
 * texels, other than being arranged in a diamond-like shape. For example,
 * these are the offsets of the texels relative to their average:
 *
 *    x offset   y offset
 *   --------------------
 *    0.691834   -0.21360
 *   -0.230230   -0.64160
 *   -0.692406    0.21356
 *    0.230802    0.64160
 *
 * These shaders are typically used with linear min/mag filtering, but the
 * linear filtering provides very little visual improvement compared to the
 * performance impact it has. The ultimate purpose of detecting these shaders
 * is to override them with nearest texture filtering.
 */
static inline boolean
match_aero_minification_shader(const struct tgsi_token *tokens,
                               const struct lp_tgsi_info *info)
{
   struct tgsi_parse_context parse;
   unsigned coord_mask;
   boolean has_quarter_imm;
   unsigned index, chan;

   if ((info->base.opcode_count[TGSI_OPCODE_TEX] != 4 &&
        info->base.opcode_count[TGSI_OPCODE_SAMPLE] != 4) ||
       info->num_texs != 4) {
      return FALSE;
   }

   /*
    * Ensure the texture coordinates are interleaved as in the example above.
    */
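   /*
    * coord_mask gets one bit per coordinate pair: bit (index*2 + swizzle/2)
    * marks IN[index].xy or IN[index].zw. With four TEX instructions, a final
    * mask of 0xf means each of IN[0].xy, IN[0].zw, IN[1].xy and IN[1].zw is
    * used exactly once.
    */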

   coord_mask = 0;
   for (index = 0; index < 4; ++index) {
      const struct lp_tgsi_texture_info *tex = &info->tex[index];
      if (tex->sampler_unit != 0 ||
          tex->texture_unit != 0 ||
          tex->coord[0].file != TGSI_FILE_INPUT ||
          tex->coord[1].file != TGSI_FILE_INPUT ||
          tex->coord[0].u.index != tex->coord[1].u.index ||
          (tex->coord[0].swizzle % 2) != 0 ||
          tex->coord[1].swizzle != tex->coord[0].swizzle + 1) {
         return FALSE;
      }

      coord_mask |= 1 << (tex->coord[0].u.index*2 + tex->coord[0].swizzle/2);
   }
   if (coord_mask != 0xf) {
      return FALSE;
   }

   /*
    * Ensure it has the 0.25 immediate.
    */

   has_quarter_imm = FALSE;

   tgsi_parse_init(&parse, tokens);

   while (!tgsi_parse_end_of_tokens(&parse)) {
      tgsi_parse_token(&parse);

      switch (parse.FullToken.Token.Type) {
      case TGSI_TOKEN_TYPE_DECLARATION:
         break;
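      /*
       * The shaders being matched declare the 0.25 immediate before the
       * first instruction, so the scan can stop as soon as an instruction
       * token is seen.
       */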
      case TGSI_TOKEN_TYPE_INSTRUCTION:
         goto finished;

      case TGSI_TOKEN_TYPE_IMMEDIATE:
         {
            const unsigned size =
                  parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
            assert(size <= 4);
            for (chan = 0; chan < size; ++chan) {
               if (parse.FullToken.FullImmediate.u[chan].Float == 0.25f) {
                  has_quarter_imm = TRUE;
                  goto finished;
               }
            }
         }
         break;

      case TGSI_TOKEN_TYPE_PROPERTY:
         break;

      default:
         assert(0);
         goto finished;
      }
   }
finished:

   tgsi_parse_free(&parse);

   if (!has_quarter_imm) {
      return FALSE;
   }

   return TRUE;
}

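/*
 * Check whether a NIR function only contains instructions the linear shader
 * path can handle: derefs, constants, input loads, stores, loads from UBO 0,
 * plain 2D texture fetches whose coordinates come straight from shader
 * inputs, and mov/vec2/vec4/fmul ALU ops. As a side effect this fills in
 * info->tex[] for each texture fetch, mirroring what the TGSI scanner does.
 */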
static bool
llvmpipe_nir_fn_is_linear_compat(struct nir_shader *shader,
                                 nir_function_impl *impl,
                                 struct lp_tgsi_info *info)
{
   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         switch (instr->type) {
         case nir_instr_type_deref:
         case nir_instr_type_load_const:
            break;
         case nir_instr_type_intrinsic: {
            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
            if (intrin->intrinsic != nir_intrinsic_load_deref &&
                intrin->intrinsic != nir_intrinsic_store_deref &&
                intrin->intrinsic != nir_intrinsic_load_ubo)
               return false;

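            /* UBO loads are only allowed from constant buffer 0, addressed
             * with a literal buffer index.
             */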
            if (intrin->intrinsic == nir_intrinsic_load_ubo) {
               if (!nir_src_is_const(intrin->src[0]))
                  return false;
               nir_load_const_instr *load =
                  nir_instr_as_load_const(intrin->src[0].ssa->parent_instr);
               if (load->value[0].u32 != 0)
                  return false;
            }
            break;
         }
         case nir_instr_type_tex: {
            nir_tex_instr *tex = nir_instr_as_tex(instr);
            struct lp_tgsi_texture_info *tex_info = &info->tex[info->num_texs];

            for (unsigned i = 0; i < tex->num_srcs; i++) {
               switch (tex->src[i].src_type) {
               case nir_tex_src_coord: {
                  nir_ssa_scalar scalar = nir_ssa_scalar_resolved(tex->src[i].src.ssa, 0);
                  if (scalar.def->parent_instr->type != nir_instr_type_intrinsic)
                     return false;
                  nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(scalar.def->parent_instr);
                  if (intrin->intrinsic != nir_intrinsic_load_deref)
                     return false;
                  nir_deref_instr *deref = nir_instr_as_deref(intrin->src[0].ssa->parent_instr);
                  nir_variable *var = nir_deref_instr_get_variable(deref);
                  if (var->data.mode != nir_var_shader_in)
                     return false;
                  break;
               }
               default:
                  continue;
               }
            }

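            /* Only plain 2D texture sampling qualifies; for anything else
             * record an approximate modifier/target and bail out.
             */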
            switch (tex->op) {
            case nir_texop_tex:
               tex_info->modifier = LP_BLD_TEX_MODIFIER_NONE;
               break;
            default:
               /* inaccurate but sufficient. */
               tex_info->modifier = LP_BLD_TEX_MODIFIER_EXPLICIT_LOD;
               return false;
            }
            switch (tex->sampler_dim) {
            case GLSL_SAMPLER_DIM_2D:
               tex_info->target = TGSI_TEXTURE_2D;
               break;
            default:
               /* inaccurate but sufficient. */
               tex_info->target = TGSI_TEXTURE_1D;
               return false;
            }

            tex_info->sampler_unit = tex->sampler_index;

            /* this is enforced in the scanner previously. */
            tex_info->coord[0].file = TGSI_FILE_INPUT;
            tex_info->coord[1].file = TGSI_FILE_INPUT;
            tex_info->coord[1].swizzle = 1;
            info->num_texs++;
            break;
         }
         case nir_instr_type_alu: {
            nir_alu_instr *alu = nir_instr_as_alu(instr);
            if (alu->op != nir_op_mov &&
                alu->op != nir_op_vec2 &&
                alu->op != nir_op_vec4 &&
                alu->op != nir_op_fmul)
               return false;

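            /* Constant fmul operands must be 32-bit floats in [0, 1];
             * anything outside that range marks the shader as having
             * unclamped immediates and disqualifies it.
             */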
            if (alu->op == nir_op_fmul) {
               unsigned num_src = nir_op_infos[alu->op].num_inputs;
               for (unsigned s = 0; s < num_src; s++) {
                  if (nir_src_is_const(alu->src[s].src)) {
                     nir_load_const_instr *load =
                        nir_instr_as_load_const(alu->src[s].src.ssa->parent_instr);

                     if (load->def.bit_size != 32)
                        return false;
                     for (unsigned c = 0; c < load->def.num_components; c++) {
                        if (load->value[c].f32 < 0.0 || load->value[c].f32 > 1.0) {
                           info->unclamped_immediates = true;
                           return false;
                        }
                     }
                  }
               }
            }
            break;
         }
         default:
            return false;
         }
      }
   }
   return true;
}

static bool
llvmpipe_nir_is_linear_compat(struct nir_shader *shader,
                              struct lp_tgsi_info *info)
{
   nir_foreach_function(function, shader) {
      if (function->impl) {
         if (!llvmpipe_nir_fn_is_linear_compat(shader, function->impl, info))
            return false;
      }
   }
   return true;
}

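/*
 * Classify a NIR fragment shader. The kind defaults to LP_FS_KIND_GENERAL
 * and is promoted to LP_FS_KIND_LLVM_LINEAR when the shader is simple enough
 * (few inputs, a single output, no indirect textures, few texture fetches,
 * and only linear-path-compatible instructions).
 */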
void
llvmpipe_fs_analyse_nir(struct lp_fragment_shader *shader)
{
   shader->kind = LP_FS_KIND_GENERAL;

   if (shader->info.base.num_inputs <= LP_MAX_LINEAR_INPUTS &&
       shader->info.base.num_outputs == 1 &&
       !shader->info.indirect_textures &&
       !shader->info.sampler_texture_units_different &&
       !shader->info.unclamped_immediates &&
       shader->info.num_texs <= LP_MAX_LINEAR_TEXTURES &&
       llvmpipe_nir_is_linear_compat(shader->base.ir.nir, &shader->info)) {
      shader->kind = LP_FS_KIND_LLVM_LINEAR;
   }
}

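/*
 * TGSI counterpart of llvmpipe_fs_analyse_nir. Besides the linear-path check
 * (which here requires the shader to consist solely of TEX/SAMPLE, MOV, MUL,
 * RET and END instructions), it also detects the Aero minification shaders
 * matched above and marks them as LP_FS_KIND_AERO_MINIFICATION.
 */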
void
llvmpipe_fs_analyse(struct lp_fragment_shader *shader,
                    const struct tgsi_token *tokens)
{
   shader->kind = LP_FS_KIND_GENERAL;

   if (shader->kind == LP_FS_KIND_GENERAL &&
       shader->info.base.num_inputs <= LP_MAX_LINEAR_INPUTS &&
       shader->info.base.num_outputs == 1 &&
       !shader->info.indirect_textures &&
       !shader->info.sampler_texture_units_different &&
       !shader->info.unclamped_immediates &&
       shader->info.num_texs <= LP_MAX_LINEAR_TEXTURES &&
       (shader->info.base.opcode_count[TGSI_OPCODE_TEX] +
        shader->info.base.opcode_count[TGSI_OPCODE_SAMPLE] +
        shader->info.base.opcode_count[TGSI_OPCODE_MOV] +
        shader->info.base.opcode_count[TGSI_OPCODE_MUL] +
        shader->info.base.opcode_count[TGSI_OPCODE_RET] +
        shader->info.base.opcode_count[TGSI_OPCODE_END] ==
        shader->info.base.num_instructions)) {
      shader->kind = LP_FS_KIND_LLVM_LINEAR;
   }

   if (shader->kind == LP_FS_KIND_GENERAL &&
       match_aero_minification_shader(tokens, &shader->info)) {
      shader->kind = LP_FS_KIND_AERO_MINIFICATION;
   }
}