1 /**************************************************************************
2  *
3  * Copyright 2010-2021 VMware, Inc.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sub license, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
18  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
19  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20  * USE OR OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * The above copyright notice and this permission notice (including the
23  * next paragraph) shall be included in all copies or substantial portions
24  * of the Software.
25  *
26  **************************************************************************/
27 
28 
29 #include "pipe/p_config.h"
30 
31 #include "util/u_math.h"
32 #include "util/u_cpu_detect.h"
33 #include "util/u_pack_color.h"
34 #include "util/u_rect.h"
35 #include "util/u_sse.h"
36 
37 #include "lp_jit.h"
38 #include "lp_rast.h"
39 #include "lp_debug.h"
40 #include "lp_state_fs.h"
41 #include "lp_linear_priv.h"
42 
43 
44 #if defined(PIPE_ARCH_SSE)
45 
46 
47 /* For debugging (LP_DEBUG=linear), shade areas of run-time fallback
48  * purple.  Keep blending active so we can see more of what's going
49  * on.
50  */
51 static boolean
linear_fallback(const struct lp_rast_state * state,unsigned x,unsigned y,unsigned width,unsigned height,uint8_t * color,unsigned stride)52 linear_fallback(const struct lp_rast_state *state,
53                 unsigned x, unsigned y,
54                 unsigned width, unsigned height,
55                 uint8_t *color,
56                 unsigned stride)
57 {
58    unsigned col = 0x808000ff;
59    int i;
60 
61    for (y = 0; y < height; y++) {
62       for (i = 0; i < 64; i++) {
63          *((uint32_t *)(color + y*stride) + x + i) = col;
64       }
65    }
66 
67    return TRUE;
68 }
69 
70 
71 /* Run our configurable linear shader pipeline:
72  */
73 static boolean
lp_fs_linear_run(const struct lp_rast_state * state,unsigned x,unsigned y,unsigned width,unsigned height,const float (* a0)[4],const float (* dadx)[4],const float (* dady)[4],uint8_t * color,unsigned stride)74 lp_fs_linear_run(const struct lp_rast_state *state,
75                  unsigned x, unsigned y,
76                  unsigned width, unsigned height,
77                  const float (*a0)[4],
78                  const float (*dadx)[4],
79                  const float (*dady)[4],
80                  uint8_t *color,
81                  unsigned stride)
82 {
83    const struct lp_fragment_shader_variant *variant = state->variant;
84    const struct lp_tgsi_info *info = &variant->shader->info;
85    struct lp_jit_linear_context jit;
86    lp_jit_linear_llvm_func jit_func = variant->jit_linear_llvm;
87 
88    struct lp_linear_sampler samp[LP_MAX_LINEAR_TEXTURES];
89    struct lp_linear_interp interp[LP_MAX_LINEAR_INPUTS];
90 
91    const float w0 = a0[0][3];
92    float oow = 1.0f/w0;
93 
94    unsigned input_mask = variant->linear_input_mask;
95    int nr_consts = info->base.file_max[TGSI_FILE_CONSTANT]+1;
96    int nr_tex = info->num_texs;
97    int i, j;
98 
99    LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
100 
101    /* Require constant w in these rectangles:
102     */
103    if (dadx[0][3] != 0.0f ||
104        dady[0][3] != 0.0f) {
105       if (LP_DEBUG & DEBUG_LINEAR2)
106          debug_printf("  -- w not constant\n");
107       goto fail;
108    }
109 
110    /* XXX: Per statechange:
111     */
112    if (variant->shader->base.type == PIPE_SHADER_IR_TGSI) {
113       uint8_t constants[LP_MAX_LINEAR_CONSTANTS][4];
114 
115       for (i = 0; i < nr_consts; i++) {
116          for (j = 0; j < 4; j++) {
117             float val = state->jit_context.constants[0][i*4+j];
118             if (val < 0.0f || val > 1.0f) {
119                if (LP_DEBUG & DEBUG_LINEAR2)
120                   debug_printf("  -- const[%d] out of range %f\n", i, val);
121                goto fail;
122             }
123             constants[i][j] = (uint8_t)(val * 255.0f);
124          }
125       }
126       jit.constants = (const uint8_t (*)[4])constants;
127    } else {
128       uint8_t nir_constants[LP_MAX_LINEAR_CONSTANTS * 4];
129 
130       for (i = 0; i < state->jit_context.num_constants[0]; i++){
131          float val = state->jit_context.constants[0][i];
132          if (val < 0.0f || val > 1.0f) {
133             if (LP_DEBUG & DEBUG_LINEAR2)
134                debug_printf("  -- const[%d] out of range %f\n", i, val);
135             goto fail;
136          }
137          nir_constants[i] = (uint8_t)(val * 255.0f);
138       }
139       jit.constants = (const uint8_t (*)[4])nir_constants;
140    }
141 
142    /* We assume BGRA ordering */
143    assert(variant->key.cbuf_format[0] == PIPE_FORMAT_B8G8R8X8_UNORM ||
144           variant->key.cbuf_format[0] == PIPE_FORMAT_B8G8R8A8_UNORM);
145 
146    jit.blend_color =
147          state->jit_context.u8_blend_color[32] +
148          (state->jit_context.u8_blend_color[16] << 8) +
149          (state->jit_context.u8_blend_color[0] << 16) +
150          (state->jit_context.u8_blend_color[48] << 24);
151 
152    jit.alpha_ref_value = float_to_ubyte(state->jit_context.alpha_ref_value);
153 
154    /* XXX: Per primitive:
155     */
156    while (input_mask) {
157       int i = u_bit_scan(&input_mask);
158       unsigned usage_mask = info->base.input_usage_mask[i];
159       boolean perspective =
160             info->base.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE ||
161             (info->base.input_interpolate[i] == TGSI_INTERPOLATE_COLOR &&
162              !variant->key.flatshade);
163 
164       if (!lp_linear_init_interp(&interp[i],
165                                  x, y, width, height,
166                                  usage_mask,
167                                  perspective,
168                                  oow,
169                                  a0[i+1],
170                                  dadx[i+1],
171                                  dady[i+1])) {
172          if (LP_DEBUG & DEBUG_LINEAR2)
173             debug_printf("  -- init_interp(%d) failed\n", i);
174          goto fail;
175       }
176 
177       jit.inputs[i] = &interp[i].base;
178    }
179 
180 
181    /* XXX: Per primitive: Initialize linear or nearest samplers:
182     */
183    for (i = 0; i < nr_tex; i++) {
184       const struct lp_tgsi_texture_info *tex_info = &info->tex[i];
185       unsigned unit = tex_info->sampler_unit;
186 
187       /* XXX: some texture coordinates are linear!
188        */
189       //boolean perspective = (info->base.input_interpolate[i] ==
190       //                       TGSI_INTERPOLATE_PERSPECTIVE);
191 
192       if (!lp_linear_init_sampler(&samp[i],
193                                   tex_info,
194                                   lp_fs_variant_key_sampler_idx(&variant->key, unit),
195                                   &state->jit_context.textures[unit],
196                                   x, y, width, height,
197                                   a0, dadx, dady)) {
198          if (LP_DEBUG & DEBUG_LINEAR2)
199             debug_printf("  -- init_sampler(%d) failed\n", i);
200          goto fail;
201       }
202 
203       jit.tex[i] = &samp[i].base;
204    }
205 
206    /* JIT function already does blending */
207    jit.color0 = color + x * 4 + y * stride;
208    for (y = 0; y < height; y++) {
209       jit_func(&jit, 0, 0, width);
210       jit.color0 += stride;
211    }
212 
213    return TRUE;
214 
215 fail:
216    /* Visually distinguish this from other fallbacks:
217     */
218    if (LP_DEBUG & DEBUG_LINEAR) {
219       return linear_fallback(state, x, y, width, height, color, stride);
220    }
221 
222    return FALSE;
223 }
224 
225 
226 static void
check_linear_interp_mask_a(struct lp_fragment_shader_variant * variant)227 check_linear_interp_mask_a(struct lp_fragment_shader_variant *variant)
228 {
229    const struct lp_tgsi_info *info = &variant->shader->info;
230    struct lp_jit_linear_context jit;
231 
232    struct lp_linear_sampler samp[LP_MAX_LINEAR_TEXTURES];
233    struct lp_linear_interp interp[LP_MAX_LINEAR_INPUTS];
234    uint8_t constants[LP_MAX_LINEAR_CONSTANTS][4];
235    PIPE_ALIGN_VAR(16) uint8_t color0[TILE_SIZE*4];
236 
237    int nr_inputs = info->base.file_max[TGSI_FILE_INPUT]+1;
238    int nr_tex = info->num_texs;
239    int i;
240 
241    LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
242 
243    jit.constants = (const uint8_t (*)[4])constants;
244 
245    for (i = 0; i < nr_tex; i++) {
246       lp_linear_init_noop_sampler(&samp[i]);
247       jit.tex[i] = &samp[i].base;
248    }
249 
250    for (i = 0; i < nr_inputs; i++) {
251       lp_linear_init_noop_interp(&interp[i]);
252       jit.inputs[i] = &interp[i].base;
253    }
254 
255    jit.color0 = color0;
256 
257    (void)variant->jit_linear_llvm(&jit, 0, 0, 0);
258 
259    /* Find out which interpolators were called, and store this as a
260     * mask:
261     */
262    for (i = 0; i < nr_inputs; i++)
263       variant->linear_input_mask |= (interp[i].row[0] << i);
264 }
265 
266 
267 /* Until the above is working, look at texture information and guess
268  * that any input used as a texture coordinate is not used for
269  * anything else.
270  */
271 static void
check_linear_interp_mask_b(struct lp_fragment_shader_variant * variant)272 check_linear_interp_mask_b(struct lp_fragment_shader_variant *variant)
273 {
274    const struct lp_tgsi_info *info = &variant->shader->info;
275    int nr_inputs = info->base.file_max[TGSI_FILE_INPUT]+1;
276    int nr_tex = info->num_texs;
277    unsigned tex_mask = 0;
278    int i;
279 
280    LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
281 
282    for (i = 0; i < nr_tex; i++) {
283       const struct lp_tgsi_texture_info *tex_info = &info->tex[i];
284       const struct lp_tgsi_channel_info *schan = &tex_info->coord[0];
285       const struct lp_tgsi_channel_info *tchan = &tex_info->coord[1];
286       tex_mask |= 1 << schan->u.index;
287       tex_mask |= 1 << tchan->u.index;
288    }
289 
290    variant->linear_input_mask = ((1 << nr_inputs) - 1) & ~tex_mask;
291 }
292 
293 
294 void
lp_linear_check_variant(struct lp_fragment_shader_variant * variant)295 lp_linear_check_variant(struct lp_fragment_shader_variant *variant)
296 {
297    const struct lp_fragment_shader_variant_key *key = &variant->key;
298    const struct lp_fragment_shader *shader = variant->shader;
299    const struct lp_tgsi_info *info = &shader->info;
300    int i;
301 
302    if (info->base.file_max[TGSI_FILE_CONSTANT] >= LP_MAX_LINEAR_CONSTANTS ||
303        info->base.file_max[TGSI_FILE_INPUT] >= LP_MAX_LINEAR_INPUTS) {
304       if (LP_DEBUG & DEBUG_LINEAR)
305          debug_printf("  -- too many inputs/constants\n");
306       goto fail;
307    }
308 
309    /* If we have a fastpath which implements the entire varient, use
310     * that.
311     */
312    if (lp_linear_check_fastpath(variant)) {
313       return;
314    }
315 
316    /* Otherwise, can we build up a spanline-based linear path for this
317     * variant?
318     */
319 
320    /* Check static sampler state.
321     */
322    for (i = 0; i < info->num_texs; i++) {
323       const struct lp_tgsi_texture_info *tex_info = &info->tex[i];
324       unsigned unit = tex_info->sampler_unit;
325 
326       /* XXX: Relax this once setup premultiplies by oow:
327        */
328       if (info->base.input_interpolate[unit] != TGSI_INTERPOLATE_PERSPECTIVE) {
329          if (LP_DEBUG & DEBUG_LINEAR)
330             debug_printf(" -- samp[%d]: texcoord not perspective\n", i);
331          goto fail;
332       }
333 
334       struct lp_sampler_static_state *samp = lp_fs_variant_key_sampler_idx(key, unit);
335       if (!lp_linear_check_sampler(samp, tex_info)) {
336          if (LP_DEBUG & DEBUG_LINEAR)
337             debug_printf(" -- samp[%d]: check_sampler failed\n", i);
338          goto fail;
339       }
340    }
341 
342    /* Check shader.  May not have been jitted.
343     */
344    if (variant->linear_function == NULL) {
345       if (LP_DEBUG & DEBUG_LINEAR)
346          debug_printf("  -- no linear shader\n");
347       goto fail;
348    }
349 
350    /* Hook in the catchall shader runner:
351     */
352    variant->jit_linear = lp_fs_linear_run;
353 
354    /* Figure out which inputs we don't need to interpolate (because
355     * they are only used as texture coordinates).  This is important
356     * as we can cope with texture coordinates which exceed 1.0, but
357     * cannot do so for regular inputs.
358     */
359    if (1)
360       check_linear_interp_mask_a(variant);
361    else
362       check_linear_interp_mask_b(variant);
363 
364 
365    if (0) {
366       lp_debug_fs_variant(variant);
367       debug_printf("linear input mask: 0x%x\n", variant->linear_input_mask);
368    }
369 
370    return;
371 
372 fail:
373    if (LP_DEBUG & DEBUG_LINEAR) {
374       lp_debug_fs_variant(variant);
375       debug_printf("    ----> no linear path for this variant\n");
376    }
377 }
378 
379 
380 #else
/**
 * Build without PIPE_ARCH_SSE: there is no linear path, so the variant
 * is left untouched.
 */
void
lp_linear_check_variant(struct lp_fragment_shader_variant *variant)
{
   (void)variant;
}
385 #endif
386