1 /**************************************************************************
2 *
3 * Copyright 2010-2021 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
18 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
19 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20 * USE OR OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * The above copyright notice and this permission notice (including the
23 * next paragraph) shall be included in all copies or substantial portions
24 * of the Software.
25 *
26 **************************************************************************/
27
28
29 #include "pipe/p_config.h"
30
31 #include "util/u_math.h"
32 #include "util/u_cpu_detect.h"
33 #include "util/u_pack_color.h"
34 #include "util/u_rect.h"
35 #include "util/u_sse.h"
36
37 #include "lp_jit.h"
38 #include "lp_rast.h"
39 #include "lp_debug.h"
40 #include "lp_state_fs.h"
41 #include "lp_linear_priv.h"
42
43
44 #if defined(PIPE_ARCH_SSE)
45
46
47 /* For debugging (LP_DEBUG=linear), shade areas of run-time fallback
48 * purple. Keep blending active so we can see more of what's going
49 * on.
50 */
51 static boolean
linear_fallback(const struct lp_rast_state * state,unsigned x,unsigned y,unsigned width,unsigned height,uint8_t * color,unsigned stride)52 linear_fallback(const struct lp_rast_state *state,
53 unsigned x, unsigned y,
54 unsigned width, unsigned height,
55 uint8_t *color,
56 unsigned stride)
57 {
58 unsigned col = 0x808000ff;
59 int i;
60
61 for (y = 0; y < height; y++) {
62 for (i = 0; i < 64; i++) {
63 *((uint32_t *)(color + y*stride) + x + i) = col;
64 }
65 }
66
67 return TRUE;
68 }
69
70
71 /* Run our configurable linear shader pipeline:
72 */
73 static boolean
lp_fs_linear_run(const struct lp_rast_state * state,unsigned x,unsigned y,unsigned width,unsigned height,const float (* a0)[4],const float (* dadx)[4],const float (* dady)[4],uint8_t * color,unsigned stride)74 lp_fs_linear_run(const struct lp_rast_state *state,
75 unsigned x, unsigned y,
76 unsigned width, unsigned height,
77 const float (*a0)[4],
78 const float (*dadx)[4],
79 const float (*dady)[4],
80 uint8_t *color,
81 unsigned stride)
82 {
83 const struct lp_fragment_shader_variant *variant = state->variant;
84 const struct lp_tgsi_info *info = &variant->shader->info;
85 struct lp_jit_linear_context jit;
86 lp_jit_linear_llvm_func jit_func = variant->jit_linear_llvm;
87
88 struct lp_linear_sampler samp[LP_MAX_LINEAR_TEXTURES];
89 struct lp_linear_interp interp[LP_MAX_LINEAR_INPUTS];
90
91 const float w0 = a0[0][3];
92 float oow = 1.0f/w0;
93
94 unsigned input_mask = variant->linear_input_mask;
95 int nr_consts = info->base.file_max[TGSI_FILE_CONSTANT]+1;
96 int nr_tex = info->num_texs;
97 int i, j;
98
99 LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
100
101 /* Require constant w in these rectangles:
102 */
103 if (dadx[0][3] != 0.0f ||
104 dady[0][3] != 0.0f) {
105 if (LP_DEBUG & DEBUG_LINEAR2)
106 debug_printf(" -- w not constant\n");
107 goto fail;
108 }
109
110 /* XXX: Per statechange:
111 */
112 if (variant->shader->base.type == PIPE_SHADER_IR_TGSI) {
113 uint8_t constants[LP_MAX_LINEAR_CONSTANTS][4];
114
115 for (i = 0; i < nr_consts; i++) {
116 for (j = 0; j < 4; j++) {
117 float val = state->jit_context.constants[0][i*4+j];
118 if (val < 0.0f || val > 1.0f) {
119 if (LP_DEBUG & DEBUG_LINEAR2)
120 debug_printf(" -- const[%d] out of range %f\n", i, val);
121 goto fail;
122 }
123 constants[i][j] = (uint8_t)(val * 255.0f);
124 }
125 }
126 jit.constants = (const uint8_t (*)[4])constants;
127 } else {
128 uint8_t nir_constants[LP_MAX_LINEAR_CONSTANTS * 4];
129
130 for (i = 0; i < state->jit_context.num_constants[0]; i++){
131 float val = state->jit_context.constants[0][i];
132 if (val < 0.0f || val > 1.0f) {
133 if (LP_DEBUG & DEBUG_LINEAR2)
134 debug_printf(" -- const[%d] out of range %f\n", i, val);
135 goto fail;
136 }
137 nir_constants[i] = (uint8_t)(val * 255.0f);
138 }
139 jit.constants = (const uint8_t (*)[4])nir_constants;
140 }
141
142 /* We assume BGRA ordering */
143 assert(variant->key.cbuf_format[0] == PIPE_FORMAT_B8G8R8X8_UNORM ||
144 variant->key.cbuf_format[0] == PIPE_FORMAT_B8G8R8A8_UNORM);
145
146 jit.blend_color =
147 state->jit_context.u8_blend_color[32] +
148 (state->jit_context.u8_blend_color[16] << 8) +
149 (state->jit_context.u8_blend_color[0] << 16) +
150 (state->jit_context.u8_blend_color[48] << 24);
151
152 jit.alpha_ref_value = float_to_ubyte(state->jit_context.alpha_ref_value);
153
154 /* XXX: Per primitive:
155 */
156 while (input_mask) {
157 int i = u_bit_scan(&input_mask);
158 unsigned usage_mask = info->base.input_usage_mask[i];
159 boolean perspective =
160 info->base.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE ||
161 (info->base.input_interpolate[i] == TGSI_INTERPOLATE_COLOR &&
162 !variant->key.flatshade);
163
164 if (!lp_linear_init_interp(&interp[i],
165 x, y, width, height,
166 usage_mask,
167 perspective,
168 oow,
169 a0[i+1],
170 dadx[i+1],
171 dady[i+1])) {
172 if (LP_DEBUG & DEBUG_LINEAR2)
173 debug_printf(" -- init_interp(%d) failed\n", i);
174 goto fail;
175 }
176
177 jit.inputs[i] = &interp[i].base;
178 }
179
180
181 /* XXX: Per primitive: Initialize linear or nearest samplers:
182 */
183 for (i = 0; i < nr_tex; i++) {
184 const struct lp_tgsi_texture_info *tex_info = &info->tex[i];
185 unsigned unit = tex_info->sampler_unit;
186
187 /* XXX: some texture coordinates are linear!
188 */
189 //boolean perspective = (info->base.input_interpolate[i] ==
190 // TGSI_INTERPOLATE_PERSPECTIVE);
191
192 if (!lp_linear_init_sampler(&samp[i],
193 tex_info,
194 lp_fs_variant_key_sampler_idx(&variant->key, unit),
195 &state->jit_context.textures[unit],
196 x, y, width, height,
197 a0, dadx, dady)) {
198 if (LP_DEBUG & DEBUG_LINEAR2)
199 debug_printf(" -- init_sampler(%d) failed\n", i);
200 goto fail;
201 }
202
203 jit.tex[i] = &samp[i].base;
204 }
205
206 /* JIT function already does blending */
207 jit.color0 = color + x * 4 + y * stride;
208 for (y = 0; y < height; y++) {
209 jit_func(&jit, 0, 0, width);
210 jit.color0 += stride;
211 }
212
213 return TRUE;
214
215 fail:
216 /* Visually distinguish this from other fallbacks:
217 */
218 if (LP_DEBUG & DEBUG_LINEAR) {
219 return linear_fallback(state, x, y, width, height, color, stride);
220 }
221
222 return FALSE;
223 }
224
225
226 static void
check_linear_interp_mask_a(struct lp_fragment_shader_variant * variant)227 check_linear_interp_mask_a(struct lp_fragment_shader_variant *variant)
228 {
229 const struct lp_tgsi_info *info = &variant->shader->info;
230 struct lp_jit_linear_context jit;
231
232 struct lp_linear_sampler samp[LP_MAX_LINEAR_TEXTURES];
233 struct lp_linear_interp interp[LP_MAX_LINEAR_INPUTS];
234 uint8_t constants[LP_MAX_LINEAR_CONSTANTS][4];
235 PIPE_ALIGN_VAR(16) uint8_t color0[TILE_SIZE*4];
236
237 int nr_inputs = info->base.file_max[TGSI_FILE_INPUT]+1;
238 int nr_tex = info->num_texs;
239 int i;
240
241 LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
242
243 jit.constants = (const uint8_t (*)[4])constants;
244
245 for (i = 0; i < nr_tex; i++) {
246 lp_linear_init_noop_sampler(&samp[i]);
247 jit.tex[i] = &samp[i].base;
248 }
249
250 for (i = 0; i < nr_inputs; i++) {
251 lp_linear_init_noop_interp(&interp[i]);
252 jit.inputs[i] = &interp[i].base;
253 }
254
255 jit.color0 = color0;
256
257 (void)variant->jit_linear_llvm(&jit, 0, 0, 0);
258
259 /* Find out which interpolators were called, and store this as a
260 * mask:
261 */
262 for (i = 0; i < nr_inputs; i++)
263 variant->linear_input_mask |= (interp[i].row[0] << i);
264 }
265
266
267 /* Until the above is working, look at texture information and guess
268 * that any input used as a texture coordinate is not used for
269 * anything else.
270 */
271 static void
check_linear_interp_mask_b(struct lp_fragment_shader_variant * variant)272 check_linear_interp_mask_b(struct lp_fragment_shader_variant *variant)
273 {
274 const struct lp_tgsi_info *info = &variant->shader->info;
275 int nr_inputs = info->base.file_max[TGSI_FILE_INPUT]+1;
276 int nr_tex = info->num_texs;
277 unsigned tex_mask = 0;
278 int i;
279
280 LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
281
282 for (i = 0; i < nr_tex; i++) {
283 const struct lp_tgsi_texture_info *tex_info = &info->tex[i];
284 const struct lp_tgsi_channel_info *schan = &tex_info->coord[0];
285 const struct lp_tgsi_channel_info *tchan = &tex_info->coord[1];
286 tex_mask |= 1 << schan->u.index;
287 tex_mask |= 1 << tchan->u.index;
288 }
289
290 variant->linear_input_mask = ((1 << nr_inputs) - 1) & ~tex_mask;
291 }
292
293
294 void
lp_linear_check_variant(struct lp_fragment_shader_variant * variant)295 lp_linear_check_variant(struct lp_fragment_shader_variant *variant)
296 {
297 const struct lp_fragment_shader_variant_key *key = &variant->key;
298 const struct lp_fragment_shader *shader = variant->shader;
299 const struct lp_tgsi_info *info = &shader->info;
300 int i;
301
302 if (info->base.file_max[TGSI_FILE_CONSTANT] >= LP_MAX_LINEAR_CONSTANTS ||
303 info->base.file_max[TGSI_FILE_INPUT] >= LP_MAX_LINEAR_INPUTS) {
304 if (LP_DEBUG & DEBUG_LINEAR)
305 debug_printf(" -- too many inputs/constants\n");
306 goto fail;
307 }
308
309 /* If we have a fastpath which implements the entire varient, use
310 * that.
311 */
312 if (lp_linear_check_fastpath(variant)) {
313 return;
314 }
315
316 /* Otherwise, can we build up a spanline-based linear path for this
317 * variant?
318 */
319
320 /* Check static sampler state.
321 */
322 for (i = 0; i < info->num_texs; i++) {
323 const struct lp_tgsi_texture_info *tex_info = &info->tex[i];
324 unsigned unit = tex_info->sampler_unit;
325
326 /* XXX: Relax this once setup premultiplies by oow:
327 */
328 if (info->base.input_interpolate[unit] != TGSI_INTERPOLATE_PERSPECTIVE) {
329 if (LP_DEBUG & DEBUG_LINEAR)
330 debug_printf(" -- samp[%d]: texcoord not perspective\n", i);
331 goto fail;
332 }
333
334 struct lp_sampler_static_state *samp = lp_fs_variant_key_sampler_idx(key, unit);
335 if (!lp_linear_check_sampler(samp, tex_info)) {
336 if (LP_DEBUG & DEBUG_LINEAR)
337 debug_printf(" -- samp[%d]: check_sampler failed\n", i);
338 goto fail;
339 }
340 }
341
342 /* Check shader. May not have been jitted.
343 */
344 if (variant->linear_function == NULL) {
345 if (LP_DEBUG & DEBUG_LINEAR)
346 debug_printf(" -- no linear shader\n");
347 goto fail;
348 }
349
350 /* Hook in the catchall shader runner:
351 */
352 variant->jit_linear = lp_fs_linear_run;
353
354 /* Figure out which inputs we don't need to interpolate (because
355 * they are only used as texture coordinates). This is important
356 * as we can cope with texture coordinates which exceed 1.0, but
357 * cannot do so for regular inputs.
358 */
359 if (1)
360 check_linear_interp_mask_a(variant);
361 else
362 check_linear_interp_mask_b(variant);
363
364
365 if (0) {
366 lp_debug_fs_variant(variant);
367 debug_printf("linear input mask: 0x%x\n", variant->linear_input_mask);
368 }
369
370 return;
371
372 fail:
373 if (LP_DEBUG & DEBUG_LINEAR) {
374 lp_debug_fs_variant(variant);
375 debug_printf(" ----> no linear path for this variant\n");
376 }
377 }
378
379
380 #else
/* Stub used when PIPE_ARCH_SSE is not defined: performs no checks and
 * leaves the variant untouched.
 */
void
lp_linear_check_variant(struct lp_fragment_shader_variant *variant)
{
   (void) variant;
}
385 #endif
386