1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 VMware, Inc.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * Position and shader input interpolation.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 */
35
36 #include "pipe/p_shader_tokens.h"
37 #include "util/compiler.h"
38 #include "util/u_debug.h"
39 #include "util/u_memory.h"
40 #include "util/u_math.h"
41 #include "tgsi/tgsi_scan.h"
42 #include "gallivm/lp_bld_debug.h"
43 #include "gallivm/lp_bld_const.h"
44 #include "gallivm/lp_bld_arit.h"
45 #include "gallivm/lp_bld_swizzle.h"
46 #include "gallivm/lp_bld_flow.h"
47 #include "gallivm/lp_bld_logic.h"
48 #include "gallivm/lp_bld_struct.h"
49 #include "gallivm/lp_bld_gather.h"
50 #include "lp_bld_interp.h"
51
52
53 /*
54 * The shader JIT function operates on blocks of quads.
55 * Each block has 2x2 quads and each quad has 2x2 pixels.
56 *
57 * We iterate over the quads in order 0, 1, 2, 3:
58 *
 *   #################
 *   #   |   #   |   #
 *   #---0---#---1---#
 *   #   |   #   |   #
 *   #################
 *   #   |   #   |   #
 *   #---2---#---3---#
 *   #   |   #   |   #
 *   #################
68 *
69 * If we iterate over multiple quads at once, quads 01 and 23 are processed
70 * together.
71 *
72 * Within each quad, we have four pixels which are represented in SOA
73 * order:
74 *
75 * #########
76 * # 0 | 1 #
77 * #---+---#
78 * # 2 | 3 #
79 * #########
80 *
81 * So the green channel (for example) of the four pixels is stored in
82 * a single vector register: {g0, g1, g2, g3}.
83 * The order stays the same even with multiple quads:
84 * 0 1 4 5
85 * 2 3 6 7
86 * is stored as g0..g7
87 */
88
89
90 /**
91 * Do one perspective divide per quad.
92 *
93 * For perspective interpolation, the final attribute value is given
94 *
95 * a' = a/w = a * oow
96 *
97 * where
98 *
99 * a = a0 + dadx*x + dady*y
100 * w = w0 + dwdx*x + dwdy*y
101 * oow = 1/w = 1/(w0 + dwdx*x + dwdy*y)
102 *
103 * Instead of computing the division per pixel, with this macro we compute the
104 * division on the upper left pixel of each quad, and use a linear
105 * approximation in the remaining pixels, given by:
106 *
107 * da'dx = (dadx - dwdx*a)*oow
108 * da'dy = (dady - dwdy*a)*oow
109 *
110 * Ironically, this actually makes things slower -- probably because the
111 * divide hardware unit is rarely used, whereas the multiply unit is typically
112 * already saturated.
113 */
#define PERSPECTIVE_DIVIDE_PER_QUAD 0


/*
 * Pixel x and y offsets, indexed by pixel slot.  Each row of four entries
 * below is one 2x2 quad, listed upper-left, upper-right, lower-left,
 * lower-right.
 */
static const unsigned char quad_offset_x[16] = {
   0, 1, 0, 1,
   2, 3, 2, 3,
   0, 1, 0, 1,
   2, 3, 2, 3
};
static const unsigned char quad_offset_y[16] = {
   0, 0, 1, 1,
   0, 0, 1, 1,
   2, 2, 3, 3,
   2, 2, 3, 3
};
119
120
121 static void
attrib_name(LLVMValueRef val,unsigned attrib,unsigned chan,const char * suffix)122 attrib_name(LLVMValueRef val, unsigned attrib, unsigned chan, const char *suffix)
123 {
124 if(attrib == 0)
125 lp_build_name(val, "pos.%c%s", "xyzw"[chan], suffix);
126 else
127 lp_build_name(val, "input%u.%c%s", attrib - 1, "xyzw"[chan], suffix);
128 }
129
130 static void
calc_offsets(struct lp_build_context * coeff_bld,unsigned quad_start_index,LLVMValueRef * pixoffx,LLVMValueRef * pixoffy)131 calc_offsets(struct lp_build_context *coeff_bld,
132 unsigned quad_start_index,
133 LLVMValueRef *pixoffx,
134 LLVMValueRef *pixoffy)
135 {
136 unsigned i;
137 unsigned num_pix = coeff_bld->type.length;
138 struct gallivm_state *gallivm = coeff_bld->gallivm;
139 LLVMBuilderRef builder = coeff_bld->gallivm->builder;
140 LLVMValueRef nr, pixxf, pixyf;
141
142 *pixoffx = coeff_bld->undef;
143 *pixoffy = coeff_bld->undef;
144
145 for (i = 0; i < num_pix; i++) {
146 nr = lp_build_const_int32(gallivm, i);
147 pixxf = lp_build_const_float(gallivm, quad_offset_x[i % num_pix] +
148 (quad_start_index & 1) * 2);
149 pixyf = lp_build_const_float(gallivm, quad_offset_y[i % num_pix] +
150 (quad_start_index & 2));
151 *pixoffx = LLVMBuildInsertElement(builder, *pixoffx, pixxf, nr, "");
152 *pixoffy = LLVMBuildInsertElement(builder, *pixoffy, pixyf, nr, "");
153 }
154 }
155
156 static void
calc_centroid_offsets(struct lp_build_interp_soa_context * bld,struct gallivm_state * gallivm,LLVMValueRef loop_iter,LLVMValueRef mask_store,LLVMValueRef pix_center_offset,LLVMValueRef * centroid_x,LLVMValueRef * centroid_y)157 calc_centroid_offsets(struct lp_build_interp_soa_context *bld,
158 struct gallivm_state *gallivm,
159 LLVMValueRef loop_iter,
160 LLVMValueRef mask_store,
161 LLVMValueRef pix_center_offset,
162 LLVMValueRef *centroid_x, LLVMValueRef *centroid_y)
163 {
164 struct lp_build_context *coeff_bld = &bld->coeff_bld;
165 LLVMBuilderRef builder = gallivm->builder;
166 LLVMValueRef s_mask_and = NULL;
167 LLVMValueRef centroid_x_offset = pix_center_offset;
168 LLVMValueRef centroid_y_offset = pix_center_offset;
169 for (int s = bld->coverage_samples - 1; s >= 0; s--) {
170 LLVMValueRef sample_cov;
171 LLVMValueRef s_mask_idx = LLVMBuildMul(builder, bld->num_loop, lp_build_const_int32(gallivm, s), "");
172
173 s_mask_idx = LLVMBuildAdd(builder, s_mask_idx, loop_iter, "");
174 sample_cov = lp_build_pointer_get(builder, mask_store, s_mask_idx);
175 if (s == bld->coverage_samples - 1)
176 s_mask_and = sample_cov;
177 else
178 s_mask_and = LLVMBuildAnd(builder, s_mask_and, sample_cov, "");
179
180 LLVMValueRef x_val_idx = lp_build_const_int32(gallivm, s * 2);
181 LLVMValueRef y_val_idx = lp_build_const_int32(gallivm, s * 2 + 1);
182
183 x_val_idx = lp_build_array_get(gallivm, bld->sample_pos_array, x_val_idx);
184 y_val_idx = lp_build_array_get(gallivm, bld->sample_pos_array, y_val_idx);
185 x_val_idx = lp_build_broadcast_scalar(coeff_bld, x_val_idx);
186 y_val_idx = lp_build_broadcast_scalar(coeff_bld, y_val_idx);
187 centroid_x_offset = lp_build_select(coeff_bld, sample_cov, x_val_idx, centroid_x_offset);
188 centroid_y_offset = lp_build_select(coeff_bld, sample_cov, y_val_idx, centroid_y_offset);
189 }
190 *centroid_x = lp_build_select(coeff_bld, s_mask_and, pix_center_offset, centroid_x_offset);
191 *centroid_y = lp_build_select(coeff_bld, s_mask_and, pix_center_offset, centroid_y_offset);
192 }
193
194 /* Much easier, and significantly less instructions in the per-stamp
195 * part (less than half) but overall more instructions so a loss if
196 * most quads are active. Might be a win though with larger vectors.
197 * No ability to do per-quad divide (doable but not implemented)
198 * Could be made to work with passed in pixel offsets (i.e. active quad merging).
199 */
200 static void
coeffs_init_simple(struct lp_build_interp_soa_context * bld,LLVMValueRef a0_ptr,LLVMValueRef dadx_ptr,LLVMValueRef dady_ptr)201 coeffs_init_simple(struct lp_build_interp_soa_context *bld,
202 LLVMValueRef a0_ptr,
203 LLVMValueRef dadx_ptr,
204 LLVMValueRef dady_ptr)
205 {
206 struct lp_build_context *coeff_bld = &bld->coeff_bld;
207 struct lp_build_context *setup_bld = &bld->setup_bld;
208 struct gallivm_state *gallivm = coeff_bld->gallivm;
209 LLVMBuilderRef builder = gallivm->builder;
210 unsigned attrib;
211
212 for (attrib = 0; attrib < bld->num_attribs; ++attrib) {
213 /*
214 * always fetch all 4 values for performance/simplicity
215 * Note: we do that here because it seems to generate better
216 * code. It generates a lot of moves initially but less
217 * moves later. As far as I can tell this looks like a
218 * llvm issue, instead of simply reloading the values from
219 * the passed in pointers it if it runs out of registers
220 * it spills/reloads them. Maybe some optimization passes
221 * would help.
222 * Might want to investigate this again later.
223 */
224 const unsigned interp = bld->interp[attrib];
225 LLVMValueRef index = lp_build_const_int32(gallivm,
226 attrib * TGSI_NUM_CHANNELS);
227 LLVMValueRef ptr;
228 LLVMValueRef dadxaos = setup_bld->zero;
229 LLVMValueRef dadyaos = setup_bld->zero;
230 LLVMValueRef a0aos = setup_bld->zero;
231
232 switch (interp) {
233 case LP_INTERP_PERSPECTIVE:
234 FALLTHROUGH;
235
236 case LP_INTERP_LINEAR:
237 ptr = LLVMBuildGEP(builder, dadx_ptr, &index, 1, "");
238 ptr = LLVMBuildBitCast(builder, ptr,
239 LLVMPointerType(setup_bld->vec_type, 0), "");
240 dadxaos = LLVMBuildLoad(builder, ptr, "");
241
242 ptr = LLVMBuildGEP(builder, dady_ptr, &index, 1, "");
243 ptr = LLVMBuildBitCast(builder, ptr,
244 LLVMPointerType(setup_bld->vec_type, 0), "");
245 dadyaos = LLVMBuildLoad(builder, ptr, "");
246
247 attrib_name(dadxaos, attrib, 0, ".dadxaos");
248 attrib_name(dadyaos, attrib, 0, ".dadyaos");
249 FALLTHROUGH;
250
251 case LP_INTERP_CONSTANT:
252 case LP_INTERP_FACING:
253 ptr = LLVMBuildGEP(builder, a0_ptr, &index, 1, "");
254 ptr = LLVMBuildBitCast(builder, ptr,
255 LLVMPointerType(setup_bld->vec_type, 0), "");
256 a0aos = LLVMBuildLoad(builder, ptr, "");
257 attrib_name(a0aos, attrib, 0, ".a0aos");
258 break;
259
260 case LP_INTERP_POSITION:
261 /* Nothing to do as the position coeffs are already setup in slot 0 */
262 continue;
263
264 default:
265 assert(0);
266 break;
267 }
268 bld->a0aos[attrib] = a0aos;
269 bld->dadxaos[attrib] = dadxaos;
270 bld->dadyaos[attrib] = dadyaos;
271 }
272 }
273
274 /**
275 * Interpolate the shader input attribute values.
276 * This is called for each (group of) quad(s).
277 */
278 static void
attribs_update_simple(struct lp_build_interp_soa_context * bld,struct gallivm_state * gallivm,LLVMValueRef loop_iter,LLVMValueRef mask_store,LLVMValueRef sample_id,int start,int end)279 attribs_update_simple(struct lp_build_interp_soa_context *bld,
280 struct gallivm_state *gallivm,
281 LLVMValueRef loop_iter,
282 LLVMValueRef mask_store,
283 LLVMValueRef sample_id,
284 int start,
285 int end)
286 {
287 LLVMBuilderRef builder = gallivm->builder;
288 struct lp_build_context *coeff_bld = &bld->coeff_bld;
289 struct lp_build_context *setup_bld = &bld->setup_bld;
290 LLVMValueRef oow = NULL;
291 unsigned attrib;
292 LLVMValueRef pixoffx;
293 LLVMValueRef pixoffy;
294 LLVMValueRef ptr;
295 LLVMValueRef pix_center_offset = lp_build_const_vec(gallivm, coeff_bld->type, 0.5);
296
297 /* could do this with code-generated passed in pixel offsets too */
298
299 assert(loop_iter);
300 ptr = LLVMBuildGEP(builder, bld->xoffset_store, &loop_iter, 1, "");
301 pixoffx = LLVMBuildLoad(builder, ptr, "");
302 ptr = LLVMBuildGEP(builder, bld->yoffset_store, &loop_iter, 1, "");
303 pixoffy = LLVMBuildLoad(builder, ptr, "");
304
305 pixoffx = LLVMBuildFAdd(builder, pixoffx,
306 lp_build_broadcast_scalar(coeff_bld, bld->x), "");
307 pixoffy = LLVMBuildFAdd(builder, pixoffy,
308 lp_build_broadcast_scalar(coeff_bld, bld->y), "");
309
310 for (attrib = start; attrib < end; attrib++) {
311 const unsigned mask = bld->mask[attrib];
312 const unsigned interp = bld->interp[attrib];
313 const unsigned loc = bld->interp_loc[attrib];
314 unsigned chan;
315
316 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
317 if (mask & (1 << chan)) {
318 LLVMValueRef index;
319 LLVMValueRef dadx = coeff_bld->zero;
320 LLVMValueRef dady = coeff_bld->zero;
321 LLVMValueRef a = coeff_bld->zero;
322 LLVMValueRef chan_pixoffx = pixoffx, chan_pixoffy = pixoffy;
323
324 index = lp_build_const_int32(gallivm, chan);
325 switch (interp) {
326 case LP_INTERP_PERSPECTIVE:
327 FALLTHROUGH;
328
329 case LP_INTERP_LINEAR:
330 if (attrib == 0 && chan == 0) {
331 dadx = coeff_bld->one;
332 if (sample_id) {
333 LLVMValueRef x_val_idx = LLVMBuildMul(gallivm->builder, sample_id, lp_build_const_int32(gallivm, 2), "");
334 x_val_idx = lp_build_array_get(gallivm, bld->sample_pos_array, x_val_idx);
335 a = lp_build_broadcast_scalar(coeff_bld, x_val_idx);
336 } else {
337 a = lp_build_const_vec(gallivm, coeff_bld->type, bld->pos_offset);
338 }
339 }
340 else if (attrib == 0 && chan == 1) {
341 dady = coeff_bld->one;
342 if (sample_id) {
343 LLVMValueRef y_val_idx = LLVMBuildMul(gallivm->builder, sample_id, lp_build_const_int32(gallivm, 2), "");
344 y_val_idx = LLVMBuildAdd(gallivm->builder, y_val_idx, lp_build_const_int32(gallivm, 1), "");
345 y_val_idx = lp_build_array_get(gallivm, bld->sample_pos_array, y_val_idx);
346 a = lp_build_broadcast_scalar(coeff_bld, y_val_idx);
347 } else {
348 a = lp_build_const_vec(gallivm, coeff_bld->type, bld->pos_offset);
349 }
350 }
351 else {
352 dadx = lp_build_extract_broadcast(gallivm, setup_bld->type,
353 coeff_bld->type, bld->dadxaos[attrib],
354 index);
355 dady = lp_build_extract_broadcast(gallivm, setup_bld->type,
356 coeff_bld->type, bld->dadyaos[attrib],
357 index);
358 a = lp_build_extract_broadcast(gallivm, setup_bld->type,
359 coeff_bld->type, bld->a0aos[attrib],
360 index);
361
362 if (bld->coverage_samples > 1) {
363 LLVMValueRef xoffset = pix_center_offset;
364 LLVMValueRef yoffset = pix_center_offset;
365 if (loc == TGSI_INTERPOLATE_LOC_SAMPLE || (attrib == 0 && chan == 2 && sample_id)) {
366 LLVMValueRef x_val_idx = LLVMBuildMul(gallivm->builder, sample_id, lp_build_const_int32(gallivm, 2), "");
367 LLVMValueRef y_val_idx = LLVMBuildAdd(gallivm->builder, x_val_idx, lp_build_const_int32(gallivm, 1), "");
368
369 x_val_idx = lp_build_array_get(gallivm, bld->sample_pos_array, x_val_idx);
370 y_val_idx = lp_build_array_get(gallivm, bld->sample_pos_array, y_val_idx);
371 xoffset = lp_build_broadcast_scalar(coeff_bld, x_val_idx);
372 yoffset = lp_build_broadcast_scalar(coeff_bld, y_val_idx);
373 } else if (loc == TGSI_INTERPOLATE_LOC_CENTROID) {
374 calc_centroid_offsets(bld, gallivm, loop_iter, mask_store,
375 pix_center_offset, &xoffset, &yoffset);
376 }
377 chan_pixoffx = lp_build_add(coeff_bld, chan_pixoffx, xoffset);
378 chan_pixoffy = lp_build_add(coeff_bld, chan_pixoffy, yoffset);
379 }
380 }
381
382 /*
383 * a = a0 + (x * dadx + y * dady)
384 */
385 a = lp_build_fmuladd(builder, dadx, chan_pixoffx, a);
386 a = lp_build_fmuladd(builder, dady, chan_pixoffy, a);
387
388 if (interp == LP_INTERP_PERSPECTIVE) {
389 if (oow == NULL) {
390 LLVMValueRef w = bld->attribs[0][3];
391 assert(attrib != 0);
392 assert(bld->mask[0] & TGSI_WRITEMASK_W);
393 oow = lp_build_rcp(coeff_bld, w);
394 }
395 a = lp_build_mul(coeff_bld, a, oow);
396 }
397 break;
398
399 case LP_INTERP_CONSTANT:
400 case LP_INTERP_FACING:
401 a = lp_build_extract_broadcast(gallivm, setup_bld->type,
402 coeff_bld->type, bld->a0aos[attrib],
403 index);
404 break;
405
406 case LP_INTERP_POSITION:
407 assert(attrib > 0);
408 a = bld->attribs[0][chan];
409 break;
410
411 default:
412 assert(0);
413 break;
414 }
415
416 if ((attrib == 0) && (chan == 2)) {
417 /* add polygon-offset value, stored in the X component of a0 */
418 LLVMValueRef offset =
419 lp_build_extract_broadcast(gallivm, setup_bld->type,
420 coeff_bld->type, bld->a0aos[0],
421 lp_build_const_int32(gallivm, 0));
422 a = LLVMBuildFAdd(builder, a, offset, "");
423
424 if (!bld->depth_clamp){
425 /* OpenGL requires clamping z to 0..1 range after polgon offset
426 * is applied if depth-clamping isn't enabled.
427 *
428 * This also fixes the problem that depth values can exceed 1.0,
429 * due to imprecision in the calculations.
430 */
431 a = lp_build_clamp(coeff_bld, a, coeff_bld->zero, coeff_bld->one);
432 }
433 }
434
435 bld->attribs[attrib][chan] = a;
436 }
437 }
438 }
439 }
440
441 static LLVMValueRef
lp_build_interp_soa_indirect(struct lp_build_interp_soa_context * bld,struct gallivm_state * gallivm,unsigned attrib,unsigned chan,LLVMValueRef indir_index,LLVMValueRef pixoffx,LLVMValueRef pixoffy)442 lp_build_interp_soa_indirect(struct lp_build_interp_soa_context *bld,
443 struct gallivm_state *gallivm,
444 unsigned attrib, unsigned chan,
445 LLVMValueRef indir_index,
446 LLVMValueRef pixoffx,
447 LLVMValueRef pixoffy)
448 {
449 LLVMBuilderRef builder = gallivm->builder;
450 struct lp_build_context *coeff_bld = &bld->coeff_bld;
451 const unsigned interp = bld->interp[attrib];
452 LLVMValueRef dadx = coeff_bld->zero;
453 LLVMValueRef dady = coeff_bld->zero;
454 LLVMValueRef a = coeff_bld->zero;
455
456 LLVMTypeRef u8ptr = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
457
458 indir_index = LLVMBuildAdd(builder, indir_index, lp_build_const_int_vec(gallivm, coeff_bld->type, attrib), "");
459 LLVMValueRef index = LLVMBuildMul(builder, indir_index, lp_build_const_int_vec(gallivm, coeff_bld->type, 4), "");
460 index = LLVMBuildAdd(builder, index, lp_build_const_int_vec(gallivm, coeff_bld->type, chan), "");
461
462 /* size up to byte indices */
463 index = LLVMBuildMul(builder, index, lp_build_const_int_vec(gallivm, coeff_bld->type, 4), "");
464
465 struct lp_type dst_type = coeff_bld->type;
466 dst_type.length = 1;
467 switch (interp) {
468 case LP_INTERP_PERSPECTIVE:
469 FALLTHROUGH;
470 case LP_INTERP_LINEAR:
471
472 dadx = lp_build_gather(gallivm, coeff_bld->type.length,
473 coeff_bld->type.width, dst_type,
474 true, LLVMBuildBitCast(builder, bld->dadx_ptr, u8ptr, ""), index, false);
475
476 dady = lp_build_gather(gallivm, coeff_bld->type.length,
477 coeff_bld->type.width, dst_type,
478 true, LLVMBuildBitCast(builder, bld->dady_ptr, u8ptr, ""), index, false);
479
480 a = lp_build_gather(gallivm, coeff_bld->type.length,
481 coeff_bld->type.width, dst_type,
482 true, LLVMBuildBitCast(builder, bld->a0_ptr, u8ptr, ""), index, false);
483
484 /*
485 * a = a0 + (x * dadx + y * dady)
486 */
487 a = lp_build_fmuladd(builder, dadx, pixoffx, a);
488 a = lp_build_fmuladd(builder, dady, pixoffy, a);
489
490 if (interp == LP_INTERP_PERSPECTIVE) {
491 LLVMValueRef w = bld->attribs[0][3];
492 assert(attrib != 0);
493 assert(bld->mask[0] & TGSI_WRITEMASK_W);
494 LLVMValueRef oow = lp_build_rcp(coeff_bld, w);
495 a = lp_build_mul(coeff_bld, a, oow);
496 }
497
498 break;
499 case LP_INTERP_CONSTANT:
500 case LP_INTERP_FACING:
501 a = lp_build_gather(gallivm, coeff_bld->type.length,
502 coeff_bld->type.width, dst_type,
503 true, LLVMBuildBitCast(builder, bld->a0_ptr, u8ptr, ""), index, false);
504 break;
505 default:
506 assert(0);
507 break;
508 }
509 return a;
510 }
511
512 LLVMValueRef
lp_build_interp_soa(struct lp_build_interp_soa_context * bld,struct gallivm_state * gallivm,LLVMValueRef loop_iter,LLVMValueRef mask_store,unsigned attrib,unsigned chan,unsigned loc,LLVMValueRef indir_index,LLVMValueRef offsets[2])513 lp_build_interp_soa(struct lp_build_interp_soa_context *bld,
514 struct gallivm_state *gallivm,
515 LLVMValueRef loop_iter,
516 LLVMValueRef mask_store,
517 unsigned attrib, unsigned chan,
518 unsigned loc,
519 LLVMValueRef indir_index,
520 LLVMValueRef offsets[2])
521 {
522 LLVMBuilderRef builder = gallivm->builder;
523 struct lp_build_context *coeff_bld = &bld->coeff_bld;
524 struct lp_build_context *setup_bld = &bld->setup_bld;
525 LLVMValueRef pixoffx;
526 LLVMValueRef pixoffy;
527 LLVMValueRef ptr;
528
529 /* could do this with code-generated passed in pixel offsets too */
530
531 assert(loop_iter);
532 ptr = LLVMBuildGEP(builder, bld->xoffset_store, &loop_iter, 1, "");
533 pixoffx = LLVMBuildLoad(builder, ptr, "");
534 ptr = LLVMBuildGEP(builder, bld->yoffset_store, &loop_iter, 1, "");
535 pixoffy = LLVMBuildLoad(builder, ptr, "");
536
537 pixoffx = LLVMBuildFAdd(builder, pixoffx,
538 lp_build_broadcast_scalar(coeff_bld, bld->x), "");
539 pixoffy = LLVMBuildFAdd(builder, pixoffy,
540 lp_build_broadcast_scalar(coeff_bld, bld->y), "");
541
542 LLVMValueRef pix_center_offset = lp_build_const_vec(gallivm, coeff_bld->type, 0.5);
543
544 if (loc == TGSI_INTERPOLATE_LOC_CENTER) {
545 if (bld->coverage_samples > 1) {
546 pixoffx = LLVMBuildFAdd(builder, pixoffx, pix_center_offset, "");
547 pixoffy = LLVMBuildFAdd(builder, pixoffy, pix_center_offset, "");
548 }
549
550 if (offsets[0])
551 pixoffx = LLVMBuildFAdd(builder, pixoffx,
552 offsets[0], "");
553 if (offsets[1])
554 pixoffy = LLVMBuildFAdd(builder, pixoffy,
555 offsets[1], "");
556 } else if (loc == TGSI_INTERPOLATE_LOC_SAMPLE) {
557 LLVMValueRef x_val_idx = LLVMBuildMul(gallivm->builder, offsets[0], lp_build_const_int_vec(gallivm, bld->coeff_bld.type, 2 * 4), "");
558 LLVMValueRef y_val_idx = LLVMBuildAdd(gallivm->builder, x_val_idx, lp_build_const_int_vec(gallivm, bld->coeff_bld.type, 4), "");
559
560 LLVMValueRef base_ptr = LLVMBuildBitCast(gallivm->builder, bld->sample_pos_array,
561 LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), "");
562 LLVMValueRef xoffset = lp_build_gather(gallivm,
563 bld->coeff_bld.type.length,
564 bld->coeff_bld.type.width,
565 lp_elem_type(bld->coeff_bld.type),
566 false,
567 base_ptr,
568 x_val_idx, true);
569 LLVMValueRef yoffset = lp_build_gather(gallivm,
570 bld->coeff_bld.type.length,
571 bld->coeff_bld.type.width,
572 lp_elem_type(bld->coeff_bld.type),
573 false,
574 base_ptr,
575 y_val_idx, true);
576
577 if (bld->coverage_samples > 1) {
578 pixoffx = LLVMBuildFAdd(builder, pixoffx, xoffset, "");
579 pixoffy = LLVMBuildFAdd(builder, pixoffy, yoffset, "");
580 }
581 } else if (loc == TGSI_INTERPOLATE_LOC_CENTROID) {
582 LLVMValueRef centroid_x_offset, centroid_y_offset;
583
584 /* for centroid find covered samples for this quad. */
585 /* if all samples are covered use pixel centers */
586 if (bld->coverage_samples > 1) {
587 calc_centroid_offsets(bld, gallivm, loop_iter, mask_store,
588 pix_center_offset, ¢roid_x_offset, ¢roid_y_offset);
589
590 pixoffx = LLVMBuildFAdd(builder, pixoffx, centroid_x_offset, "");
591 pixoffy = LLVMBuildFAdd(builder, pixoffy, centroid_y_offset, "");
592 }
593 }
594
595 // remap attrib properly.
596 attrib++;
597
598 if (indir_index)
599 return lp_build_interp_soa_indirect(bld, gallivm, attrib, chan,
600 indir_index, pixoffx, pixoffy);
601
602
603 const unsigned interp = bld->interp[attrib];
604 LLVMValueRef dadx = coeff_bld->zero;
605 LLVMValueRef dady = coeff_bld->zero;
606 LLVMValueRef a = coeff_bld->zero;
607
608 LLVMValueRef index = lp_build_const_int32(gallivm, chan);
609
610 switch (interp) {
611 case LP_INTERP_PERSPECTIVE:
612 FALLTHROUGH;
613 case LP_INTERP_LINEAR:
614 dadx = lp_build_extract_broadcast(gallivm, setup_bld->type,
615 coeff_bld->type, bld->dadxaos[attrib],
616 index);
617
618 dady = lp_build_extract_broadcast(gallivm, setup_bld->type,
619 coeff_bld->type, bld->dadyaos[attrib],
620 index);
621
622 a = lp_build_extract_broadcast(gallivm, setup_bld->type,
623 coeff_bld->type, bld->a0aos[attrib],
624 index);
625
626 /*
627 * a = a0 + (x * dadx + y * dady)
628 */
629 a = lp_build_fmuladd(builder, dadx, pixoffx, a);
630 a = lp_build_fmuladd(builder, dady, pixoffy, a);
631
632 if (interp == LP_INTERP_PERSPECTIVE) {
633 LLVMValueRef w = bld->attribs[0][3];
634 assert(attrib != 0);
635 assert(bld->mask[0] & TGSI_WRITEMASK_W);
636 LLVMValueRef oow = lp_build_rcp(coeff_bld, w);
637 a = lp_build_mul(coeff_bld, a, oow);
638 }
639
640 break;
641 case LP_INTERP_CONSTANT:
642 case LP_INTERP_FACING:
643 a = lp_build_extract_broadcast(gallivm, setup_bld->type,
644 coeff_bld->type, bld->a0aos[attrib],
645 index);
646 break;
647 default:
648 assert(0);
649 break;
650 }
651 return a;
652 }
653
654 /**
655 * Generate the position vectors.
656 *
657 * Parameter x0, y0 are the integer values with upper left coordinates.
658 */
659 static void
pos_init(struct lp_build_interp_soa_context * bld,LLVMValueRef x0,LLVMValueRef y0)660 pos_init(struct lp_build_interp_soa_context *bld,
661 LLVMValueRef x0,
662 LLVMValueRef y0)
663 {
664 LLVMBuilderRef builder = bld->coeff_bld.gallivm->builder;
665 struct lp_build_context *coeff_bld = &bld->coeff_bld;
666
667 bld->x = LLVMBuildSIToFP(builder, x0, coeff_bld->elem_type, "");
668 bld->y = LLVMBuildSIToFP(builder, y0, coeff_bld->elem_type, "");
669 }
670
671
672 /**
673 * Initialize fragment shader input attribute info.
674 */
675 void
lp_build_interp_soa_init(struct lp_build_interp_soa_context * bld,struct gallivm_state * gallivm,unsigned num_inputs,const struct lp_shader_input * inputs,boolean pixel_center_integer,unsigned coverage_samples,LLVMValueRef sample_pos_array,LLVMValueRef num_loop,boolean depth_clamp,LLVMBuilderRef builder,struct lp_type type,LLVMValueRef a0_ptr,LLVMValueRef dadx_ptr,LLVMValueRef dady_ptr,LLVMValueRef x0,LLVMValueRef y0)676 lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
677 struct gallivm_state *gallivm,
678 unsigned num_inputs,
679 const struct lp_shader_input *inputs,
680 boolean pixel_center_integer,
681 unsigned coverage_samples,
682 LLVMValueRef sample_pos_array,
683 LLVMValueRef num_loop,
684 boolean depth_clamp,
685 LLVMBuilderRef builder,
686 struct lp_type type,
687 LLVMValueRef a0_ptr,
688 LLVMValueRef dadx_ptr,
689 LLVMValueRef dady_ptr,
690 LLVMValueRef x0,
691 LLVMValueRef y0)
692 {
693 struct lp_type coeff_type;
694 struct lp_type setup_type;
695 unsigned attrib;
696 unsigned chan;
697
698 memset(bld, 0, sizeof *bld);
699
700 memset(&coeff_type, 0, sizeof coeff_type);
701 coeff_type.floating = TRUE;
702 coeff_type.sign = TRUE;
703 coeff_type.width = 32;
704 coeff_type.length = type.length;
705
706 memset(&setup_type, 0, sizeof setup_type);
707 setup_type.floating = TRUE;
708 setup_type.sign = TRUE;
709 setup_type.width = 32;
710 setup_type.length = TGSI_NUM_CHANNELS;
711
712
713 /* XXX: we don't support interpolating into any other types */
714 assert(memcmp(&coeff_type, &type, sizeof coeff_type) == 0);
715
716 lp_build_context_init(&bld->coeff_bld, gallivm, coeff_type);
717 lp_build_context_init(&bld->setup_bld, gallivm, setup_type);
718
719 /* For convenience */
720 bld->pos = bld->attribs[0];
721 bld->inputs = (const LLVMValueRef (*)[TGSI_NUM_CHANNELS]) bld->attribs[1];
722
723 /* Position */
724 bld->mask[0] = TGSI_WRITEMASK_XYZW;
725 bld->interp[0] = LP_INTERP_LINEAR;
726 bld->interp_loc[0] = 0;
727
728 /* Inputs */
729 for (attrib = 0; attrib < num_inputs; ++attrib) {
730 bld->mask[1 + attrib] = inputs[attrib].usage_mask;
731 bld->interp[1 + attrib] = inputs[attrib].interp;
732 bld->interp_loc[1 + attrib] = inputs[attrib].location;
733 }
734 bld->num_attribs = 1 + num_inputs;
735
736 /* needed for indirect */
737 bld->a0_ptr = a0_ptr;
738 bld->dadx_ptr = dadx_ptr;
739 bld->dady_ptr = dady_ptr;
740
741 /* Ensure all masked out input channels have a valid value */
742 for (attrib = 0; attrib < bld->num_attribs; ++attrib) {
743 for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
744 bld->attribs[attrib][chan] = bld->coeff_bld.undef;
745 }
746 }
747
748 if (pixel_center_integer) {
749 bld->pos_offset = 0.0;
750 } else {
751 bld->pos_offset = 0.5;
752 }
753 bld->depth_clamp = depth_clamp;
754 bld->coverage_samples = coverage_samples;
755 bld->num_loop = num_loop;
756 bld->sample_pos_array = sample_pos_array;
757
758 pos_init(bld, x0, y0);
759
760 /*
761 * Simple method (single step interpolation) may be slower if vector length
762 * is just 4, but the results are different (generally less accurate) with
763 * the other method, so always use more accurate version.
764 */
765 {
766 /* XXX this should use a global static table */
767 unsigned i;
768 unsigned num_loops = 16 / type.length;
769 LLVMValueRef pixoffx, pixoffy, index;
770 LLVMValueRef ptr;
771
772 bld->xoffset_store = lp_build_array_alloca(gallivm,
773 lp_build_vec_type(gallivm, type),
774 lp_build_const_int32(gallivm, num_loops),
775 "");
776 bld->yoffset_store = lp_build_array_alloca(gallivm,
777 lp_build_vec_type(gallivm, type),
778 lp_build_const_int32(gallivm, num_loops),
779 "");
780 for (i = 0; i < num_loops; i++) {
781 index = lp_build_const_int32(gallivm, i);
782 calc_offsets(&bld->coeff_bld, i*type.length/4, &pixoffx, &pixoffy);
783 ptr = LLVMBuildGEP(builder, bld->xoffset_store, &index, 1, "");
784 LLVMBuildStore(builder, pixoffx, ptr);
785 ptr = LLVMBuildGEP(builder, bld->yoffset_store, &index, 1, "");
786 LLVMBuildStore(builder, pixoffy, ptr);
787 }
788 }
789 coeffs_init_simple(bld, a0_ptr, dadx_ptr, dady_ptr);
790 }
791
792
793 /*
794 * Advance the position and inputs to the given quad within the block.
795 */
796
797 void
lp_build_interp_soa_update_inputs_dyn(struct lp_build_interp_soa_context * bld,struct gallivm_state * gallivm,LLVMValueRef quad_start_index,LLVMValueRef mask_store,LLVMValueRef sample_id)798 lp_build_interp_soa_update_inputs_dyn(struct lp_build_interp_soa_context *bld,
799 struct gallivm_state *gallivm,
800 LLVMValueRef quad_start_index,
801 LLVMValueRef mask_store,
802 LLVMValueRef sample_id)
803 {
804 attribs_update_simple(bld, gallivm, quad_start_index, mask_store, sample_id, 1, bld->num_attribs);
805 }
806
807 void
lp_build_interp_soa_update_pos_dyn(struct lp_build_interp_soa_context * bld,struct gallivm_state * gallivm,LLVMValueRef quad_start_index,LLVMValueRef sample_id)808 lp_build_interp_soa_update_pos_dyn(struct lp_build_interp_soa_context *bld,
809 struct gallivm_state *gallivm,
810 LLVMValueRef quad_start_index,
811 LLVMValueRef sample_id)
812 {
813 attribs_update_simple(bld, gallivm, quad_start_index, NULL, sample_id, 0, 1);
814 }
815
816