1 /**************************************************************************
2  *
3  * Copyright 2009 VMware, Inc.
4  * Copyright 2007-2008 VMware, Inc.
5  * All Rights Reserved.
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a
8  * copy of this software and associated documentation files (the
9  * "Software"), to deal in the Software without restriction, including
10  * without limitation the rights to use, copy, modify, merge, publish,
11  * distribute, sub license, and/or sell copies of the Software, and to
12  * permit persons to whom the Software is furnished to do so, subject to
13  * the following conditions:
14  *
15  * The above copyright notice and this permission notice (including the
16  * next paragraph) shall be included in all copies or substantial portions
17  * of the Software.
18  *
19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
23  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26  *
27  **************************************************************************/
28 
29 /**
30  * @file
31  * Position and shader input interpolation.
32  *
33  * @author Jose Fonseca <jfonseca@vmware.com>
34  */
35 
36 #include "pipe/p_shader_tokens.h"
37 #include "util/u_debug.h"
38 #include "util/u_memory.h"
39 #include "util/u_math.h"
40 #include "tgsi/tgsi_scan.h"
41 #include "gallivm/lp_bld_debug.h"
42 #include "gallivm/lp_bld_const.h"
43 #include "gallivm/lp_bld_arit.h"
44 #include "gallivm/lp_bld_swizzle.h"
45 #include "gallivm/lp_bld_flow.h"
46 #include "gallivm/lp_bld_logic.h"
47 #include "gallivm/lp_bld_struct.h"
48 #include "gallivm/lp_bld_gather.h"
49 #include "lp_bld_interp.h"
50 
51 
52 /*
53  * The shader JIT function operates on blocks of quads.
54  * Each block has 2x2 quads and each quad has 2x2 pixels.
55  *
56  * We iterate over the quads in order 0, 1, 2, 3:
57  *
58  * #################
59  * #   |   #   |   #
60  * #---0---#---1---#
61  * #   |   #   |   #
62  * #################
63  * #   |   #   |   #
64  * #---2---#---3---#
65  * #   |   #   |   #
66  * #################
67  *
68  * If we iterate over multiple quads at once, quads 01 and 23 are processed
69  * together.
70  *
71  * Within each quad, we have four pixels which are represented in SOA
72  * order:
73  *
74  * #########
75  * # 0 | 1 #
76  * #---+---#
77  * # 2 | 3 #
78  * #########
79  *
80  * So the green channel (for example) of the four pixels is stored in
81  * a single vector register: {g0, g1, g2, g3}.
82  * The order stays the same even with multiple quads:
83  * 0 1 4 5
84  * 2 3 6 7
85  * is stored as g0..g7
86  */
87 
88 
/**
 * Do one perspective divide per quad.
 *
 * With perspective interpolation the final attribute value is
 *
 *  a' = a/w = a * oow
 *
 * with
 *
 *  a = a0 + dadx*x + dady*y
 *  w = w0 + dwdx*x + dwdy*y
 *  oow = 1/w = 1/(w0 + dwdx*x + dwdy*y)
 *
 * Rather than dividing once per pixel, this macro selects a scheme where the
 * division is done only for the upper left pixel of each quad, and the
 * remaining pixels use a linear approximation given by:
 *
 *  da'dx = (dadx - dwdx*a)*oow
 *  da'dy = (dady - dwdy*a)*oow
 *
 * Ironically, this actually makes things slower -- probably because the
 * divide hardware unit is rarely used, whereas the multiply unit is typically
 * already saturated.
 */
#define PERSPECTIVE_DIVIDE_PER_QUAD 0
114 
115 
/*
 * Per-pixel x/y offsets, indexed by the pixel's position inside the SoA
 * vector.  Each row below holds the four pixels of one 2x2 quad.
 */
static const unsigned char quad_offset_x[16] = {
   0, 1, 0, 1,
   2, 3, 2, 3,
   0, 1, 0, 1,
   2, 3, 2, 3
};
static const unsigned char quad_offset_y[16] = {
   0, 0, 1, 1,
   0, 0, 1, 1,
   2, 2, 3, 3,
   2, 2, 3, 3
};
118 
119 
120 static void
attrib_name(LLVMValueRef val,unsigned attrib,unsigned chan,const char * suffix)121 attrib_name(LLVMValueRef val, unsigned attrib, unsigned chan, const char *suffix)
122 {
123    if(attrib == 0)
124       lp_build_name(val, "pos.%c%s", "xyzw"[chan], suffix);
125    else
126       lp_build_name(val, "input%u.%c%s", attrib - 1, "xyzw"[chan], suffix);
127 }
128 
129 static void
calc_offsets(struct lp_build_context * coeff_bld,unsigned quad_start_index,LLVMValueRef * pixoffx,LLVMValueRef * pixoffy)130 calc_offsets(struct lp_build_context *coeff_bld,
131              unsigned quad_start_index,
132              LLVMValueRef *pixoffx,
133              LLVMValueRef *pixoffy)
134 {
135    unsigned i;
136    unsigned num_pix = coeff_bld->type.length;
137    struct gallivm_state *gallivm = coeff_bld->gallivm;
138    LLVMBuilderRef builder = coeff_bld->gallivm->builder;
139    LLVMValueRef nr, pixxf, pixyf;
140 
141    *pixoffx = coeff_bld->undef;
142    *pixoffy = coeff_bld->undef;
143 
144    for (i = 0; i < num_pix; i++) {
145       nr = lp_build_const_int32(gallivm, i);
146       pixxf = lp_build_const_float(gallivm, quad_offset_x[i % num_pix] +
147                                    (quad_start_index & 1) * 2);
148       pixyf = lp_build_const_float(gallivm, quad_offset_y[i % num_pix] +
149                                    (quad_start_index & 2));
150       *pixoffx = LLVMBuildInsertElement(builder, *pixoffx, pixxf, nr, "");
151       *pixoffy = LLVMBuildInsertElement(builder, *pixoffy, pixyf, nr, "");
152    }
153 }
154 
155 static void
calc_centroid_offsets(struct lp_build_interp_soa_context * bld,struct gallivm_state * gallivm,LLVMValueRef loop_iter,LLVMValueRef mask_store,LLVMValueRef pix_center_offset,LLVMValueRef * centroid_x,LLVMValueRef * centroid_y)156 calc_centroid_offsets(struct lp_build_interp_soa_context *bld,
157                       struct gallivm_state *gallivm,
158                       LLVMValueRef loop_iter,
159                       LLVMValueRef mask_store,
160                       LLVMValueRef pix_center_offset,
161                       LLVMValueRef *centroid_x, LLVMValueRef *centroid_y)
162 {
163    struct lp_build_context *coeff_bld = &bld->coeff_bld;
164    LLVMBuilderRef builder = gallivm->builder;
165    LLVMValueRef s_mask_and = NULL;
166    LLVMValueRef centroid_x_offset = pix_center_offset;
167    LLVMValueRef centroid_y_offset = pix_center_offset;
168    for (int s = bld->coverage_samples - 1; s >= 0; s--) {
169       LLVMValueRef sample_cov;
170       LLVMValueRef s_mask_idx = LLVMBuildMul(builder, bld->num_loop, lp_build_const_int32(gallivm, s), "");
171 
172       s_mask_idx = LLVMBuildAdd(builder, s_mask_idx, loop_iter, "");
173       sample_cov = lp_build_pointer_get(builder, mask_store, s_mask_idx);
174       if (s == bld->coverage_samples - 1)
175          s_mask_and = sample_cov;
176       else
177          s_mask_and = LLVMBuildAnd(builder, s_mask_and, sample_cov, "");
178 
179       LLVMValueRef x_val_idx = lp_build_const_int32(gallivm, s * 2);
180       LLVMValueRef y_val_idx = lp_build_const_int32(gallivm, s * 2 + 1);
181 
182       x_val_idx = lp_build_array_get(gallivm, bld->sample_pos_array, x_val_idx);
183       y_val_idx = lp_build_array_get(gallivm, bld->sample_pos_array, y_val_idx);
184       x_val_idx = lp_build_broadcast_scalar(coeff_bld, x_val_idx);
185       y_val_idx = lp_build_broadcast_scalar(coeff_bld, y_val_idx);
186       centroid_x_offset = lp_build_select(coeff_bld, sample_cov, x_val_idx, centroid_x_offset);
187       centroid_y_offset = lp_build_select(coeff_bld, sample_cov, y_val_idx, centroid_y_offset);
188    }
189    *centroid_x = lp_build_select(coeff_bld, s_mask_and, pix_center_offset, centroid_x_offset);
190    *centroid_y = lp_build_select(coeff_bld, s_mask_and, pix_center_offset, centroid_y_offset);
191 }
192 
193 /* Much easier, and significantly less instructions in the per-stamp
194  * part (less than half) but overall more instructions so a loss if
195  * most quads are active. Might be a win though with larger vectors.
196  * No ability to do per-quad divide (doable but not implemented)
197  * Could be made to work with passed in pixel offsets (i.e. active quad merging).
198  */
199 static void
coeffs_init_simple(struct lp_build_interp_soa_context * bld,LLVMValueRef a0_ptr,LLVMValueRef dadx_ptr,LLVMValueRef dady_ptr)200 coeffs_init_simple(struct lp_build_interp_soa_context *bld,
201                    LLVMValueRef a0_ptr,
202                    LLVMValueRef dadx_ptr,
203                    LLVMValueRef dady_ptr)
204 {
205    struct lp_build_context *coeff_bld = &bld->coeff_bld;
206    struct lp_build_context *setup_bld = &bld->setup_bld;
207    struct gallivm_state *gallivm = coeff_bld->gallivm;
208    LLVMBuilderRef builder = gallivm->builder;
209    unsigned attrib;
210 
211    for (attrib = 0; attrib < bld->num_attribs; ++attrib) {
212       /*
213        * always fetch all 4 values for performance/simplicity
214        * Note: we do that here because it seems to generate better
215        * code. It generates a lot of moves initially but less
216        * moves later. As far as I can tell this looks like a
217        * llvm issue, instead of simply reloading the values from
218        * the passed in pointers it if it runs out of registers
219        * it spills/reloads them. Maybe some optimization passes
220        * would help.
221        * Might want to investigate this again later.
222        */
223       const unsigned interp = bld->interp[attrib];
224       LLVMValueRef index = lp_build_const_int32(gallivm,
225                                 attrib * TGSI_NUM_CHANNELS);
226       LLVMValueRef ptr;
227       LLVMValueRef dadxaos = setup_bld->zero;
228       LLVMValueRef dadyaos = setup_bld->zero;
229       LLVMValueRef a0aos = setup_bld->zero;
230 
231       switch (interp) {
232       case LP_INTERP_PERSPECTIVE:
233          /* fall-through */
234 
235       case LP_INTERP_LINEAR:
236          ptr = LLVMBuildGEP(builder, dadx_ptr, &index, 1, "");
237          ptr = LLVMBuildBitCast(builder, ptr,
238                LLVMPointerType(setup_bld->vec_type, 0), "");
239          dadxaos = LLVMBuildLoad(builder, ptr, "");
240 
241          ptr = LLVMBuildGEP(builder, dady_ptr, &index, 1, "");
242          ptr = LLVMBuildBitCast(builder, ptr,
243                LLVMPointerType(setup_bld->vec_type, 0), "");
244          dadyaos = LLVMBuildLoad(builder, ptr, "");
245 
246          attrib_name(dadxaos, attrib, 0, ".dadxaos");
247          attrib_name(dadyaos, attrib, 0, ".dadyaos");
248          /* fall-through */
249 
250       case LP_INTERP_CONSTANT:
251       case LP_INTERP_FACING:
252          ptr = LLVMBuildGEP(builder, a0_ptr, &index, 1, "");
253          ptr = LLVMBuildBitCast(builder, ptr,
254                LLVMPointerType(setup_bld->vec_type, 0), "");
255          a0aos = LLVMBuildLoad(builder, ptr, "");
256          attrib_name(a0aos, attrib, 0, ".a0aos");
257          break;
258 
259       case LP_INTERP_POSITION:
260          /* Nothing to do as the position coeffs are already setup in slot 0 */
261          continue;
262 
263       default:
264          assert(0);
265          break;
266       }
267       bld->a0aos[attrib] = a0aos;
268       bld->dadxaos[attrib] = dadxaos;
269       bld->dadyaos[attrib] = dadyaos;
270    }
271 }
272 
273 /**
274  * Interpolate the shader input attribute values.
275  * This is called for each (group of) quad(s).
276  */
277 static void
attribs_update_simple(struct lp_build_interp_soa_context * bld,struct gallivm_state * gallivm,LLVMValueRef loop_iter,LLVMValueRef mask_store,LLVMValueRef sample_id,int start,int end)278 attribs_update_simple(struct lp_build_interp_soa_context *bld,
279                       struct gallivm_state *gallivm,
280                       LLVMValueRef loop_iter,
281                       LLVMValueRef mask_store,
282                       LLVMValueRef sample_id,
283                       int start,
284                       int end)
285 {
286    LLVMBuilderRef builder = gallivm->builder;
287    struct lp_build_context *coeff_bld = &bld->coeff_bld;
288    struct lp_build_context *setup_bld = &bld->setup_bld;
289    LLVMValueRef oow = NULL;
290    unsigned attrib;
291    LLVMValueRef pixoffx;
292    LLVMValueRef pixoffy;
293    LLVMValueRef ptr;
294    LLVMValueRef pix_center_offset = lp_build_const_vec(gallivm, coeff_bld->type, 0.5);
295 
296    /* could do this with code-generated passed in pixel offsets too */
297 
298    assert(loop_iter);
299    ptr = LLVMBuildGEP(builder, bld->xoffset_store, &loop_iter, 1, "");
300    pixoffx = LLVMBuildLoad(builder, ptr, "");
301    ptr = LLVMBuildGEP(builder, bld->yoffset_store, &loop_iter, 1, "");
302    pixoffy = LLVMBuildLoad(builder, ptr, "");
303 
304    pixoffx = LLVMBuildFAdd(builder, pixoffx,
305                            lp_build_broadcast_scalar(coeff_bld, bld->x), "");
306    pixoffy = LLVMBuildFAdd(builder, pixoffy,
307                            lp_build_broadcast_scalar(coeff_bld, bld->y), "");
308 
309    for (attrib = start; attrib < end; attrib++) {
310       const unsigned mask = bld->mask[attrib];
311       const unsigned interp = bld->interp[attrib];
312       const unsigned loc = bld->interp_loc[attrib];
313       unsigned chan;
314 
315       for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
316          if (mask & (1 << chan)) {
317             LLVMValueRef index;
318             LLVMValueRef dadx = coeff_bld->zero;
319             LLVMValueRef dady = coeff_bld->zero;
320             LLVMValueRef a = coeff_bld->zero;
321             LLVMValueRef chan_pixoffx = pixoffx, chan_pixoffy = pixoffy;
322 
323             index = lp_build_const_int32(gallivm, chan);
324             switch (interp) {
325             case LP_INTERP_PERSPECTIVE:
326                /* fall-through */
327 
328             case LP_INTERP_LINEAR:
329                if (attrib == 0 && chan == 0) {
330                   dadx = coeff_bld->one;
331                   if (sample_id) {
332                      LLVMValueRef x_val_idx = LLVMBuildMul(gallivm->builder, sample_id, lp_build_const_int32(gallivm, 2), "");
333                      x_val_idx = lp_build_array_get(gallivm, bld->sample_pos_array, x_val_idx);
334                      a = lp_build_broadcast_scalar(coeff_bld, x_val_idx);
335                   } else {
336                      a = lp_build_const_vec(gallivm, coeff_bld->type, bld->pos_offset);
337                   }
338                }
339                else if (attrib == 0 && chan == 1) {
340                   dady = coeff_bld->one;
341                   if (sample_id) {
342                      LLVMValueRef y_val_idx = LLVMBuildMul(gallivm->builder, sample_id, lp_build_const_int32(gallivm, 2), "");
343                      y_val_idx = LLVMBuildAdd(gallivm->builder, y_val_idx, lp_build_const_int32(gallivm, 1), "");
344                      y_val_idx = lp_build_array_get(gallivm, bld->sample_pos_array, y_val_idx);
345                      a = lp_build_broadcast_scalar(coeff_bld, y_val_idx);
346                   } else {
347                      a = lp_build_const_vec(gallivm, coeff_bld->type, bld->pos_offset);
348                   }
349                }
350                else {
351                   dadx = lp_build_extract_broadcast(gallivm, setup_bld->type,
352                                                     coeff_bld->type, bld->dadxaos[attrib],
353                                                     index);
354                   dady = lp_build_extract_broadcast(gallivm, setup_bld->type,
355                                                     coeff_bld->type, bld->dadyaos[attrib],
356                                                     index);
357                   a = lp_build_extract_broadcast(gallivm, setup_bld->type,
358                                                  coeff_bld->type, bld->a0aos[attrib],
359                                                  index);
360 
361                   if (bld->coverage_samples > 1) {
362                      LLVMValueRef xoffset = pix_center_offset;
363                      LLVMValueRef yoffset = pix_center_offset;
364                      if (loc == TGSI_INTERPOLATE_LOC_SAMPLE || (attrib == 0 && chan == 2 && sample_id)) {
365                         LLVMValueRef x_val_idx = LLVMBuildMul(gallivm->builder, sample_id, lp_build_const_int32(gallivm, 2), "");
366                         LLVMValueRef y_val_idx = LLVMBuildAdd(gallivm->builder, x_val_idx, lp_build_const_int32(gallivm, 1), "");
367 
368                         x_val_idx = lp_build_array_get(gallivm, bld->sample_pos_array, x_val_idx);
369                         y_val_idx = lp_build_array_get(gallivm, bld->sample_pos_array, y_val_idx);
370                         xoffset = lp_build_broadcast_scalar(coeff_bld, x_val_idx);
371                         yoffset = lp_build_broadcast_scalar(coeff_bld, y_val_idx);
372                      } else if (loc == TGSI_INTERPOLATE_LOC_CENTROID) {
373                         calc_centroid_offsets(bld, gallivm, loop_iter, mask_store,
374                                               pix_center_offset, &xoffset, &yoffset);
375                      }
376                      chan_pixoffx = lp_build_add(coeff_bld, chan_pixoffx, xoffset);
377                      chan_pixoffy = lp_build_add(coeff_bld, chan_pixoffy, yoffset);
378                   }
379                }
380 
381                /*
382                 * a = a0 + (x * dadx + y * dady)
383                 */
384                a = lp_build_fmuladd(builder, dadx, chan_pixoffx, a);
385                a = lp_build_fmuladd(builder, dady, chan_pixoffy, a);
386 
387                if (interp == LP_INTERP_PERSPECTIVE) {
388                   if (oow == NULL) {
389                      LLVMValueRef w = bld->attribs[0][3];
390                      assert(attrib != 0);
391                      assert(bld->mask[0] & TGSI_WRITEMASK_W);
392                      oow = lp_build_rcp(coeff_bld, w);
393                   }
394                   a = lp_build_mul(coeff_bld, a, oow);
395                }
396                break;
397 
398             case LP_INTERP_CONSTANT:
399             case LP_INTERP_FACING:
400                a = lp_build_extract_broadcast(gallivm, setup_bld->type,
401                                               coeff_bld->type, bld->a0aos[attrib],
402                                               index);
403                break;
404 
405             case LP_INTERP_POSITION:
406                assert(attrib > 0);
407                a = bld->attribs[0][chan];
408                break;
409 
410             default:
411                assert(0);
412                break;
413             }
414 
415             if ((attrib == 0) && (chan == 2) && !bld->depth_clamp){
416                /* FIXME: Depth values can exceed 1.0, due to the fact that
417                 * setup interpolation coefficients refer to (0,0) which causes
418                 * precision loss. So we must clamp to 1.0 here to avoid artifacts.
419                 * Note though values outside [0,1] are perfectly valid with
420                 * depth clip disabled.
421                 * XXX: If depth clip is disabled but we force depth clamp
422                 * we may get values larger than 1.0 in the fs (but not in
423                 * depth test). Not sure if that's an issue...
424                 * Also, on a similar note, it is not obvious if the depth values
425                 * appearing in fs (with depth clip disabled) should be clamped
426                 * to [0,1], clamped to near/far or not be clamped at all...
427                 */
428                a = lp_build_min(coeff_bld, a, coeff_bld->one);
429             }
430             bld->attribs[attrib][chan] = a;
431          }
432       }
433    }
434 }
435 
436 static LLVMValueRef
lp_build_interp_soa_indirect(struct lp_build_interp_soa_context * bld,struct gallivm_state * gallivm,unsigned attrib,unsigned chan,LLVMValueRef indir_index,LLVMValueRef pixoffx,LLVMValueRef pixoffy)437 lp_build_interp_soa_indirect(struct lp_build_interp_soa_context *bld,
438                              struct gallivm_state *gallivm,
439                              unsigned attrib, unsigned chan,
440                              LLVMValueRef indir_index,
441                              LLVMValueRef pixoffx,
442                              LLVMValueRef pixoffy)
443 {
444    LLVMBuilderRef builder = gallivm->builder;
445    struct lp_build_context *coeff_bld = &bld->coeff_bld;
446    const unsigned interp = bld->interp[attrib];
447    LLVMValueRef dadx = coeff_bld->zero;
448    LLVMValueRef dady = coeff_bld->zero;
449    LLVMValueRef a = coeff_bld->zero;
450 
451    LLVMTypeRef u8ptr = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
452 
453    indir_index = LLVMBuildAdd(builder, indir_index, lp_build_const_int_vec(gallivm, coeff_bld->type, attrib), "");
454    LLVMValueRef index = LLVMBuildMul(builder, indir_index, lp_build_const_int_vec(gallivm, coeff_bld->type, 4), "");
455    index = LLVMBuildAdd(builder, index, lp_build_const_int_vec(gallivm, coeff_bld->type, chan), "");
456 
457    /* size up to byte indices */
458    index = LLVMBuildMul(builder, index, lp_build_const_int_vec(gallivm, coeff_bld->type, 4), "");
459 
460    struct lp_type dst_type = coeff_bld->type;
461    dst_type.length = 1;
462    switch (interp) {
463    case LP_INTERP_PERSPECTIVE:
464       /* fall-through */
465    case LP_INTERP_LINEAR:
466 
467       dadx = lp_build_gather(gallivm, coeff_bld->type.length,
468                              coeff_bld->type.width, dst_type,
469                              true, LLVMBuildBitCast(builder, bld->dadx_ptr, u8ptr, ""), index, false);
470 
471       dady = lp_build_gather(gallivm, coeff_bld->type.length,
472                              coeff_bld->type.width, dst_type,
473                              true, LLVMBuildBitCast(builder, bld->dady_ptr, u8ptr, ""), index, false);
474 
475       a = lp_build_gather(gallivm, coeff_bld->type.length,
476                           coeff_bld->type.width, dst_type,
477                           true, LLVMBuildBitCast(builder, bld->a0_ptr, u8ptr, ""), index, false);
478 
479       /*
480        * a = a0 + (x * dadx + y * dady)
481        */
482       a = lp_build_fmuladd(builder, dadx, pixoffx, a);
483       a = lp_build_fmuladd(builder, dady, pixoffy, a);
484 
485       if (interp == LP_INTERP_PERSPECTIVE) {
486         LLVMValueRef w = bld->attribs[0][3];
487         assert(attrib != 0);
488         assert(bld->mask[0] & TGSI_WRITEMASK_W);
489         LLVMValueRef oow = lp_build_rcp(coeff_bld, w);
490         a = lp_build_mul(coeff_bld, a, oow);
491       }
492 
493       break;
494    case LP_INTERP_CONSTANT:
495    case LP_INTERP_FACING:
496       a = lp_build_gather(gallivm, coeff_bld->type.length,
497                           coeff_bld->type.width, dst_type,
498                           true, LLVMBuildBitCast(builder, bld->a0_ptr, u8ptr, ""), index, false);
499       break;
500    default:
501       assert(0);
502       break;
503    }
504    return a;
505 }
506 
507 LLVMValueRef
lp_build_interp_soa(struct lp_build_interp_soa_context * bld,struct gallivm_state * gallivm,LLVMValueRef loop_iter,LLVMValueRef mask_store,unsigned attrib,unsigned chan,unsigned loc,LLVMValueRef indir_index,LLVMValueRef offsets[2])508 lp_build_interp_soa(struct lp_build_interp_soa_context *bld,
509                     struct gallivm_state *gallivm,
510                     LLVMValueRef loop_iter,
511                     LLVMValueRef mask_store,
512                     unsigned attrib, unsigned chan,
513                     unsigned loc,
514                     LLVMValueRef indir_index,
515                     LLVMValueRef offsets[2])
516 {
517    LLVMBuilderRef builder = gallivm->builder;
518    struct lp_build_context *coeff_bld = &bld->coeff_bld;
519    struct lp_build_context *setup_bld = &bld->setup_bld;
520    LLVMValueRef pixoffx;
521    LLVMValueRef pixoffy;
522    LLVMValueRef ptr;
523 
524    /* could do this with code-generated passed in pixel offsets too */
525 
526    assert(loop_iter);
527    ptr = LLVMBuildGEP(builder, bld->xoffset_store, &loop_iter, 1, "");
528    pixoffx = LLVMBuildLoad(builder, ptr, "");
529    ptr = LLVMBuildGEP(builder, bld->yoffset_store, &loop_iter, 1, "");
530    pixoffy = LLVMBuildLoad(builder, ptr, "");
531 
532    pixoffx = LLVMBuildFAdd(builder, pixoffx,
533                            lp_build_broadcast_scalar(coeff_bld, bld->x), "");
534    pixoffy = LLVMBuildFAdd(builder, pixoffy,
535                            lp_build_broadcast_scalar(coeff_bld, bld->y), "");
536 
537    LLVMValueRef pix_center_offset = lp_build_const_vec(gallivm, coeff_bld->type, 0.5);
538 
539    if (loc == TGSI_INTERPOLATE_LOC_CENTER) {
540       if (bld->coverage_samples > 1) {
541          pixoffx = LLVMBuildFAdd(builder, pixoffx, pix_center_offset, "");
542          pixoffy = LLVMBuildFAdd(builder, pixoffy, pix_center_offset, "");
543       }
544 
545       if (offsets[0])
546          pixoffx = LLVMBuildFAdd(builder, pixoffx,
547                                  offsets[0], "");
548       if (offsets[1])
549          pixoffy = LLVMBuildFAdd(builder, pixoffy,
550                                  offsets[1], "");
551    } else if (loc == TGSI_INTERPOLATE_LOC_SAMPLE) {
552       LLVMValueRef x_val_idx = LLVMBuildMul(gallivm->builder, offsets[0], lp_build_const_int_vec(gallivm, bld->coeff_bld.type, 2 * 4), "");
553       LLVMValueRef y_val_idx = LLVMBuildAdd(gallivm->builder, x_val_idx, lp_build_const_int_vec(gallivm, bld->coeff_bld.type, 4), "");
554 
555       LLVMValueRef base_ptr = LLVMBuildBitCast(gallivm->builder, bld->sample_pos_array,
556                                                LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), "");
557       LLVMValueRef xoffset = lp_build_gather(gallivm,
558                                              bld->coeff_bld.type.length,
559                                              bld->coeff_bld.type.width,
560                                              lp_elem_type(bld->coeff_bld.type),
561                                              false,
562                                              base_ptr,
563                                              x_val_idx, true);
564       LLVMValueRef yoffset = lp_build_gather(gallivm,
565                                              bld->coeff_bld.type.length,
566                                              bld->coeff_bld.type.width,
567                                              lp_elem_type(bld->coeff_bld.type),
568                                              false,
569                                              base_ptr,
570                                              y_val_idx, true);
571 
572       if (bld->coverage_samples > 1) {
573          pixoffx = LLVMBuildFAdd(builder, pixoffx, xoffset, "");
574          pixoffy = LLVMBuildFAdd(builder, pixoffy, yoffset, "");
575       }
576    } else if (loc == TGSI_INTERPOLATE_LOC_CENTROID) {
577       LLVMValueRef centroid_x_offset, centroid_y_offset;
578 
579       /* for centroid find covered samples for this quad. */
580       /* if all samples are covered use pixel centers */
581       if (bld->coverage_samples > 1) {
582          calc_centroid_offsets(bld, gallivm, loop_iter, mask_store,
583 			       pix_center_offset, &centroid_x_offset, &centroid_y_offset);
584 
585          pixoffx = LLVMBuildFAdd(builder, pixoffx, centroid_x_offset, "");
586          pixoffy = LLVMBuildFAdd(builder, pixoffy, centroid_y_offset, "");
587       }
588    }
589 
590    // remap attrib properly.
591    attrib++;
592 
593    if (indir_index)
594      return lp_build_interp_soa_indirect(bld, gallivm, attrib, chan,
595 					 indir_index, pixoffx, pixoffy);
596 
597 
598    const unsigned interp = bld->interp[attrib];
599    LLVMValueRef dadx = coeff_bld->zero;
600    LLVMValueRef dady = coeff_bld->zero;
601    LLVMValueRef a = coeff_bld->zero;
602 
603    LLVMValueRef index = lp_build_const_int32(gallivm, chan);
604 
605    switch (interp) {
606    case LP_INTERP_PERSPECTIVE:
607       /* fall-through */
608    case LP_INTERP_LINEAR:
609       dadx = lp_build_extract_broadcast(gallivm, setup_bld->type,
610                                         coeff_bld->type, bld->dadxaos[attrib],
611                                         index);
612 
613       dady = lp_build_extract_broadcast(gallivm, setup_bld->type,
614                                         coeff_bld->type, bld->dadyaos[attrib],
615                                         index);
616 
617       a = lp_build_extract_broadcast(gallivm, setup_bld->type,
618                                      coeff_bld->type, bld->a0aos[attrib],
619                                      index);
620 
621       /*
622        * a = a0 + (x * dadx + y * dady)
623        */
624       a = lp_build_fmuladd(builder, dadx, pixoffx, a);
625       a = lp_build_fmuladd(builder, dady, pixoffy, a);
626 
627       if (interp == LP_INTERP_PERSPECTIVE) {
628         LLVMValueRef w = bld->attribs[0][3];
629         assert(attrib != 0);
630         assert(bld->mask[0] & TGSI_WRITEMASK_W);
631         LLVMValueRef oow = lp_build_rcp(coeff_bld, w);
632         a = lp_build_mul(coeff_bld, a, oow);
633       }
634 
635       break;
636    case LP_INTERP_CONSTANT:
637    case LP_INTERP_FACING:
638       a = lp_build_extract_broadcast(gallivm, setup_bld->type,
639                                      coeff_bld->type, bld->a0aos[attrib],
640                                      index);
641       break;
642    default:
643       assert(0);
644       break;
645    }
646    return a;
647 }
648 
649 /**
650  * Generate the position vectors.
651  *
652  * Parameter x0, y0 are the integer values with upper left coordinates.
653  */
654 static void
pos_init(struct lp_build_interp_soa_context * bld,LLVMValueRef x0,LLVMValueRef y0)655 pos_init(struct lp_build_interp_soa_context *bld,
656          LLVMValueRef x0,
657          LLVMValueRef y0)
658 {
659    LLVMBuilderRef builder = bld->coeff_bld.gallivm->builder;
660    struct lp_build_context *coeff_bld = &bld->coeff_bld;
661 
662    bld->x = LLVMBuildSIToFP(builder, x0, coeff_bld->elem_type, "");
663    bld->y = LLVMBuildSIToFP(builder, y0, coeff_bld->elem_type, "");
664 }
665 
666 
667 /**
668  * Initialize fragment shader input attribute info.
669  */
670 void
lp_build_interp_soa_init(struct lp_build_interp_soa_context * bld,struct gallivm_state * gallivm,unsigned num_inputs,const struct lp_shader_input * inputs,boolean pixel_center_integer,unsigned coverage_samples,LLVMValueRef sample_pos_array,LLVMValueRef num_loop,boolean depth_clamp,LLVMBuilderRef builder,struct lp_type type,LLVMValueRef a0_ptr,LLVMValueRef dadx_ptr,LLVMValueRef dady_ptr,LLVMValueRef x0,LLVMValueRef y0)671 lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
672                          struct gallivm_state *gallivm,
673                          unsigned num_inputs,
674                          const struct lp_shader_input *inputs,
675                          boolean pixel_center_integer,
676                          unsigned coverage_samples,
677                          LLVMValueRef sample_pos_array,
678                          LLVMValueRef num_loop,
679                          boolean depth_clamp,
680                          LLVMBuilderRef builder,
681                          struct lp_type type,
682                          LLVMValueRef a0_ptr,
683                          LLVMValueRef dadx_ptr,
684                          LLVMValueRef dady_ptr,
685                          LLVMValueRef x0,
686                          LLVMValueRef y0)
687 {
688    struct lp_type coeff_type;
689    struct lp_type setup_type;
690    unsigned attrib;
691    unsigned chan;
692 
693    memset(bld, 0, sizeof *bld);
694 
695    memset(&coeff_type, 0, sizeof coeff_type);
696    coeff_type.floating = TRUE;
697    coeff_type.sign = TRUE;
698    coeff_type.width = 32;
699    coeff_type.length = type.length;
700 
701    memset(&setup_type, 0, sizeof setup_type);
702    setup_type.floating = TRUE;
703    setup_type.sign = TRUE;
704    setup_type.width = 32;
705    setup_type.length = TGSI_NUM_CHANNELS;
706 
707 
708    /* XXX: we don't support interpolating into any other types */
709    assert(memcmp(&coeff_type, &type, sizeof coeff_type) == 0);
710 
711    lp_build_context_init(&bld->coeff_bld, gallivm, coeff_type);
712    lp_build_context_init(&bld->setup_bld, gallivm, setup_type);
713 
714    /* For convenience */
715    bld->pos = bld->attribs[0];
716    bld->inputs = (const LLVMValueRef (*)[TGSI_NUM_CHANNELS]) bld->attribs[1];
717 
718    /* Position */
719    bld->mask[0] = TGSI_WRITEMASK_XYZW;
720    bld->interp[0] = LP_INTERP_LINEAR;
721    bld->interp_loc[0] = 0;
722 
723    /* Inputs */
724    for (attrib = 0; attrib < num_inputs; ++attrib) {
725       bld->mask[1 + attrib] = inputs[attrib].usage_mask;
726       bld->interp[1 + attrib] = inputs[attrib].interp;
727       bld->interp_loc[1 + attrib] = inputs[attrib].location;
728    }
729    bld->num_attribs = 1 + num_inputs;
730 
731    /* needed for indirect */
732    bld->a0_ptr = a0_ptr;
733    bld->dadx_ptr = dadx_ptr;
734    bld->dady_ptr = dady_ptr;
735 
736    /* Ensure all masked out input channels have a valid value */
737    for (attrib = 0; attrib < bld->num_attribs; ++attrib) {
738       for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
739          bld->attribs[attrib][chan] = bld->coeff_bld.undef;
740       }
741    }
742 
743    if (pixel_center_integer) {
744       bld->pos_offset = 0.0;
745    } else {
746       bld->pos_offset = 0.5;
747    }
748    bld->depth_clamp = depth_clamp;
749    bld->coverage_samples = coverage_samples;
750    bld->num_loop = num_loop;
751    bld->sample_pos_array = sample_pos_array;
752 
753    pos_init(bld, x0, y0);
754 
755    /*
756     * Simple method (single step interpolation) may be slower if vector length
757     * is just 4, but the results are different (generally less accurate) with
758     * the other method, so always use more accurate version.
759     */
760    {
761       /* XXX this should use a global static table */
762       unsigned i;
763       unsigned num_loops = 16 / type.length;
764       LLVMValueRef pixoffx, pixoffy, index;
765       LLVMValueRef ptr;
766 
767       bld->xoffset_store = lp_build_array_alloca(gallivm,
768                                                  lp_build_vec_type(gallivm, type),
769                                                  lp_build_const_int32(gallivm, num_loops),
770                                                  "");
771       bld->yoffset_store = lp_build_array_alloca(gallivm,
772                                                  lp_build_vec_type(gallivm, type),
773                                                  lp_build_const_int32(gallivm, num_loops),
774                                                  "");
775       for (i = 0; i < num_loops; i++) {
776          index = lp_build_const_int32(gallivm, i);
777          calc_offsets(&bld->coeff_bld, i*type.length/4, &pixoffx, &pixoffy);
778          ptr = LLVMBuildGEP(builder, bld->xoffset_store, &index, 1, "");
779          LLVMBuildStore(builder, pixoffx, ptr);
780          ptr = LLVMBuildGEP(builder, bld->yoffset_store, &index, 1, "");
781          LLVMBuildStore(builder, pixoffy, ptr);
782       }
783    }
784    coeffs_init_simple(bld, a0_ptr, dadx_ptr, dady_ptr);
785 }
786 
787 
788 /*
789  * Advance the position and inputs to the given quad within the block.
790  */
791 
792 void
lp_build_interp_soa_update_inputs_dyn(struct lp_build_interp_soa_context * bld,struct gallivm_state * gallivm,LLVMValueRef quad_start_index,LLVMValueRef mask_store,LLVMValueRef sample_id)793 lp_build_interp_soa_update_inputs_dyn(struct lp_build_interp_soa_context *bld,
794                                       struct gallivm_state *gallivm,
795                                       LLVMValueRef quad_start_index,
796                                       LLVMValueRef mask_store,
797                                       LLVMValueRef sample_id)
798 {
799    attribs_update_simple(bld, gallivm, quad_start_index, mask_store, sample_id, 1, bld->num_attribs);
800 }
801 
802 void
lp_build_interp_soa_update_pos_dyn(struct lp_build_interp_soa_context * bld,struct gallivm_state * gallivm,LLVMValueRef quad_start_index,LLVMValueRef sample_id)803 lp_build_interp_soa_update_pos_dyn(struct lp_build_interp_soa_context *bld,
804                                    struct gallivm_state *gallivm,
805                                    LLVMValueRef quad_start_index,
806                                    LLVMValueRef sample_id)
807 {
808    attribs_update_simple(bld, gallivm, quad_start_index, NULL, sample_id, 0, 1);
809 }
810 
811