1 /**************************************************************************
2  *
3  * Copyright 2009 VMware, Inc.
4  * Copyright 2007-2008 VMware, Inc.
5  * All Rights Reserved.
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a
8  * copy of this software and associated documentation files (the
9  * "Software"), to deal in the Software without restriction, including
10  * without limitation the rights to use, copy, modify, merge, publish,
11  * distribute, sub license, and/or sell copies of the Software, and to
12  * permit persons to whom the Software is furnished to do so, subject to
13  * the following conditions:
14  *
15  * The above copyright notice and this permission notice (including the
16  * next paragraph) shall be included in all copies or substantial portions
17  * of the Software.
18  *
19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
23  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26  *
27  **************************************************************************/
28 
29 /**
30  * @file
31  * Position and shader input interpolation.
32  *
33  * @author Jose Fonseca <jfonseca@vmware.com>
34  */
35 
36 #include "pipe/p_shader_tokens.h"
37 #include "util/compiler.h"
38 #include "util/u_debug.h"
39 #include "util/u_memory.h"
40 #include "util/u_math.h"
41 #include "tgsi/tgsi_scan.h"
42 #include "gallivm/lp_bld_debug.h"
43 #include "gallivm/lp_bld_const.h"
44 #include "gallivm/lp_bld_arit.h"
45 #include "gallivm/lp_bld_swizzle.h"
46 #include "gallivm/lp_bld_flow.h"
47 #include "gallivm/lp_bld_logic.h"
48 #include "gallivm/lp_bld_struct.h"
49 #include "gallivm/lp_bld_gather.h"
50 #include "lp_bld_interp.h"
51 
52 
53 /*
54  * The shader JIT function operates on blocks of quads.
55  * Each block has 2x2 quads and each quad has 2x2 pixels.
56  *
57  * We iterate over the quads in order 0, 1, 2, 3:
58  *
59  * #################
60  * #   |   #   |   #
61  * #---0---#---1---#
62  * #   |   #   |   #
63  * #################
64  * #   |   #   |   #
65  * #---2---#---3---#
66  * #   |   #   |   #
67  * #################
68  *
69  * If we iterate over multiple quads at once, quads 01 and 23 are processed
70  * together.
71  *
72  * Within each quad, we have four pixels which are represented in SOA
73  * order:
74  *
75  * #########
76  * # 0 | 1 #
77  * #---+---#
78  * # 2 | 3 #
79  * #########
80  *
81  * So the green channel (for example) of the four pixels is stored in
82  * a single vector register: {g0, g1, g2, g3}.
83  * The order stays the same even with multiple quads:
84  * 0 1 4 5
85  * 2 3 6 7
86  * is stored as g0..g7
87  */
88 
89 
90 /**
91  * Do one perspective divide per quad.
92  *
93  * For perspective interpolation, the final attribute value is given
94  *
95  *  a' = a/w = a * oow
96  *
97  * where
98  *
99  *  a = a0 + dadx*x + dady*y
100  *  w = w0 + dwdx*x + dwdy*y
101  *  oow = 1/w = 1/(w0 + dwdx*x + dwdy*y)
102  *
103  * Instead of computing the division per pixel, with this macro we compute the
104  * division on the upper left pixel of each quad, and use a linear
105  * approximation in the remaining pixels, given by:
106  *
107  *  da'dx = (dadx - dwdx*a)*oow
108  *  da'dy = (dady - dwdy*a)*oow
109  *
110  * Ironically, this actually makes things slower -- probably because the
111  * divide hardware unit is rarely used, whereas the multiply unit is typically
112  * already saturated.
113  */
#define PERSPECTIVE_DIVIDE_PER_QUAD 0


/*
 * Per-pixel x / y offsets inside a 4x4 block of pixels, indexed by the
 * pixel's position in SoA order (quad after quad, four pixels per quad).
 * Each row of the initializers below covers one 2x2 quad.
 */
static const unsigned char quad_offset_x[16] = {
   0, 1, 0, 1,
   2, 3, 2, 3,
   0, 1, 0, 1,
   2, 3, 2, 3,
};
static const unsigned char quad_offset_y[16] = {
   0, 0, 1, 1,
   0, 0, 1, 1,
   2, 2, 3, 3,
   2, 2, 3, 3,
};
119 
120 
121 static void
attrib_name(LLVMValueRef val,unsigned attrib,unsigned chan,const char * suffix)122 attrib_name(LLVMValueRef val, unsigned attrib, unsigned chan, const char *suffix)
123 {
124    if(attrib == 0)
125       lp_build_name(val, "pos.%c%s", "xyzw"[chan], suffix);
126    else
127       lp_build_name(val, "input%u.%c%s", attrib - 1, "xyzw"[chan], suffix);
128 }
129 
130 static void
calc_offsets(struct lp_build_context * coeff_bld,unsigned quad_start_index,LLVMValueRef * pixoffx,LLVMValueRef * pixoffy)131 calc_offsets(struct lp_build_context *coeff_bld,
132              unsigned quad_start_index,
133              LLVMValueRef *pixoffx,
134              LLVMValueRef *pixoffy)
135 {
136    unsigned i;
137    unsigned num_pix = coeff_bld->type.length;
138    struct gallivm_state *gallivm = coeff_bld->gallivm;
139    LLVMBuilderRef builder = coeff_bld->gallivm->builder;
140    LLVMValueRef nr, pixxf, pixyf;
141 
142    *pixoffx = coeff_bld->undef;
143    *pixoffy = coeff_bld->undef;
144 
145    for (i = 0; i < num_pix; i++) {
146       nr = lp_build_const_int32(gallivm, i);
147       pixxf = lp_build_const_float(gallivm, quad_offset_x[i % num_pix] +
148                                    (quad_start_index & 1) * 2);
149       pixyf = lp_build_const_float(gallivm, quad_offset_y[i % num_pix] +
150                                    (quad_start_index & 2));
151       *pixoffx = LLVMBuildInsertElement(builder, *pixoffx, pixxf, nr, "");
152       *pixoffy = LLVMBuildInsertElement(builder, *pixoffy, pixyf, nr, "");
153    }
154 }
155 
156 static void
calc_centroid_offsets(struct lp_build_interp_soa_context * bld,struct gallivm_state * gallivm,LLVMValueRef loop_iter,LLVMValueRef mask_store,LLVMValueRef pix_center_offset,LLVMValueRef * centroid_x,LLVMValueRef * centroid_y)157 calc_centroid_offsets(struct lp_build_interp_soa_context *bld,
158                       struct gallivm_state *gallivm,
159                       LLVMValueRef loop_iter,
160                       LLVMValueRef mask_store,
161                       LLVMValueRef pix_center_offset,
162                       LLVMValueRef *centroid_x, LLVMValueRef *centroid_y)
163 {
164    struct lp_build_context *coeff_bld = &bld->coeff_bld;
165    LLVMBuilderRef builder = gallivm->builder;
166    LLVMValueRef s_mask_and = NULL;
167    LLVMValueRef centroid_x_offset = pix_center_offset;
168    LLVMValueRef centroid_y_offset = pix_center_offset;
169    for (int s = bld->coverage_samples - 1; s >= 0; s--) {
170       LLVMValueRef sample_cov;
171       LLVMValueRef s_mask_idx = LLVMBuildMul(builder, bld->num_loop, lp_build_const_int32(gallivm, s), "");
172 
173       s_mask_idx = LLVMBuildAdd(builder, s_mask_idx, loop_iter, "");
174       sample_cov = lp_build_pointer_get(builder, mask_store, s_mask_idx);
175       if (s == bld->coverage_samples - 1)
176          s_mask_and = sample_cov;
177       else
178          s_mask_and = LLVMBuildAnd(builder, s_mask_and, sample_cov, "");
179 
180       LLVMValueRef x_val_idx = lp_build_const_int32(gallivm, s * 2);
181       LLVMValueRef y_val_idx = lp_build_const_int32(gallivm, s * 2 + 1);
182 
183       x_val_idx = lp_build_array_get(gallivm, bld->sample_pos_array, x_val_idx);
184       y_val_idx = lp_build_array_get(gallivm, bld->sample_pos_array, y_val_idx);
185       x_val_idx = lp_build_broadcast_scalar(coeff_bld, x_val_idx);
186       y_val_idx = lp_build_broadcast_scalar(coeff_bld, y_val_idx);
187       centroid_x_offset = lp_build_select(coeff_bld, sample_cov, x_val_idx, centroid_x_offset);
188       centroid_y_offset = lp_build_select(coeff_bld, sample_cov, y_val_idx, centroid_y_offset);
189    }
190    *centroid_x = lp_build_select(coeff_bld, s_mask_and, pix_center_offset, centroid_x_offset);
191    *centroid_y = lp_build_select(coeff_bld, s_mask_and, pix_center_offset, centroid_y_offset);
192 }
193 
194 /* Much easier, and significantly less instructions in the per-stamp
195  * part (less than half) but overall more instructions so a loss if
196  * most quads are active. Might be a win though with larger vectors.
197  * No ability to do per-quad divide (doable but not implemented)
198  * Could be made to work with passed in pixel offsets (i.e. active quad merging).
199  */
200 static void
coeffs_init_simple(struct lp_build_interp_soa_context * bld,LLVMValueRef a0_ptr,LLVMValueRef dadx_ptr,LLVMValueRef dady_ptr)201 coeffs_init_simple(struct lp_build_interp_soa_context *bld,
202                    LLVMValueRef a0_ptr,
203                    LLVMValueRef dadx_ptr,
204                    LLVMValueRef dady_ptr)
205 {
206    struct lp_build_context *coeff_bld = &bld->coeff_bld;
207    struct lp_build_context *setup_bld = &bld->setup_bld;
208    struct gallivm_state *gallivm = coeff_bld->gallivm;
209    LLVMBuilderRef builder = gallivm->builder;
210    unsigned attrib;
211 
212    for (attrib = 0; attrib < bld->num_attribs; ++attrib) {
213       /*
214        * always fetch all 4 values for performance/simplicity
215        * Note: we do that here because it seems to generate better
216        * code. It generates a lot of moves initially but less
217        * moves later. As far as I can tell this looks like a
218        * llvm issue, instead of simply reloading the values from
219        * the passed in pointers it if it runs out of registers
220        * it spills/reloads them. Maybe some optimization passes
221        * would help.
222        * Might want to investigate this again later.
223        */
224       const unsigned interp = bld->interp[attrib];
225       LLVMValueRef index = lp_build_const_int32(gallivm,
226                                 attrib * TGSI_NUM_CHANNELS);
227       LLVMValueRef ptr;
228       LLVMValueRef dadxaos = setup_bld->zero;
229       LLVMValueRef dadyaos = setup_bld->zero;
230       LLVMValueRef a0aos = setup_bld->zero;
231 
232       switch (interp) {
233       case LP_INTERP_PERSPECTIVE:
234          FALLTHROUGH;
235 
236       case LP_INTERP_LINEAR:
237          ptr = LLVMBuildGEP(builder, dadx_ptr, &index, 1, "");
238          ptr = LLVMBuildBitCast(builder, ptr,
239                LLVMPointerType(setup_bld->vec_type, 0), "");
240          dadxaos = LLVMBuildLoad(builder, ptr, "");
241 
242          ptr = LLVMBuildGEP(builder, dady_ptr, &index, 1, "");
243          ptr = LLVMBuildBitCast(builder, ptr,
244                LLVMPointerType(setup_bld->vec_type, 0), "");
245          dadyaos = LLVMBuildLoad(builder, ptr, "");
246 
247          attrib_name(dadxaos, attrib, 0, ".dadxaos");
248          attrib_name(dadyaos, attrib, 0, ".dadyaos");
249          FALLTHROUGH;
250 
251       case LP_INTERP_CONSTANT:
252       case LP_INTERP_FACING:
253          ptr = LLVMBuildGEP(builder, a0_ptr, &index, 1, "");
254          ptr = LLVMBuildBitCast(builder, ptr,
255                LLVMPointerType(setup_bld->vec_type, 0), "");
256          a0aos = LLVMBuildLoad(builder, ptr, "");
257          attrib_name(a0aos, attrib, 0, ".a0aos");
258          break;
259 
260       case LP_INTERP_POSITION:
261          /* Nothing to do as the position coeffs are already setup in slot 0 */
262          continue;
263 
264       default:
265          assert(0);
266          break;
267       }
268       bld->a0aos[attrib] = a0aos;
269       bld->dadxaos[attrib] = dadxaos;
270       bld->dadyaos[attrib] = dadyaos;
271    }
272 }
273 
274 /**
275  * Interpolate the shader input attribute values.
276  * This is called for each (group of) quad(s).
277  */
278 static void
attribs_update_simple(struct lp_build_interp_soa_context * bld,struct gallivm_state * gallivm,LLVMValueRef loop_iter,LLVMValueRef mask_store,LLVMValueRef sample_id,int start,int end)279 attribs_update_simple(struct lp_build_interp_soa_context *bld,
280                       struct gallivm_state *gallivm,
281                       LLVMValueRef loop_iter,
282                       LLVMValueRef mask_store,
283                       LLVMValueRef sample_id,
284                       int start,
285                       int end)
286 {
287    LLVMBuilderRef builder = gallivm->builder;
288    struct lp_build_context *coeff_bld = &bld->coeff_bld;
289    struct lp_build_context *setup_bld = &bld->setup_bld;
290    LLVMValueRef oow = NULL;
291    unsigned attrib;
292    LLVMValueRef pixoffx;
293    LLVMValueRef pixoffy;
294    LLVMValueRef ptr;
295    LLVMValueRef pix_center_offset = lp_build_const_vec(gallivm, coeff_bld->type, 0.5);
296 
297    /* could do this with code-generated passed in pixel offsets too */
298 
299    assert(loop_iter);
300    ptr = LLVMBuildGEP(builder, bld->xoffset_store, &loop_iter, 1, "");
301    pixoffx = LLVMBuildLoad(builder, ptr, "");
302    ptr = LLVMBuildGEP(builder, bld->yoffset_store, &loop_iter, 1, "");
303    pixoffy = LLVMBuildLoad(builder, ptr, "");
304 
305    pixoffx = LLVMBuildFAdd(builder, pixoffx,
306                            lp_build_broadcast_scalar(coeff_bld, bld->x), "");
307    pixoffy = LLVMBuildFAdd(builder, pixoffy,
308                            lp_build_broadcast_scalar(coeff_bld, bld->y), "");
309 
310    for (attrib = start; attrib < end; attrib++) {
311       const unsigned mask = bld->mask[attrib];
312       const unsigned interp = bld->interp[attrib];
313       const unsigned loc = bld->interp_loc[attrib];
314       unsigned chan;
315 
316       for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
317          if (mask & (1 << chan)) {
318             LLVMValueRef index;
319             LLVMValueRef dadx = coeff_bld->zero;
320             LLVMValueRef dady = coeff_bld->zero;
321             LLVMValueRef a = coeff_bld->zero;
322             LLVMValueRef chan_pixoffx = pixoffx, chan_pixoffy = pixoffy;
323 
324             index = lp_build_const_int32(gallivm, chan);
325             switch (interp) {
326             case LP_INTERP_PERSPECTIVE:
327                FALLTHROUGH;
328 
329             case LP_INTERP_LINEAR:
330                if (attrib == 0 && chan == 0) {
331                   dadx = coeff_bld->one;
332                   if (sample_id) {
333                      LLVMValueRef x_val_idx = LLVMBuildMul(gallivm->builder, sample_id, lp_build_const_int32(gallivm, 2), "");
334                      x_val_idx = lp_build_array_get(gallivm, bld->sample_pos_array, x_val_idx);
335                      a = lp_build_broadcast_scalar(coeff_bld, x_val_idx);
336                   } else {
337                      a = lp_build_const_vec(gallivm, coeff_bld->type, bld->pos_offset);
338                   }
339                }
340                else if (attrib == 0 && chan == 1) {
341                   dady = coeff_bld->one;
342                   if (sample_id) {
343                      LLVMValueRef y_val_idx = LLVMBuildMul(gallivm->builder, sample_id, lp_build_const_int32(gallivm, 2), "");
344                      y_val_idx = LLVMBuildAdd(gallivm->builder, y_val_idx, lp_build_const_int32(gallivm, 1), "");
345                      y_val_idx = lp_build_array_get(gallivm, bld->sample_pos_array, y_val_idx);
346                      a = lp_build_broadcast_scalar(coeff_bld, y_val_idx);
347                   } else {
348                      a = lp_build_const_vec(gallivm, coeff_bld->type, bld->pos_offset);
349                   }
350                }
351                else {
352                   dadx = lp_build_extract_broadcast(gallivm, setup_bld->type,
353                                                     coeff_bld->type, bld->dadxaos[attrib],
354                                                     index);
355                   dady = lp_build_extract_broadcast(gallivm, setup_bld->type,
356                                                     coeff_bld->type, bld->dadyaos[attrib],
357                                                     index);
358                   a = lp_build_extract_broadcast(gallivm, setup_bld->type,
359                                                  coeff_bld->type, bld->a0aos[attrib],
360                                                  index);
361 
362                   if (bld->coverage_samples > 1) {
363                      LLVMValueRef xoffset = pix_center_offset;
364                      LLVMValueRef yoffset = pix_center_offset;
365                      if (loc == TGSI_INTERPOLATE_LOC_SAMPLE || (attrib == 0 && chan == 2 && sample_id)) {
366                         LLVMValueRef x_val_idx = LLVMBuildMul(gallivm->builder, sample_id, lp_build_const_int32(gallivm, 2), "");
367                         LLVMValueRef y_val_idx = LLVMBuildAdd(gallivm->builder, x_val_idx, lp_build_const_int32(gallivm, 1), "");
368 
369                         x_val_idx = lp_build_array_get(gallivm, bld->sample_pos_array, x_val_idx);
370                         y_val_idx = lp_build_array_get(gallivm, bld->sample_pos_array, y_val_idx);
371                         xoffset = lp_build_broadcast_scalar(coeff_bld, x_val_idx);
372                         yoffset = lp_build_broadcast_scalar(coeff_bld, y_val_idx);
373                      } else if (loc == TGSI_INTERPOLATE_LOC_CENTROID) {
374                         calc_centroid_offsets(bld, gallivm, loop_iter, mask_store,
375                                               pix_center_offset, &xoffset, &yoffset);
376                      }
377                      chan_pixoffx = lp_build_add(coeff_bld, chan_pixoffx, xoffset);
378                      chan_pixoffy = lp_build_add(coeff_bld, chan_pixoffy, yoffset);
379                   }
380                }
381 
382                /*
383                 * a = a0 + (x * dadx + y * dady)
384                 */
385                a = lp_build_fmuladd(builder, dadx, chan_pixoffx, a);
386                a = lp_build_fmuladd(builder, dady, chan_pixoffy, a);
387 
388                if (interp == LP_INTERP_PERSPECTIVE) {
389                   if (oow == NULL) {
390                      LLVMValueRef w = bld->attribs[0][3];
391                      assert(attrib != 0);
392                      assert(bld->mask[0] & TGSI_WRITEMASK_W);
393                      oow = lp_build_rcp(coeff_bld, w);
394                   }
395                   a = lp_build_mul(coeff_bld, a, oow);
396                }
397                break;
398 
399             case LP_INTERP_CONSTANT:
400             case LP_INTERP_FACING:
401                a = lp_build_extract_broadcast(gallivm, setup_bld->type,
402                                               coeff_bld->type, bld->a0aos[attrib],
403                                               index);
404                break;
405 
406             case LP_INTERP_POSITION:
407                assert(attrib > 0);
408                a = bld->attribs[0][chan];
409                break;
410 
411             default:
412                assert(0);
413                break;
414             }
415 
416             if ((attrib == 0) && (chan == 2)) {
417                /* add polygon-offset value, stored in the X component of a0 */
418                LLVMValueRef offset =
419                   lp_build_extract_broadcast(gallivm, setup_bld->type,
420                                              coeff_bld->type, bld->a0aos[0],
421                                              lp_build_const_int32(gallivm, 0));
422                a = LLVMBuildFAdd(builder, a, offset, "");
423 
424                if (!bld->depth_clamp){
425                   /* OpenGL requires clamping z to 0..1 range after polgon offset
426                   * is applied if depth-clamping isn't enabled.
427                   *
428                   * This also fixes the problem that depth values can exceed 1.0,
429                   * due to imprecision in the calculations.
430                   */
431                   a = lp_build_clamp(coeff_bld, a, coeff_bld->zero, coeff_bld->one);
432                }
433             }
434 
435             bld->attribs[attrib][chan] = a;
436          }
437       }
438    }
439 }
440 
441 static LLVMValueRef
lp_build_interp_soa_indirect(struct lp_build_interp_soa_context * bld,struct gallivm_state * gallivm,unsigned attrib,unsigned chan,LLVMValueRef indir_index,LLVMValueRef pixoffx,LLVMValueRef pixoffy)442 lp_build_interp_soa_indirect(struct lp_build_interp_soa_context *bld,
443                              struct gallivm_state *gallivm,
444                              unsigned attrib, unsigned chan,
445                              LLVMValueRef indir_index,
446                              LLVMValueRef pixoffx,
447                              LLVMValueRef pixoffy)
448 {
449    LLVMBuilderRef builder = gallivm->builder;
450    struct lp_build_context *coeff_bld = &bld->coeff_bld;
451    const unsigned interp = bld->interp[attrib];
452    LLVMValueRef dadx = coeff_bld->zero;
453    LLVMValueRef dady = coeff_bld->zero;
454    LLVMValueRef a = coeff_bld->zero;
455 
456    LLVMTypeRef u8ptr = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
457 
458    indir_index = LLVMBuildAdd(builder, indir_index, lp_build_const_int_vec(gallivm, coeff_bld->type, attrib), "");
459    LLVMValueRef index = LLVMBuildMul(builder, indir_index, lp_build_const_int_vec(gallivm, coeff_bld->type, 4), "");
460    index = LLVMBuildAdd(builder, index, lp_build_const_int_vec(gallivm, coeff_bld->type, chan), "");
461 
462    /* size up to byte indices */
463    index = LLVMBuildMul(builder, index, lp_build_const_int_vec(gallivm, coeff_bld->type, 4), "");
464 
465    struct lp_type dst_type = coeff_bld->type;
466    dst_type.length = 1;
467    switch (interp) {
468    case LP_INTERP_PERSPECTIVE:
469       FALLTHROUGH;
470    case LP_INTERP_LINEAR:
471 
472       dadx = lp_build_gather(gallivm, coeff_bld->type.length,
473                              coeff_bld->type.width, dst_type,
474                              true, LLVMBuildBitCast(builder, bld->dadx_ptr, u8ptr, ""), index, false);
475 
476       dady = lp_build_gather(gallivm, coeff_bld->type.length,
477                              coeff_bld->type.width, dst_type,
478                              true, LLVMBuildBitCast(builder, bld->dady_ptr, u8ptr, ""), index, false);
479 
480       a = lp_build_gather(gallivm, coeff_bld->type.length,
481                           coeff_bld->type.width, dst_type,
482                           true, LLVMBuildBitCast(builder, bld->a0_ptr, u8ptr, ""), index, false);
483 
484       /*
485        * a = a0 + (x * dadx + y * dady)
486        */
487       a = lp_build_fmuladd(builder, dadx, pixoffx, a);
488       a = lp_build_fmuladd(builder, dady, pixoffy, a);
489 
490       if (interp == LP_INTERP_PERSPECTIVE) {
491         LLVMValueRef w = bld->attribs[0][3];
492         assert(attrib != 0);
493         assert(bld->mask[0] & TGSI_WRITEMASK_W);
494         LLVMValueRef oow = lp_build_rcp(coeff_bld, w);
495         a = lp_build_mul(coeff_bld, a, oow);
496       }
497 
498       break;
499    case LP_INTERP_CONSTANT:
500    case LP_INTERP_FACING:
501       a = lp_build_gather(gallivm, coeff_bld->type.length,
502                           coeff_bld->type.width, dst_type,
503                           true, LLVMBuildBitCast(builder, bld->a0_ptr, u8ptr, ""), index, false);
504       break;
505    default:
506       assert(0);
507       break;
508    }
509    return a;
510 }
511 
512 LLVMValueRef
lp_build_interp_soa(struct lp_build_interp_soa_context * bld,struct gallivm_state * gallivm,LLVMValueRef loop_iter,LLVMValueRef mask_store,unsigned attrib,unsigned chan,unsigned loc,LLVMValueRef indir_index,LLVMValueRef offsets[2])513 lp_build_interp_soa(struct lp_build_interp_soa_context *bld,
514                     struct gallivm_state *gallivm,
515                     LLVMValueRef loop_iter,
516                     LLVMValueRef mask_store,
517                     unsigned attrib, unsigned chan,
518                     unsigned loc,
519                     LLVMValueRef indir_index,
520                     LLVMValueRef offsets[2])
521 {
522    LLVMBuilderRef builder = gallivm->builder;
523    struct lp_build_context *coeff_bld = &bld->coeff_bld;
524    struct lp_build_context *setup_bld = &bld->setup_bld;
525    LLVMValueRef pixoffx;
526    LLVMValueRef pixoffy;
527    LLVMValueRef ptr;
528 
529    /* could do this with code-generated passed in pixel offsets too */
530 
531    assert(loop_iter);
532    ptr = LLVMBuildGEP(builder, bld->xoffset_store, &loop_iter, 1, "");
533    pixoffx = LLVMBuildLoad(builder, ptr, "");
534    ptr = LLVMBuildGEP(builder, bld->yoffset_store, &loop_iter, 1, "");
535    pixoffy = LLVMBuildLoad(builder, ptr, "");
536 
537    pixoffx = LLVMBuildFAdd(builder, pixoffx,
538                            lp_build_broadcast_scalar(coeff_bld, bld->x), "");
539    pixoffy = LLVMBuildFAdd(builder, pixoffy,
540                            lp_build_broadcast_scalar(coeff_bld, bld->y), "");
541 
542    LLVMValueRef pix_center_offset = lp_build_const_vec(gallivm, coeff_bld->type, 0.5);
543 
544    if (loc == TGSI_INTERPOLATE_LOC_CENTER) {
545       if (bld->coverage_samples > 1) {
546          pixoffx = LLVMBuildFAdd(builder, pixoffx, pix_center_offset, "");
547          pixoffy = LLVMBuildFAdd(builder, pixoffy, pix_center_offset, "");
548       }
549 
550       if (offsets[0])
551          pixoffx = LLVMBuildFAdd(builder, pixoffx,
552                                  offsets[0], "");
553       if (offsets[1])
554          pixoffy = LLVMBuildFAdd(builder, pixoffy,
555                                  offsets[1], "");
556    } else if (loc == TGSI_INTERPOLATE_LOC_SAMPLE) {
557       LLVMValueRef x_val_idx = LLVMBuildMul(gallivm->builder, offsets[0], lp_build_const_int_vec(gallivm, bld->coeff_bld.type, 2 * 4), "");
558       LLVMValueRef y_val_idx = LLVMBuildAdd(gallivm->builder, x_val_idx, lp_build_const_int_vec(gallivm, bld->coeff_bld.type, 4), "");
559 
560       LLVMValueRef base_ptr = LLVMBuildBitCast(gallivm->builder, bld->sample_pos_array,
561                                                LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), "");
562       LLVMValueRef xoffset = lp_build_gather(gallivm,
563                                              bld->coeff_bld.type.length,
564                                              bld->coeff_bld.type.width,
565                                              lp_elem_type(bld->coeff_bld.type),
566                                              false,
567                                              base_ptr,
568                                              x_val_idx, true);
569       LLVMValueRef yoffset = lp_build_gather(gallivm,
570                                              bld->coeff_bld.type.length,
571                                              bld->coeff_bld.type.width,
572                                              lp_elem_type(bld->coeff_bld.type),
573                                              false,
574                                              base_ptr,
575                                              y_val_idx, true);
576 
577       if (bld->coverage_samples > 1) {
578          pixoffx = LLVMBuildFAdd(builder, pixoffx, xoffset, "");
579          pixoffy = LLVMBuildFAdd(builder, pixoffy, yoffset, "");
580       }
581    } else if (loc == TGSI_INTERPOLATE_LOC_CENTROID) {
582       LLVMValueRef centroid_x_offset, centroid_y_offset;
583 
584       /* for centroid find covered samples for this quad. */
585       /* if all samples are covered use pixel centers */
586       if (bld->coverage_samples > 1) {
587          calc_centroid_offsets(bld, gallivm, loop_iter, mask_store,
588 			       pix_center_offset, &centroid_x_offset, &centroid_y_offset);
589 
590          pixoffx = LLVMBuildFAdd(builder, pixoffx, centroid_x_offset, "");
591          pixoffy = LLVMBuildFAdd(builder, pixoffy, centroid_y_offset, "");
592       }
593    }
594 
595    // remap attrib properly.
596    attrib++;
597 
598    if (indir_index)
599      return lp_build_interp_soa_indirect(bld, gallivm, attrib, chan,
600 					 indir_index, pixoffx, pixoffy);
601 
602 
603    const unsigned interp = bld->interp[attrib];
604    LLVMValueRef dadx = coeff_bld->zero;
605    LLVMValueRef dady = coeff_bld->zero;
606    LLVMValueRef a = coeff_bld->zero;
607 
608    LLVMValueRef index = lp_build_const_int32(gallivm, chan);
609 
610    switch (interp) {
611    case LP_INTERP_PERSPECTIVE:
612       FALLTHROUGH;
613    case LP_INTERP_LINEAR:
614       dadx = lp_build_extract_broadcast(gallivm, setup_bld->type,
615                                         coeff_bld->type, bld->dadxaos[attrib],
616                                         index);
617 
618       dady = lp_build_extract_broadcast(gallivm, setup_bld->type,
619                                         coeff_bld->type, bld->dadyaos[attrib],
620                                         index);
621 
622       a = lp_build_extract_broadcast(gallivm, setup_bld->type,
623                                      coeff_bld->type, bld->a0aos[attrib],
624                                      index);
625 
626       /*
627        * a = a0 + (x * dadx + y * dady)
628        */
629       a = lp_build_fmuladd(builder, dadx, pixoffx, a);
630       a = lp_build_fmuladd(builder, dady, pixoffy, a);
631 
632       if (interp == LP_INTERP_PERSPECTIVE) {
633         LLVMValueRef w = bld->attribs[0][3];
634         assert(attrib != 0);
635         assert(bld->mask[0] & TGSI_WRITEMASK_W);
636         LLVMValueRef oow = lp_build_rcp(coeff_bld, w);
637         a = lp_build_mul(coeff_bld, a, oow);
638       }
639 
640       break;
641    case LP_INTERP_CONSTANT:
642    case LP_INTERP_FACING:
643       a = lp_build_extract_broadcast(gallivm, setup_bld->type,
644                                      coeff_bld->type, bld->a0aos[attrib],
645                                      index);
646       break;
647    default:
648       assert(0);
649       break;
650    }
651    return a;
652 }
653 
654 /**
655  * Generate the position vectors.
656  *
657  * Parameter x0, y0 are the integer values with upper left coordinates.
658  */
659 static void
pos_init(struct lp_build_interp_soa_context * bld,LLVMValueRef x0,LLVMValueRef y0)660 pos_init(struct lp_build_interp_soa_context *bld,
661          LLVMValueRef x0,
662          LLVMValueRef y0)
663 {
664    LLVMBuilderRef builder = bld->coeff_bld.gallivm->builder;
665    struct lp_build_context *coeff_bld = &bld->coeff_bld;
666 
667    bld->x = LLVMBuildSIToFP(builder, x0, coeff_bld->elem_type, "");
668    bld->y = LLVMBuildSIToFP(builder, y0, coeff_bld->elem_type, "");
669 }
670 
671 
672 /**
673  * Initialize fragment shader input attribute info.
674  */
675 void
lp_build_interp_soa_init(struct lp_build_interp_soa_context * bld,struct gallivm_state * gallivm,unsigned num_inputs,const struct lp_shader_input * inputs,boolean pixel_center_integer,unsigned coverage_samples,LLVMValueRef sample_pos_array,LLVMValueRef num_loop,boolean depth_clamp,LLVMBuilderRef builder,struct lp_type type,LLVMValueRef a0_ptr,LLVMValueRef dadx_ptr,LLVMValueRef dady_ptr,LLVMValueRef x0,LLVMValueRef y0)676 lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
677                          struct gallivm_state *gallivm,
678                          unsigned num_inputs,
679                          const struct lp_shader_input *inputs,
680                          boolean pixel_center_integer,
681                          unsigned coverage_samples,
682                          LLVMValueRef sample_pos_array,
683                          LLVMValueRef num_loop,
684                          boolean depth_clamp,
685                          LLVMBuilderRef builder,
686                          struct lp_type type,
687                          LLVMValueRef a0_ptr,
688                          LLVMValueRef dadx_ptr,
689                          LLVMValueRef dady_ptr,
690                          LLVMValueRef x0,
691                          LLVMValueRef y0)
692 {
693    struct lp_type coeff_type;
694    struct lp_type setup_type;
695    unsigned attrib;
696    unsigned chan;
697 
698    memset(bld, 0, sizeof *bld);
699 
700    memset(&coeff_type, 0, sizeof coeff_type);
701    coeff_type.floating = TRUE;
702    coeff_type.sign = TRUE;
703    coeff_type.width = 32;
704    coeff_type.length = type.length;
705 
706    memset(&setup_type, 0, sizeof setup_type);
707    setup_type.floating = TRUE;
708    setup_type.sign = TRUE;
709    setup_type.width = 32;
710    setup_type.length = TGSI_NUM_CHANNELS;
711 
712 
713    /* XXX: we don't support interpolating into any other types */
714    assert(memcmp(&coeff_type, &type, sizeof coeff_type) == 0);
715 
716    lp_build_context_init(&bld->coeff_bld, gallivm, coeff_type);
717    lp_build_context_init(&bld->setup_bld, gallivm, setup_type);
718 
719    /* For convenience */
720    bld->pos = bld->attribs[0];
721    bld->inputs = (const LLVMValueRef (*)[TGSI_NUM_CHANNELS]) bld->attribs[1];
722 
723    /* Position */
724    bld->mask[0] = TGSI_WRITEMASK_XYZW;
725    bld->interp[0] = LP_INTERP_LINEAR;
726    bld->interp_loc[0] = 0;
727 
728    /* Inputs */
729    for (attrib = 0; attrib < num_inputs; ++attrib) {
730       bld->mask[1 + attrib] = inputs[attrib].usage_mask;
731       bld->interp[1 + attrib] = inputs[attrib].interp;
732       bld->interp_loc[1 + attrib] = inputs[attrib].location;
733    }
734    bld->num_attribs = 1 + num_inputs;
735 
736    /* needed for indirect */
737    bld->a0_ptr = a0_ptr;
738    bld->dadx_ptr = dadx_ptr;
739    bld->dady_ptr = dady_ptr;
740 
741    /* Ensure all masked out input channels have a valid value */
742    for (attrib = 0; attrib < bld->num_attribs; ++attrib) {
743       for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
744          bld->attribs[attrib][chan] = bld->coeff_bld.undef;
745       }
746    }
747 
748    if (pixel_center_integer) {
749       bld->pos_offset = 0.0;
750    } else {
751       bld->pos_offset = 0.5;
752    }
753    bld->depth_clamp = depth_clamp;
754    bld->coverage_samples = coverage_samples;
755    bld->num_loop = num_loop;
756    bld->sample_pos_array = sample_pos_array;
757 
758    pos_init(bld, x0, y0);
759 
760    /*
761     * Simple method (single step interpolation) may be slower if vector length
762     * is just 4, but the results are different (generally less accurate) with
763     * the other method, so always use more accurate version.
764     */
765    {
766       /* XXX this should use a global static table */
767       unsigned i;
768       unsigned num_loops = 16 / type.length;
769       LLVMValueRef pixoffx, pixoffy, index;
770       LLVMValueRef ptr;
771 
772       bld->xoffset_store = lp_build_array_alloca(gallivm,
773                                                  lp_build_vec_type(gallivm, type),
774                                                  lp_build_const_int32(gallivm, num_loops),
775                                                  "");
776       bld->yoffset_store = lp_build_array_alloca(gallivm,
777                                                  lp_build_vec_type(gallivm, type),
778                                                  lp_build_const_int32(gallivm, num_loops),
779                                                  "");
780       for (i = 0; i < num_loops; i++) {
781          index = lp_build_const_int32(gallivm, i);
782          calc_offsets(&bld->coeff_bld, i*type.length/4, &pixoffx, &pixoffy);
783          ptr = LLVMBuildGEP(builder, bld->xoffset_store, &index, 1, "");
784          LLVMBuildStore(builder, pixoffx, ptr);
785          ptr = LLVMBuildGEP(builder, bld->yoffset_store, &index, 1, "");
786          LLVMBuildStore(builder, pixoffy, ptr);
787       }
788    }
789    coeffs_init_simple(bld, a0_ptr, dadx_ptr, dady_ptr);
790 }
791 
792 
793 /*
794  * Advance the position and inputs to the given quad within the block.
795  */
796 
797 void
lp_build_interp_soa_update_inputs_dyn(struct lp_build_interp_soa_context * bld,struct gallivm_state * gallivm,LLVMValueRef quad_start_index,LLVMValueRef mask_store,LLVMValueRef sample_id)798 lp_build_interp_soa_update_inputs_dyn(struct lp_build_interp_soa_context *bld,
799                                       struct gallivm_state *gallivm,
800                                       LLVMValueRef quad_start_index,
801                                       LLVMValueRef mask_store,
802                                       LLVMValueRef sample_id)
803 {
804    attribs_update_simple(bld, gallivm, quad_start_index, mask_store, sample_id, 1, bld->num_attribs);
805 }
806 
807 void
lp_build_interp_soa_update_pos_dyn(struct lp_build_interp_soa_context * bld,struct gallivm_state * gallivm,LLVMValueRef quad_start_index,LLVMValueRef sample_id)808 lp_build_interp_soa_update_pos_dyn(struct lp_build_interp_soa_context *bld,
809                                    struct gallivm_state *gallivm,
810                                    LLVMValueRef quad_start_index,
811                                    LLVMValueRef sample_id)
812 {
813    attribs_update_simple(bld, gallivm, quad_start_index, NULL, sample_id, 0, 1);
814 }
815 
816