1 /**************************************************************************
2  *
3  * Copyright 2010 VMware, Inc.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sub license, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the
15  * next paragraph) shall be included in all copies or substantial portions
16  * of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  *
26  **************************************************************************/
27 
28 /**
29  * @file
30  * Texture sampling -- AoS.
31  *
32  * @author Jose Fonseca <jfonseca@vmware.com>
33  * @author Brian Paul <brianp@vmware.com>
34  */
35 
36 #include "pipe/p_defines.h"
37 #include "pipe/p_state.h"
38 #include "util/u_debug.h"
39 #include "util/u_dump.h"
40 #include "util/u_memory.h"
41 #include "util/u_math.h"
42 #include "util/format/u_format.h"
43 #include "util/u_cpu_detect.h"
44 #include "lp_bld_debug.h"
45 #include "lp_bld_type.h"
46 #include "lp_bld_const.h"
47 #include "lp_bld_conv.h"
48 #include "lp_bld_arit.h"
49 #include "lp_bld_bitarit.h"
50 #include "lp_bld_logic.h"
51 #include "lp_bld_swizzle.h"
52 #include "lp_bld_pack.h"
53 #include "lp_bld_flow.h"
54 #include "lp_bld_gather.h"
55 #include "lp_bld_format.h"
56 #include "lp_bld_init.h"
57 #include "lp_bld_sample.h"
58 #include "lp_bld_sample_aos.h"
59 #include "lp_bld_quad.h"
60 
61 
62 /**
63  * Build LLVM code for texture coord wrapping, for nearest filtering,
64  * for scaled integer texcoords.
65  * \param block_length  is the length of the pixel block along the
66  *                      coordinate axis
67  * \param coord  the incoming texcoord (s,t or r) scaled to the texture size
68  * \param coord_f  the incoming texcoord (s,t or r) as float vec
69  * \param length  the texture size along one dimension
70  * \param stride  pixel stride along the coordinate axis (in bytes)
71  * \param offset  the texel offset along the coord axis
72  * \param is_pot  if TRUE, length is a power of two
73  * \param wrap_mode  one of PIPE_TEX_WRAP_x
74  * \param out_offset  byte offset for the wrapped coordinate
75  * \param out_i  resulting sub-block pixel coordinate for coord0
76  */
77 static void
lp_build_sample_wrap_nearest_int(struct lp_build_sample_context * bld,unsigned block_length,LLVMValueRef coord,LLVMValueRef coord_f,LLVMValueRef length,LLVMValueRef stride,LLVMValueRef offset,boolean is_pot,unsigned wrap_mode,LLVMValueRef * out_offset,LLVMValueRef * out_i)78 lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld,
79                                  unsigned block_length,
80                                  LLVMValueRef coord,
81                                  LLVMValueRef coord_f,
82                                  LLVMValueRef length,
83                                  LLVMValueRef stride,
84                                  LLVMValueRef offset,
85                                  boolean is_pot,
86                                  unsigned wrap_mode,
87                                  LLVMValueRef *out_offset,
88                                  LLVMValueRef *out_i)
89 {
90    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
91    LLVMBuilderRef builder = bld->gallivm->builder;
92    LLVMValueRef length_minus_one;
93 
94    length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
95 
96    switch(wrap_mode) {
97    case PIPE_TEX_WRAP_REPEAT:
98       if(is_pot)
99          coord = LLVMBuildAnd(builder, coord, length_minus_one, "");
100       else {
101          struct lp_build_context *coord_bld = &bld->coord_bld;
102          LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length);
103          if (offset) {
104             offset = lp_build_int_to_float(coord_bld, offset);
105             offset = lp_build_div(coord_bld, offset, length_f);
106             coord_f = lp_build_add(coord_bld, coord_f, offset);
107          }
108          coord = lp_build_fract_safe(coord_bld, coord_f);
109          coord = lp_build_mul(coord_bld, coord, length_f);
110          coord = lp_build_itrunc(coord_bld, coord);
111       }
112       break;
113 
114    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
115       coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero);
116       coord = lp_build_min(int_coord_bld, coord, length_minus_one);
117       break;
118 
119    case PIPE_TEX_WRAP_CLAMP:
120    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
121    case PIPE_TEX_WRAP_MIRROR_REPEAT:
122    case PIPE_TEX_WRAP_MIRROR_CLAMP:
123    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
124    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
125    default:
126       assert(0);
127    }
128 
129    lp_build_sample_partial_offset(int_coord_bld, block_length, coord, stride,
130                                   out_offset, out_i);
131 }
132 
133 
134 /**
135  * Helper to compute the first coord and the weight for
136  * linear wrap repeat npot textures
137  */
138 static void
lp_build_coord_repeat_npot_linear_int(struct lp_build_sample_context * bld,LLVMValueRef coord_f,LLVMValueRef length_i,LLVMValueRef length_f,LLVMValueRef * coord0_i,LLVMValueRef * weight_i)139 lp_build_coord_repeat_npot_linear_int(struct lp_build_sample_context *bld,
140                                       LLVMValueRef coord_f,
141                                       LLVMValueRef length_i,
142                                       LLVMValueRef length_f,
143                                       LLVMValueRef *coord0_i,
144                                       LLVMValueRef *weight_i)
145 {
146    struct lp_build_context *coord_bld = &bld->coord_bld;
147    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
148    struct lp_build_context abs_coord_bld;
149    struct lp_type abs_type;
150    LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length_i,
151                                                 int_coord_bld->one);
152    LLVMValueRef mask, i32_c8, i32_c128, i32_c255;
153 
154    /* wrap with normalized floats is just fract */
155    coord_f = lp_build_fract(coord_bld, coord_f);
156    /* mul by size */
157    coord_f = lp_build_mul(coord_bld, coord_f, length_f);
158    /* convert to int, compute lerp weight */
159    coord_f = lp_build_mul_imm(&bld->coord_bld, coord_f, 256);
160 
161    /* At this point we don't have any negative numbers so use non-signed
162     * build context which might help on some archs.
163     */
164    abs_type = coord_bld->type;
165    abs_type.sign = 0;
166    lp_build_context_init(&abs_coord_bld, bld->gallivm, abs_type);
167    *coord0_i = lp_build_iround(&abs_coord_bld, coord_f);
168 
169    /* subtract 0.5 (add -128) */
170    i32_c128 = lp_build_const_int_vec(bld->gallivm, bld->int_coord_type, -128);
171    *coord0_i = LLVMBuildAdd(bld->gallivm->builder, *coord0_i, i32_c128, "");
172 
173    /* compute fractional part (AND with 0xff) */
174    i32_c255 = lp_build_const_int_vec(bld->gallivm, bld->int_coord_type, 255);
175    *weight_i = LLVMBuildAnd(bld->gallivm->builder, *coord0_i, i32_c255, "");
176 
177    /* compute floor (shift right 8) */
178    i32_c8 = lp_build_const_int_vec(bld->gallivm, bld->int_coord_type, 8);
179    *coord0_i = LLVMBuildAShr(bld->gallivm->builder, *coord0_i, i32_c8, "");
180    /*
181     * we avoided the 0.5/length division before the repeat wrap,
182     * now need to fix up edge cases with selects
183     */
184    mask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
185                            PIPE_FUNC_LESS, *coord0_i, int_coord_bld->zero);
186    *coord0_i = lp_build_select(int_coord_bld, mask, length_minus_one, *coord0_i);
187    /*
188     * We should never get values too large - except if coord was nan or inf,
189     * in which case things go terribly wrong...
190     * Alternatively, could use fract_safe above...
191     */
192    *coord0_i = lp_build_min(int_coord_bld, *coord0_i, length_minus_one);
193 }
194 
195 
196 /**
197  * Build LLVM code for texture coord wrapping, for linear filtering,
198  * for scaled integer texcoords.
199  * \param block_length  is the length of the pixel block along the
200  *                      coordinate axis
201  * \param coord0  the incoming texcoord (s,t or r) scaled to the texture size
202  * \param coord_f  the incoming texcoord (s,t or r) as float vec
203  * \param length  the texture size along one dimension
204  * \param stride  pixel stride along the coordinate axis (in bytes)
205  * \param offset  the texel offset along the coord axis
206  * \param is_pot  if TRUE, length is a power of two
207  * \param wrap_mode  one of PIPE_TEX_WRAP_x
208  * \param offset0  resulting relative offset for coord0
209  * \param offset1  resulting relative offset for coord0 + 1
210  * \param i0  resulting sub-block pixel coordinate for coord0
211  * \param i1  resulting sub-block pixel coordinate for coord0 + 1
212  */
213 static void
lp_build_sample_wrap_linear_int(struct lp_build_sample_context * bld,unsigned block_length,LLVMValueRef coord0,LLVMValueRef * weight_i,LLVMValueRef coord_f,LLVMValueRef length,LLVMValueRef stride,LLVMValueRef offset,boolean is_pot,unsigned wrap_mode,LLVMValueRef * offset0,LLVMValueRef * offset1,LLVMValueRef * i0,LLVMValueRef * i1)214 lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld,
215                                 unsigned block_length,
216                                 LLVMValueRef coord0,
217                                 LLVMValueRef *weight_i,
218                                 LLVMValueRef coord_f,
219                                 LLVMValueRef length,
220                                 LLVMValueRef stride,
221                                 LLVMValueRef offset,
222                                 boolean is_pot,
223                                 unsigned wrap_mode,
224                                 LLVMValueRef *offset0,
225                                 LLVMValueRef *offset1,
226                                 LLVMValueRef *i0,
227                                 LLVMValueRef *i1)
228 {
229    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
230    LLVMBuilderRef builder = bld->gallivm->builder;
231    LLVMValueRef length_minus_one;
232    LLVMValueRef lmask, umask, mask;
233 
234    /*
235     * If the pixel block covers more than one pixel then there is no easy
236     * way to calculate offset1 relative to offset0. Instead, compute them
237     * independently. Otherwise, try to compute offset0 and offset1 with
238     * a single stride multiplication.
239     */
240 
241    length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
242 
243    if (block_length != 1) {
244       LLVMValueRef coord1;
245       switch(wrap_mode) {
246       case PIPE_TEX_WRAP_REPEAT:
247          if (is_pot) {
248             coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
249             coord0 = LLVMBuildAnd(builder, coord0, length_minus_one, "");
250             coord1 = LLVMBuildAnd(builder, coord1, length_minus_one, "");
251          }
252          else {
253             LLVMValueRef mask;
254             LLVMValueRef length_f = lp_build_int_to_float(&bld->coord_bld, length);
255             if (offset) {
256                offset = lp_build_int_to_float(&bld->coord_bld, offset);
257                offset = lp_build_div(&bld->coord_bld, offset, length_f);
258                coord_f = lp_build_add(&bld->coord_bld, coord_f, offset);
259             }
260             lp_build_coord_repeat_npot_linear_int(bld, coord_f,
261                                                   length, length_f,
262                                                   &coord0, weight_i);
263             mask = lp_build_compare(bld->gallivm, int_coord_bld->type,
264                                     PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);
265             coord1 = LLVMBuildAnd(builder,
266                                   lp_build_add(int_coord_bld, coord0,
267                                                int_coord_bld->one),
268                                   mask, "");
269          }
270          break;
271 
272       case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
273          coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
274          coord0 = lp_build_clamp(int_coord_bld, coord0, int_coord_bld->zero,
275                                 length_minus_one);
276          coord1 = lp_build_clamp(int_coord_bld, coord1, int_coord_bld->zero,
277                                 length_minus_one);
278          break;
279 
280       case PIPE_TEX_WRAP_CLAMP:
281       case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
282       case PIPE_TEX_WRAP_MIRROR_REPEAT:
283       case PIPE_TEX_WRAP_MIRROR_CLAMP:
284       case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
285       case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
286       default:
287          assert(0);
288          coord0 = int_coord_bld->zero;
289          coord1 = int_coord_bld->zero;
290          break;
291       }
292       lp_build_sample_partial_offset(int_coord_bld, block_length, coord0, stride,
293                                      offset0, i0);
294       lp_build_sample_partial_offset(int_coord_bld, block_length, coord1, stride,
295                                      offset1, i1);
296       return;
297    }
298 
299    *i0 = int_coord_bld->zero;
300    *i1 = int_coord_bld->zero;
301 
302    switch(wrap_mode) {
303    case PIPE_TEX_WRAP_REPEAT:
304       if (is_pot) {
305          coord0 = LLVMBuildAnd(builder, coord0, length_minus_one, "");
306       }
307       else {
308          LLVMValueRef length_f = lp_build_int_to_float(&bld->coord_bld, length);
309          if (offset) {
310             offset = lp_build_int_to_float(&bld->coord_bld, offset);
311             offset = lp_build_div(&bld->coord_bld, offset, length_f);
312             coord_f = lp_build_add(&bld->coord_bld, coord_f, offset);
313          }
314          lp_build_coord_repeat_npot_linear_int(bld, coord_f,
315                                                length, length_f,
316                                                &coord0, weight_i);
317       }
318 
319       mask = lp_build_compare(bld->gallivm, int_coord_bld->type,
320                               PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);
321 
322       *offset0 = lp_build_mul(int_coord_bld, coord0, stride);
323       *offset1 = LLVMBuildAnd(builder,
324                               lp_build_add(int_coord_bld, *offset0, stride),
325                               mask, "");
326       break;
327 
328    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
329       /* XXX this might be slower than the separate path
330        * on some newer cpus. With sse41 this is 8 instructions vs. 7
331        * - at least on SNB this is almost certainly slower since
332        * min/max are cheaper than selects, and the muls aren't bad.
333        */
334       lmask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
335                                PIPE_FUNC_GEQUAL, coord0, int_coord_bld->zero);
336       umask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
337                                PIPE_FUNC_LESS, coord0, length_minus_one);
338 
339       coord0 = lp_build_select(int_coord_bld, lmask, coord0, int_coord_bld->zero);
340       coord0 = lp_build_select(int_coord_bld, umask, coord0, length_minus_one);
341 
342       mask = LLVMBuildAnd(builder, lmask, umask, "");
343 
344       *offset0 = lp_build_mul(int_coord_bld, coord0, stride);
345       *offset1 = lp_build_add(int_coord_bld,
346                               *offset0,
347                               LLVMBuildAnd(builder, stride, mask, ""));
348       break;
349 
350    case PIPE_TEX_WRAP_CLAMP:
351    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
352    case PIPE_TEX_WRAP_MIRROR_REPEAT:
353    case PIPE_TEX_WRAP_MIRROR_CLAMP:
354    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
355    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
356    default:
357       assert(0);
358       *offset0 = int_coord_bld->zero;
359       *offset1 = int_coord_bld->zero;
360       break;
361    }
362 }
363 
364 
365 /**
366  * Fetch texels for image with nearest sampling.
367  * Return filtered color as two vectors of 16-bit fixed point values.
368  */
369 static void
lp_build_sample_fetch_image_nearest(struct lp_build_sample_context * bld,LLVMValueRef data_ptr,LLVMValueRef offset,LLVMValueRef x_subcoord,LLVMValueRef y_subcoord,LLVMValueRef * colors)370 lp_build_sample_fetch_image_nearest(struct lp_build_sample_context *bld,
371                                     LLVMValueRef data_ptr,
372                                     LLVMValueRef offset,
373                                     LLVMValueRef x_subcoord,
374                                     LLVMValueRef y_subcoord,
375                                     LLVMValueRef *colors)
376 {
377    /*
378     * Fetch the pixels as 4 x 32bit (rgba order might differ):
379     *
380     *   rgba0 rgba1 rgba2 rgba3
381     *
382     * bit cast them into 16 x u8
383     *
384     *   r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
385     *
386     * unpack them into two 8 x i16:
387     *
388     *   r0 g0 b0 a0 r1 g1 b1 a1
389     *   r2 g2 b2 a2 r3 g3 b3 a3
390     *
391     * The higher 8 bits of the resulting elements will be zero.
392     */
393    LLVMBuilderRef builder = bld->gallivm->builder;
394    LLVMValueRef rgba8;
395    struct lp_build_context u8n;
396    LLVMTypeRef u8n_vec_type;
397    struct lp_type fetch_type;
398 
399    lp_build_context_init(&u8n, bld->gallivm, lp_type_unorm(8, bld->vector_width));
400    u8n_vec_type = lp_build_vec_type(bld->gallivm, u8n.type);
401 
402    fetch_type = lp_type_uint(bld->texel_type.width);
403    if (util_format_is_rgba8_variant(bld->format_desc)) {
404       /*
405        * Given the format is a rgba8, just read the pixels as is,
406        * without any swizzling. Swizzling will be done later.
407        */
408       rgba8 = lp_build_gather(bld->gallivm,
409                               bld->texel_type.length,
410                               bld->format_desc->block.bits,
411                               fetch_type,
412                               TRUE,
413                               data_ptr, offset, TRUE);
414 
415       rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
416    }
417    else {
418       rgba8 = lp_build_fetch_rgba_aos(bld->gallivm,
419                                       bld->format_desc,
420                                       u8n.type,
421                                       TRUE,
422                                       data_ptr, offset,
423                                       x_subcoord,
424                                       y_subcoord,
425                                       bld->cache);
426    }
427 
428    *colors = rgba8;
429 }
430 
431 
432 /**
433  * Sample a single texture image with nearest sampling.
434  * If sampling a cube texture, r = cube face in [0,5].
435  * Return filtered color as two vectors of 16-bit fixed point values.
436  */
437 static void
lp_build_sample_image_nearest(struct lp_build_sample_context * bld,LLVMValueRef int_size,LLVMValueRef row_stride_vec,LLVMValueRef img_stride_vec,LLVMValueRef data_ptr,LLVMValueRef mipoffsets,LLVMValueRef s,LLVMValueRef t,LLVMValueRef r,const LLVMValueRef * offsets,LLVMValueRef * colors)438 lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
439                               LLVMValueRef int_size,
440                               LLVMValueRef row_stride_vec,
441                               LLVMValueRef img_stride_vec,
442                               LLVMValueRef data_ptr,
443                               LLVMValueRef mipoffsets,
444                               LLVMValueRef s,
445                               LLVMValueRef t,
446                               LLVMValueRef r,
447                               const LLVMValueRef *offsets,
448                               LLVMValueRef *colors)
449 {
450    const unsigned dims = bld->dims;
451    struct lp_build_context i32;
452    LLVMValueRef width_vec, height_vec, depth_vec;
453    LLVMValueRef s_ipart, t_ipart = NULL, r_ipart = NULL;
454    LLVMValueRef s_float, t_float = NULL, r_float = NULL;
455    LLVMValueRef x_stride;
456    LLVMValueRef x_offset, offset;
457    LLVMValueRef x_subcoord, y_subcoord = NULL, z_subcoord;
458 
459    lp_build_context_init(&i32, bld->gallivm, lp_type_int_vec(32, bld->vector_width));
460 
461    lp_build_extract_image_sizes(bld,
462                                 &bld->int_size_bld,
463                                 bld->int_coord_type,
464                                 int_size,
465                                 &width_vec,
466                                 &height_vec,
467                                 &depth_vec);
468 
469    s_float = s; t_float = t; r_float = r;
470 
471    if (bld->static_sampler_state->normalized_coords) {
472       LLVMValueRef flt_size;
473 
474       flt_size = lp_build_int_to_float(&bld->float_size_bld, int_size);
475 
476       lp_build_unnormalized_coords(bld, flt_size, &s, &t, &r);
477    }
478 
479    /* convert float to int */
480    /* For correct rounding, need floor, not truncation here.
481     * Note that in some cases (clamp to edge, no texel offsets) we
482     * could use a non-signed build context which would help archs
483     * greatly which don't have arch rounding.
484     */
485    s_ipart = lp_build_ifloor(&bld->coord_bld, s);
486    if (dims >= 2)
487       t_ipart = lp_build_ifloor(&bld->coord_bld, t);
488    if (dims >= 3)
489       r_ipart = lp_build_ifloor(&bld->coord_bld, r);
490 
491    /* add texel offsets */
492    if (offsets[0]) {
493       s_ipart = lp_build_add(&i32, s_ipart, offsets[0]);
494       if (dims >= 2) {
495          t_ipart = lp_build_add(&i32, t_ipart, offsets[1]);
496          if (dims >= 3) {
497             r_ipart = lp_build_add(&i32, r_ipart, offsets[2]);
498          }
499       }
500    }
501 
502    /* get pixel, row, image strides */
503    x_stride = lp_build_const_vec(bld->gallivm,
504                                  bld->int_coord_bld.type,
505                                  bld->format_desc->block.bits/8);
506 
507    /* Do texcoord wrapping, compute texel offset */
508    lp_build_sample_wrap_nearest_int(bld,
509                                     bld->format_desc->block.width,
510                                     s_ipart, s_float,
511                                     width_vec, x_stride, offsets[0],
512                                     bld->static_texture_state->pot_width,
513                                     bld->static_sampler_state->wrap_s,
514                                     &x_offset, &x_subcoord);
515    offset = x_offset;
516    if (dims >= 2) {
517       LLVMValueRef y_offset;
518       lp_build_sample_wrap_nearest_int(bld,
519                                        bld->format_desc->block.height,
520                                        t_ipart, t_float,
521                                        height_vec, row_stride_vec, offsets[1],
522                                        bld->static_texture_state->pot_height,
523                                        bld->static_sampler_state->wrap_t,
524                                        &y_offset, &y_subcoord);
525       offset = lp_build_add(&bld->int_coord_bld, offset, y_offset);
526       if (dims >= 3) {
527          LLVMValueRef z_offset;
528          lp_build_sample_wrap_nearest_int(bld,
529                                           1, /* block length (depth) */
530                                           r_ipart, r_float,
531                                           depth_vec, img_stride_vec, offsets[2],
532                                           bld->static_texture_state->pot_depth,
533                                           bld->static_sampler_state->wrap_r,
534                                           &z_offset, &z_subcoord);
535          offset = lp_build_add(&bld->int_coord_bld, offset, z_offset);
536       }
537    }
538    if (has_layer_coord(bld->static_texture_state->target)) {
539       LLVMValueRef z_offset;
540       /* The r coord is the cube face in [0,5] or array layer */
541       z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec);
542       offset = lp_build_add(&bld->int_coord_bld, offset, z_offset);
543    }
544    if (mipoffsets) {
545       offset = lp_build_add(&bld->int_coord_bld, offset, mipoffsets);
546    }
547 
548    lp_build_sample_fetch_image_nearest(bld, data_ptr, offset,
549                                        x_subcoord, y_subcoord,
550                                        colors);
551 }
552 
553 
554 /**
555  * Fetch texels for image with linear sampling.
556  * Return filtered color as two vectors of 16-bit fixed point values.
557  */
558 static void
lp_build_sample_fetch_image_linear(struct lp_build_sample_context * bld,LLVMValueRef data_ptr,LLVMValueRef offset[2][2][2],LLVMValueRef x_subcoord[2],LLVMValueRef y_subcoord[2],LLVMValueRef s_fpart,LLVMValueRef t_fpart,LLVMValueRef r_fpart,LLVMValueRef * colors)559 lp_build_sample_fetch_image_linear(struct lp_build_sample_context *bld,
560                                    LLVMValueRef data_ptr,
561                                    LLVMValueRef offset[2][2][2],
562                                    LLVMValueRef x_subcoord[2],
563                                    LLVMValueRef y_subcoord[2],
564                                    LLVMValueRef s_fpart,
565                                    LLVMValueRef t_fpart,
566                                    LLVMValueRef r_fpart,
567                                    LLVMValueRef *colors)
568 {
569    const unsigned dims = bld->dims;
570    LLVMBuilderRef builder = bld->gallivm->builder;
571    struct lp_build_context u8n;
572    LLVMTypeRef u8n_vec_type;
573    LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context);
574    LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
575    LLVMValueRef shuffle;
576    LLVMValueRef neighbors[2][2][2]; /* [z][y][x] */
577    LLVMValueRef packed;
578    unsigned i, j, k;
579    unsigned numj, numk;
580 
581    lp_build_context_init(&u8n, bld->gallivm, lp_type_unorm(8, bld->vector_width));
582    u8n_vec_type = lp_build_vec_type(bld->gallivm, u8n.type);
583 
584    /*
585     * Transform 4 x i32 in
586     *
587     *   s_fpart = {s0, s1, s2, s3}
588     *
589     * where each value is between 0 and 0xff,
590     *
591     * into one 16 x i20
592     *
593     *   s_fpart = {s0, s0, s0, s0, s1, s1, s1, s1, s2, s2, s2, s2, s3, s3, s3, s3}
594     *
595     * and likewise for t_fpart. There is no risk of loosing precision here
596     * since the fractional parts only use the lower 8bits.
597     */
598    s_fpart = LLVMBuildBitCast(builder, s_fpart, u8n_vec_type, "");
599    if (dims >= 2)
600       t_fpart = LLVMBuildBitCast(builder, t_fpart, u8n_vec_type, "");
601    if (dims >= 3)
602       r_fpart = LLVMBuildBitCast(builder, r_fpart, u8n_vec_type, "");
603 
604    for (j = 0; j < u8n.type.length; j += 4) {
605 #if UTIL_ARCH_LITTLE_ENDIAN
606       unsigned subindex = 0;
607 #else
608       unsigned subindex = 3;
609 #endif
610       LLVMValueRef index;
611 
612       index = LLVMConstInt(elem_type, j + subindex, 0);
613       for (i = 0; i < 4; ++i)
614          shuffles[j + i] = index;
615    }
616 
617    shuffle = LLVMConstVector(shuffles, u8n.type.length);
618 
619    s_fpart = LLVMBuildShuffleVector(builder, s_fpart, u8n.undef,
620                                     shuffle, "");
621    if (dims >= 2) {
622       t_fpart = LLVMBuildShuffleVector(builder, t_fpart, u8n.undef,
623                                        shuffle, "");
624    }
625    if (dims >= 3) {
626       r_fpart = LLVMBuildShuffleVector(builder, r_fpart, u8n.undef,
627                                        shuffle, "");
628    }
629 
630    /*
631     * Fetch the pixels as 4 x 32bit (rgba order might differ):
632     *
633     *   rgba0 rgba1 rgba2 rgba3
634     *
635     * bit cast them into 16 x u8
636     *
637     *   r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
638     *
639     * unpack them into two 8 x i16:
640     *
641     *   r0 g0 b0 a0 r1 g1 b1 a1
642     *   r2 g2 b2 a2 r3 g3 b3 a3
643     *
644     * The higher 8 bits of the resulting elements will be zero.
645     */
646    numj = 1 + (dims >= 2);
647    numk = 1 + (dims >= 3);
648 
649    for (k = 0; k < numk; k++) {
650       for (j = 0; j < numj; j++) {
651          for (i = 0; i < 2; i++) {
652             LLVMValueRef rgba8;
653 
654             if (util_format_is_rgba8_variant(bld->format_desc)) {
655                struct lp_type fetch_type;
656                /*
657                 * Given the format is a rgba8, just read the pixels as is,
658                 * without any swizzling. Swizzling will be done later.
659                 */
660                fetch_type = lp_type_uint(bld->texel_type.width);
661                rgba8 = lp_build_gather(bld->gallivm,
662                                        bld->texel_type.length,
663                                        bld->format_desc->block.bits,
664                                        fetch_type,
665                                        TRUE,
666                                        data_ptr, offset[k][j][i], TRUE);
667 
668                rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
669             }
670             else {
671                rgba8 = lp_build_fetch_rgba_aos(bld->gallivm,
672                                                bld->format_desc,
673                                                u8n.type,
674                                                TRUE,
675                                                data_ptr, offset[k][j][i],
676                                                x_subcoord[i],
677                                                y_subcoord[j],
678                                                bld->cache);
679             }
680 
681             neighbors[k][j][i] = rgba8;
682          }
683       }
684    }
685 
686    /*
687     * Linear interpolation with 8.8 fixed point.
688     */
689 
690    /* general 1/2/3-D lerping */
691    if (dims == 1) {
692       lp_build_reduce_filter(&u8n,
693                               bld->static_sampler_state->reduction_mode,
694                               LP_BLD_LERP_PRESCALED_WEIGHTS,
695                               1,
696                               s_fpart,
697                               &neighbors[0][0][0],
698                               &neighbors[0][0][1],
699                               &packed);
700    } else if (dims == 2) {
701       /* 2-D lerp */
702       lp_build_reduce_filter_2d(&u8n,
703                                  bld->static_sampler_state->reduction_mode,
704                                  LP_BLD_LERP_PRESCALED_WEIGHTS,
705                                  1,
706                                  s_fpart, t_fpart,
707                                  &neighbors[0][0][0],
708                                  &neighbors[0][0][1],
709                                  &neighbors[0][1][0],
710                                  &neighbors[0][1][1],
711                                  &packed);
712    } else {
713       /* 3-D lerp */
714       assert(dims == 3);
715       lp_build_reduce_filter_3d(&u8n,
716                                  bld->static_sampler_state->reduction_mode,
717                                  LP_BLD_LERP_PRESCALED_WEIGHTS,
718                                  1,
719                                  s_fpart, t_fpart, r_fpart,
720                                  &neighbors[0][0][0],
721                                  &neighbors[0][0][1],
722                                  &neighbors[0][1][0],
723                                  &neighbors[0][1][1],
724                                  &neighbors[1][0][0],
725                                  &neighbors[1][0][1],
726                                  &neighbors[1][1][0],
727                                  &neighbors[1][1][1],
728                                  &packed);
729    }
730 
731    *colors = packed;
732 }
733 
734 /**
735  * Sample a single texture image with (bi-)(tri-)linear sampling.
736  * Return filtered color as two vectors of 16-bit fixed point values.
737  */
738 static void
lp_build_sample_image_linear(struct lp_build_sample_context * bld,LLVMValueRef int_size,LLVMValueRef row_stride_vec,LLVMValueRef img_stride_vec,LLVMValueRef data_ptr,LLVMValueRef mipoffsets,LLVMValueRef s,LLVMValueRef t,LLVMValueRef r,const LLVMValueRef * offsets,LLVMValueRef * colors)739 lp_build_sample_image_linear(struct lp_build_sample_context *bld,
740                              LLVMValueRef int_size,
741                              LLVMValueRef row_stride_vec,
742                              LLVMValueRef img_stride_vec,
743                              LLVMValueRef data_ptr,
744                              LLVMValueRef mipoffsets,
745                              LLVMValueRef s,
746                              LLVMValueRef t,
747                              LLVMValueRef r,
748                              const LLVMValueRef *offsets,
749                              LLVMValueRef *colors)
750 {
751    const unsigned dims = bld->dims;
752    LLVMBuilderRef builder = bld->gallivm->builder;
753    struct lp_build_context i32;
754    LLVMValueRef i32_c8, i32_c128, i32_c255;
755    LLVMValueRef width_vec, height_vec, depth_vec;
756    LLVMValueRef s_ipart, s_fpart, s_float;
757    LLVMValueRef t_ipart = NULL, t_fpart = NULL, t_float = NULL;
758    LLVMValueRef r_ipart = NULL, r_fpart = NULL, r_float = NULL;
759    LLVMValueRef x_stride, y_stride, z_stride;
760    LLVMValueRef x_offset0, x_offset1;
761    LLVMValueRef y_offset0, y_offset1;
762    LLVMValueRef z_offset0, z_offset1;
763    LLVMValueRef offset[2][2][2]; /* [z][y][x] */
764    LLVMValueRef x_subcoord[2], y_subcoord[2] = {NULL, NULL}, z_subcoord[2];
765    unsigned x, y, z;
766 
767    lp_build_context_init(&i32, bld->gallivm, lp_type_int_vec(32, bld->vector_width));
768 
769    lp_build_extract_image_sizes(bld,
770                                 &bld->int_size_bld,
771                                 bld->int_coord_type,
772                                 int_size,
773                                 &width_vec,
774                                 &height_vec,
775                                 &depth_vec);
776 
777    s_float = s; t_float = t; r_float = r;
778 
779    if (bld->static_sampler_state->normalized_coords) {
780       LLVMValueRef scaled_size;
781       LLVMValueRef flt_size;
782 
783       /* scale size by 256 (8 fractional bits) */
784       scaled_size = lp_build_shl_imm(&bld->int_size_bld, int_size, 8);
785 
786       flt_size = lp_build_int_to_float(&bld->float_size_bld, scaled_size);
787 
788       lp_build_unnormalized_coords(bld, flt_size, &s, &t, &r);
789    }
790    else {
791       /* scale coords by 256 (8 fractional bits) */
792       s = lp_build_mul_imm(&bld->coord_bld, s, 256);
793       if (dims >= 2)
794          t = lp_build_mul_imm(&bld->coord_bld, t, 256);
795       if (dims >= 3)
796          r = lp_build_mul_imm(&bld->coord_bld, r, 256);
797    }
798 
799    /* convert float to int */
800    /* For correct rounding, need round to nearest, not truncation here.
801     * Note that in some cases (clamp to edge, no texel offsets) we
802     * could use a non-signed build context which would help archs which
803     * don't have fptosi intrinsic with nearest rounding implemented.
804     */
805    s = lp_build_iround(&bld->coord_bld, s);
806    if (dims >= 2)
807       t = lp_build_iround(&bld->coord_bld, t);
808    if (dims >= 3)
809       r = lp_build_iround(&bld->coord_bld, r);
810 
811    /* subtract 0.5 (add -128) */
812    i32_c128 = lp_build_const_int_vec(bld->gallivm, i32.type, -128);
813 
814    s = LLVMBuildAdd(builder, s, i32_c128, "");
815    if (dims >= 2) {
816       t = LLVMBuildAdd(builder, t, i32_c128, "");
817    }
818    if (dims >= 3) {
819       r = LLVMBuildAdd(builder, r, i32_c128, "");
820    }
821 
822    /* compute floor (shift right 8) */
823    i32_c8 = lp_build_const_int_vec(bld->gallivm, i32.type, 8);
824    s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
825    if (dims >= 2)
826       t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
827    if (dims >= 3)
828       r_ipart = LLVMBuildAShr(builder, r, i32_c8, "");
829 
830    /* add texel offsets */
831    if (offsets[0]) {
832       s_ipart = lp_build_add(&i32, s_ipart, offsets[0]);
833       if (dims >= 2) {
834          t_ipart = lp_build_add(&i32, t_ipart, offsets[1]);
835          if (dims >= 3) {
836             r_ipart = lp_build_add(&i32, r_ipart, offsets[2]);
837          }
838       }
839    }
840 
841    /* compute fractional part (AND with 0xff) */
842    i32_c255 = lp_build_const_int_vec(bld->gallivm, i32.type, 255);
843    s_fpart = LLVMBuildAnd(builder, s, i32_c255, "");
844    if (dims >= 2)
845       t_fpart = LLVMBuildAnd(builder, t, i32_c255, "");
846    if (dims >= 3)
847       r_fpart = LLVMBuildAnd(builder, r, i32_c255, "");
848 
849    /* get pixel, row and image strides */
850    x_stride = lp_build_const_vec(bld->gallivm, bld->int_coord_bld.type,
851                                  bld->format_desc->block.bits/8);
852    y_stride = row_stride_vec;
853    z_stride = img_stride_vec;
854 
855    /* do texcoord wrapping and compute texel offsets */
856    lp_build_sample_wrap_linear_int(bld,
857                                    bld->format_desc->block.width,
858                                    s_ipart, &s_fpart, s_float,
859                                    width_vec, x_stride, offsets[0],
860                                    bld->static_texture_state->pot_width,
861                                    bld->static_sampler_state->wrap_s,
862                                    &x_offset0, &x_offset1,
863                                    &x_subcoord[0], &x_subcoord[1]);
864 
865    /* add potential cube/array/mip offsets now as they are constant per pixel */
866    if (has_layer_coord(bld->static_texture_state->target)) {
867       LLVMValueRef z_offset;
868       z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec);
869       /* The r coord is the cube face in [0,5] or array layer */
870       x_offset0 = lp_build_add(&bld->int_coord_bld, x_offset0, z_offset);
871       x_offset1 = lp_build_add(&bld->int_coord_bld, x_offset1, z_offset);
872    }
873    if (mipoffsets) {
874       x_offset0 = lp_build_add(&bld->int_coord_bld, x_offset0, mipoffsets);
875       x_offset1 = lp_build_add(&bld->int_coord_bld, x_offset1, mipoffsets);
876    }
877 
878    for (z = 0; z < 2; z++) {
879       for (y = 0; y < 2; y++) {
880          offset[z][y][0] = x_offset0;
881          offset[z][y][1] = x_offset1;
882       }
883    }
884 
885    if (dims >= 2) {
886       lp_build_sample_wrap_linear_int(bld,
887                                       bld->format_desc->block.height,
888                                       t_ipart, &t_fpart, t_float,
889                                       height_vec, y_stride, offsets[1],
890                                       bld->static_texture_state->pot_height,
891                                       bld->static_sampler_state->wrap_t,
892                                       &y_offset0, &y_offset1,
893                                       &y_subcoord[0], &y_subcoord[1]);
894 
895       for (z = 0; z < 2; z++) {
896          for (x = 0; x < 2; x++) {
897             offset[z][0][x] = lp_build_add(&bld->int_coord_bld,
898                                            offset[z][0][x], y_offset0);
899             offset[z][1][x] = lp_build_add(&bld->int_coord_bld,
900                                            offset[z][1][x], y_offset1);
901          }
902       }
903    }
904 
905    if (dims >= 3) {
906       lp_build_sample_wrap_linear_int(bld,
907                                       1, /* block length (depth) */
908                                       r_ipart, &r_fpart, r_float,
909                                       depth_vec, z_stride, offsets[2],
910                                       bld->static_texture_state->pot_depth,
911                                       bld->static_sampler_state->wrap_r,
912                                       &z_offset0, &z_offset1,
913                                       &z_subcoord[0], &z_subcoord[1]);
914       for (y = 0; y < 2; y++) {
915          for (x = 0; x < 2; x++) {
916             offset[0][y][x] = lp_build_add(&bld->int_coord_bld,
917                                            offset[0][y][x], z_offset0);
918             offset[1][y][x] = lp_build_add(&bld->int_coord_bld,
919                                            offset[1][y][x], z_offset1);
920          }
921       }
922    }
923 
924    lp_build_sample_fetch_image_linear(bld, data_ptr, offset,
925                                       x_subcoord, y_subcoord,
926                                       s_fpart, t_fpart, r_fpart,
927                                       colors);
928 }
929 
930 
931 /**
932  * Sample the texture/mipmap using given image filter and mip filter.
933  * data0_ptr and data1_ptr point to the two mipmap levels to sample
934  * from.  width0/1_vec, height0/1_vec, depth0/1_vec indicate their sizes.
935  * If we're using nearest miplevel sampling the '1' values will be null/unused.
936  */
937 static void
lp_build_sample_mipmap(struct lp_build_sample_context * bld,unsigned img_filter,unsigned mip_filter,LLVMValueRef s,LLVMValueRef t,LLVMValueRef r,const LLVMValueRef * offsets,LLVMValueRef ilevel0,LLVMValueRef ilevel1,LLVMValueRef lod_fpart,LLVMValueRef colors_var)938 lp_build_sample_mipmap(struct lp_build_sample_context *bld,
939                        unsigned img_filter,
940                        unsigned mip_filter,
941                        LLVMValueRef s,
942                        LLVMValueRef t,
943                        LLVMValueRef r,
944                        const LLVMValueRef *offsets,
945                        LLVMValueRef ilevel0,
946                        LLVMValueRef ilevel1,
947                        LLVMValueRef lod_fpart,
948                        LLVMValueRef colors_var)
949 {
950    LLVMBuilderRef builder = bld->gallivm->builder;
951    LLVMValueRef size0;
952    LLVMValueRef size1;
953    LLVMValueRef row_stride0_vec = NULL;
954    LLVMValueRef row_stride1_vec = NULL;
955    LLVMValueRef img_stride0_vec = NULL;
956    LLVMValueRef img_stride1_vec = NULL;
957    LLVMValueRef data_ptr0;
958    LLVMValueRef data_ptr1;
959    LLVMValueRef mipoff0 = NULL;
960    LLVMValueRef mipoff1 = NULL;
961    LLVMValueRef colors0;
962    LLVMValueRef colors1;
963 
964    /* sample the first mipmap level */
965    lp_build_mipmap_level_sizes(bld, ilevel0,
966                                &size0,
967                                &row_stride0_vec, &img_stride0_vec);
968    if (bld->num_mips == 1) {
969       data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
970    }
971    else {
972       /* This path should work for num_lods 1 too but slightly less efficient */
973       data_ptr0 = bld->base_ptr;
974       mipoff0 = lp_build_get_mip_offsets(bld, ilevel0);
975    }
976 
977    if (img_filter == PIPE_TEX_FILTER_NEAREST) {
978       lp_build_sample_image_nearest(bld,
979                                     size0,
980                                     row_stride0_vec, img_stride0_vec,
981                                     data_ptr0, mipoff0, s, t, r, offsets,
982                                     &colors0);
983    }
984    else {
985       assert(img_filter == PIPE_TEX_FILTER_LINEAR);
986       lp_build_sample_image_linear(bld,
987                                    size0,
988                                    row_stride0_vec, img_stride0_vec,
989                                    data_ptr0, mipoff0, s, t, r, offsets,
990                                    &colors0);
991    }
992 
993    /* Store the first level's colors in the output variables */
994    LLVMBuildStore(builder, colors0, colors_var);
995 
996    if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
997       LLVMValueRef h16vec_scale = lp_build_const_vec(bld->gallivm,
998                                                      bld->lodf_bld.type, 256.0);
999       LLVMTypeRef i32vec_type = bld->lodi_bld.vec_type;
1000       struct lp_build_if_state if_ctx;
1001       LLVMValueRef need_lerp;
1002       unsigned num_quads = bld->coord_bld.type.length / 4;
1003       unsigned i;
1004 
1005       lod_fpart = LLVMBuildFMul(builder, lod_fpart, h16vec_scale, "");
1006       lod_fpart = LLVMBuildFPToSI(builder, lod_fpart, i32vec_type, "lod_fpart.fixed16");
1007 
1008       /* need_lerp = lod_fpart > 0 */
1009       if (bld->num_lods == 1) {
1010          need_lerp = LLVMBuildICmp(builder, LLVMIntSGT,
1011                                    lod_fpart, bld->lodi_bld.zero,
1012                                    "need_lerp");
1013       }
1014       else {
1015          /*
1016           * We'll do mip filtering if any of the quads need it.
1017           * It might be better to split the vectors here and only fetch/filter
1018           * quads which need it.
1019           */
1020          /*
1021           * We need to clamp lod_fpart here since we can get negative
1022           * values which would screw up filtering if not all
1023           * lod_fpart values have same sign.
1024           * We can however then skip the greater than comparison.
1025           */
1026          lod_fpart = lp_build_max(&bld->lodi_bld, lod_fpart,
1027                                   bld->lodi_bld.zero);
1028          need_lerp = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods, lod_fpart);
1029       }
1030 
1031       lp_build_if(&if_ctx, bld->gallivm, need_lerp);
1032       {
1033          struct lp_build_context u8n_bld;
1034 
1035          lp_build_context_init(&u8n_bld, bld->gallivm, lp_type_unorm(8, bld->vector_width));
1036 
1037          /* sample the second mipmap level */
1038          lp_build_mipmap_level_sizes(bld, ilevel1,
1039                                      &size1,
1040                                      &row_stride1_vec, &img_stride1_vec);
1041          if (bld->num_mips == 1) {
1042             data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
1043          }
1044          else {
1045             data_ptr1 = bld->base_ptr;
1046             mipoff1 = lp_build_get_mip_offsets(bld, ilevel1);
1047          }
1048 
1049          if (img_filter == PIPE_TEX_FILTER_NEAREST) {
1050             lp_build_sample_image_nearest(bld,
1051                                           size1,
1052                                           row_stride1_vec, img_stride1_vec,
1053                                           data_ptr1, mipoff1, s, t, r, offsets,
1054                                           &colors1);
1055          }
1056          else {
1057             lp_build_sample_image_linear(bld,
1058                                          size1,
1059                                          row_stride1_vec, img_stride1_vec,
1060                                          data_ptr1, mipoff1, s, t, r, offsets,
1061                                          &colors1);
1062          }
1063 
1064          /* interpolate samples from the two mipmap levels */
1065 
1066          if (num_quads == 1 && bld->num_lods == 1) {
1067             lod_fpart = LLVMBuildTrunc(builder, lod_fpart, u8n_bld.elem_type, "");
1068             lod_fpart = lp_build_broadcast_scalar(&u8n_bld, lod_fpart);
1069          }
1070          else {
1071             unsigned num_chans_per_lod = 4 * bld->coord_type.length / bld->num_lods;
1072             LLVMTypeRef tmp_vec_type = LLVMVectorType(u8n_bld.elem_type, bld->lodi_bld.type.length);
1073             LLVMValueRef shuffle[LP_MAX_VECTOR_LENGTH];
1074 
1075             /* Take the LSB of lod_fpart */
1076             lod_fpart = LLVMBuildTrunc(builder, lod_fpart, tmp_vec_type, "");
1077 
1078             /* Broadcast each lod weight into their respective channels */
1079             for (i = 0; i < u8n_bld.type.length; ++i) {
1080                shuffle[i] = lp_build_const_int32(bld->gallivm, i / num_chans_per_lod);
1081             }
1082             lod_fpart = LLVMBuildShuffleVector(builder, lod_fpart, LLVMGetUndef(tmp_vec_type),
1083                                                LLVMConstVector(shuffle, u8n_bld.type.length), "");
1084          }
1085 
1086          lp_build_reduce_filter(&u8n_bld,
1087                                 bld->static_sampler_state->reduction_mode,
1088                                 LP_BLD_LERP_PRESCALED_WEIGHTS,
1089                                 1,
1090                                 lod_fpart,
1091                                 &colors0,
1092                                 &colors1,
1093                                 &colors0);
1094 
1095          LLVMBuildStore(builder, colors0, colors_var);
1096       }
1097       lp_build_endif(&if_ctx);
1098    }
1099 }
1100 
1101 
1102 
1103 /**
1104  * Texture sampling in AoS format.  Used when sampling common 32-bit/texel
1105  * formats.  1D/2D/3D/cube texture supported.  All mipmap sampling modes
1106  * but only limited texture coord wrap modes.
1107  */
1108 void
lp_build_sample_aos(struct lp_build_sample_context * bld,unsigned sampler_unit,LLVMValueRef s,LLVMValueRef t,LLVMValueRef r,const LLVMValueRef * offsets,LLVMValueRef lod_positive,LLVMValueRef lod_fpart,LLVMValueRef ilevel0,LLVMValueRef ilevel1,LLVMValueRef texel_out[4])1109 lp_build_sample_aos(struct lp_build_sample_context *bld,
1110                     unsigned sampler_unit,
1111                     LLVMValueRef s,
1112                     LLVMValueRef t,
1113                     LLVMValueRef r,
1114                     const LLVMValueRef *offsets,
1115                     LLVMValueRef lod_positive,
1116                     LLVMValueRef lod_fpart,
1117                     LLVMValueRef ilevel0,
1118                     LLVMValueRef ilevel1,
1119                     LLVMValueRef texel_out[4])
1120 {
1121    LLVMBuilderRef builder = bld->gallivm->builder;
1122    const unsigned mip_filter = bld->static_sampler_state->min_mip_filter;
1123    const unsigned min_filter = bld->static_sampler_state->min_img_filter;
1124    const unsigned mag_filter = bld->static_sampler_state->mag_img_filter;
1125    const unsigned dims = bld->dims;
1126    LLVMValueRef packed_var, packed;
1127    LLVMValueRef unswizzled[4];
1128    struct lp_build_context u8n_bld;
1129 
1130    /* we only support the common/simple wrap modes at this time */
1131    assert(lp_is_simple_wrap_mode(bld->static_sampler_state->wrap_s));
1132    if (dims >= 2)
1133       assert(lp_is_simple_wrap_mode(bld->static_sampler_state->wrap_t));
1134    if (dims >= 3)
1135       assert(lp_is_simple_wrap_mode(bld->static_sampler_state->wrap_r));
1136 
1137 
1138    /* make 8-bit unorm builder context */
1139    lp_build_context_init(&u8n_bld, bld->gallivm, lp_type_unorm(8, bld->vector_width));
1140 
1141    /*
1142     * Get/interpolate texture colors.
1143     */
1144 
1145    packed_var = lp_build_alloca(bld->gallivm, u8n_bld.vec_type, "packed_var");
1146 
1147    if (min_filter == mag_filter) {
1148       /* no need to distinguish between minification and magnification */
1149       lp_build_sample_mipmap(bld,
1150                              min_filter, mip_filter,
1151                              s, t, r, offsets,
1152                              ilevel0, ilevel1, lod_fpart,
1153                              packed_var);
1154    }
1155    else {
1156       /* Emit conditional to choose min image filter or mag image filter
1157        * depending on the lod being > 0 or <= 0, respectively.
1158        */
1159       struct lp_build_if_state if_ctx;
1160 
1161       /*
1162        * FIXME this should take all lods into account, if some are min
1163        * some max probably could hack up the weights in the linear
1164        * path with selects to work for nearest.
1165        */
1166       if (bld->num_lods > 1)
1167          lod_positive = LLVMBuildExtractElement(builder, lod_positive,
1168                                                 lp_build_const_int32(bld->gallivm, 0), "");
1169 
1170       lod_positive = LLVMBuildTrunc(builder, lod_positive,
1171                                     LLVMInt1TypeInContext(bld->gallivm->context), "");
1172 
1173       lp_build_if(&if_ctx, bld->gallivm, lod_positive);
1174       {
1175          /* Use the minification filter */
1176          lp_build_sample_mipmap(bld,
1177                                 min_filter, mip_filter,
1178                                 s, t, r, offsets,
1179                                 ilevel0, ilevel1, lod_fpart,
1180                                 packed_var);
1181       }
1182       lp_build_else(&if_ctx);
1183       {
1184          /* Use the magnification filter */
1185          lp_build_sample_mipmap(bld,
1186                                 mag_filter, PIPE_TEX_MIPFILTER_NONE,
1187                                 s, t, r, offsets,
1188                                 ilevel0, NULL, NULL,
1189                                 packed_var);
1190       }
1191       lp_build_endif(&if_ctx);
1192    }
1193 
1194    packed = LLVMBuildLoad(builder, packed_var, "");
1195 
1196    /*
1197     * Convert to SoA and swizzle.
1198     */
1199    lp_build_rgba8_to_fi32_soa(bld->gallivm,
1200                              bld->texel_type,
1201                              packed, unswizzled);
1202 
1203    if (util_format_is_rgba8_variant(bld->format_desc)) {
1204       lp_build_format_swizzle_soa(bld->format_desc,
1205                                   &bld->texel_bld,
1206                                   unswizzled, texel_out);
1207    }
1208    else {
1209       texel_out[0] = unswizzled[0];
1210       texel_out[1] = unswizzled[1];
1211       texel_out[2] = unswizzled[2];
1212       texel_out[3] = unswizzled[3];
1213    }
1214 }
1215