1 /**************************************************************************
2  *
3  * Copyright 2009 VMware, Inc.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sub license, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the
15  * next paragraph) shall be included in all copies or substantial portions
16  * of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  *
26  **************************************************************************/
27 
28 /**
29  * @file
30  * Texture sampling -- common code.
31  *
32  * @author Jose Fonseca <jfonseca@vmware.com>
33  */
34 
35 #include "pipe/p_defines.h"
36 #include "pipe/p_state.h"
37 #include "util/format/u_format.h"
38 #include "util/u_math.h"
39 #include "util/u_cpu_detect.h"
40 #include "lp_bld_arit.h"
41 #include "lp_bld_const.h"
42 #include "lp_bld_debug.h"
43 #include "lp_bld_printf.h"
44 #include "lp_bld_flow.h"
45 #include "lp_bld_sample.h"
46 #include "lp_bld_swizzle.h"
47 #include "lp_bld_type.h"
48 #include "lp_bld_logic.h"
49 #include "lp_bld_pack.h"
50 #include "lp_bld_quad.h"
51 #include "lp_bld_bitarit.h"
52 
53 
54 /*
55  * Bri-linear factor. Should be greater than one.
56  */
57 #define BRILINEAR_FACTOR 2
58 
59 /**
60  * Does the given texture wrap mode allow sampling the texture border color?
61  * XXX maybe move this into gallium util code.
62  */
63 boolean
lp_sampler_wrap_mode_uses_border_color(unsigned mode,unsigned min_img_filter,unsigned mag_img_filter)64 lp_sampler_wrap_mode_uses_border_color(unsigned mode,
65                                        unsigned min_img_filter,
66                                        unsigned mag_img_filter)
67 {
68    switch (mode) {
69    case PIPE_TEX_WRAP_REPEAT:
70    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
71    case PIPE_TEX_WRAP_MIRROR_REPEAT:
72    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
73       return FALSE;
74    case PIPE_TEX_WRAP_CLAMP:
75    case PIPE_TEX_WRAP_MIRROR_CLAMP:
76       if (min_img_filter == PIPE_TEX_FILTER_NEAREST &&
77           mag_img_filter == PIPE_TEX_FILTER_NEAREST) {
78          return FALSE;
79       } else {
80          return TRUE;
81       }
82    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
83    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
84       return TRUE;
85    default:
86       assert(0 && "unexpected wrap mode");
87       return FALSE;
88    }
89 }
90 
91 
92 /**
93  * Initialize lp_sampler_static_texture_state object with the gallium
94  * texture/sampler_view state (this contains the parts which are
95  * considered static).
96  */
97 void
lp_sampler_static_texture_state(struct lp_static_texture_state * state,const struct pipe_sampler_view * view)98 lp_sampler_static_texture_state(struct lp_static_texture_state *state,
99                                 const struct pipe_sampler_view *view)
100 {
101    const struct pipe_resource *texture;
102 
103    memset(state, 0, sizeof *state);
104 
105    if (!view || !view->texture)
106       return;
107 
108    texture = view->texture;
109 
110    state->format            = view->format;
111    state->swizzle_r         = view->swizzle_r;
112    state->swizzle_g         = view->swizzle_g;
113    state->swizzle_b         = view->swizzle_b;
114    state->swizzle_a         = view->swizzle_a;
115 
116    state->target            = view->target;
117    state->pot_width         = util_is_power_of_two_or_zero(texture->width0);
118    state->pot_height        = util_is_power_of_two_or_zero(texture->height0);
119    state->pot_depth         = util_is_power_of_two_or_zero(texture->depth0);
120    state->level_zero_only   = !view->u.tex.last_level;
121 
122    /*
123     * the layer / element / level parameters are all either dynamic
124     * state or handled transparently wrt execution.
125     */
126 }
127 
128 /**
129  * Initialize lp_sampler_static_texture_state object with the gallium
130  * texture/sampler_view state (this contains the parts which are
131  * considered static).
132  */
133 void
lp_sampler_static_texture_state_image(struct lp_static_texture_state * state,const struct pipe_image_view * view)134 lp_sampler_static_texture_state_image(struct lp_static_texture_state *state,
135                                       const struct pipe_image_view *view)
136 {
137    const struct pipe_resource *resource;
138 
139    memset(state, 0, sizeof *state);
140 
141    if (!view || !view->resource)
142       return;
143 
144    resource = view->resource;
145 
146    state->format            = view->format;
147    state->swizzle_r         = PIPE_SWIZZLE_X;
148    state->swizzle_g         = PIPE_SWIZZLE_Y;
149    state->swizzle_b         = PIPE_SWIZZLE_Z;
150    state->swizzle_a         = PIPE_SWIZZLE_W;
151 
152    state->target            = view->resource->target;
153    state->pot_width         = util_is_power_of_two_or_zero(resource->width0);
154    state->pot_height        = util_is_power_of_two_or_zero(resource->height0);
155    state->pot_depth         = util_is_power_of_two_or_zero(resource->depth0);
156    state->level_zero_only   = 0;
157 
158    /*
159     * the layer / element / level parameters are all either dynamic
160     * state or handled transparently wrt execution.
161     */
162 }
163 
164 /**
165  * Initialize lp_sampler_static_sampler_state object with the gallium sampler
166  * state (this contains the parts which are considered static).
167  */
168 void
lp_sampler_static_sampler_state(struct lp_static_sampler_state * state,const struct pipe_sampler_state * sampler)169 lp_sampler_static_sampler_state(struct lp_static_sampler_state *state,
170                                 const struct pipe_sampler_state *sampler)
171 {
172    memset(state, 0, sizeof *state);
173 
174    if (!sampler)
175       return;
176 
177    /*
178     * We don't copy sampler state over unless it is actually enabled, to avoid
179     * spurious recompiles, as the sampler static state is part of the shader
180     * key.
181     *
182     * Ideally gallium frontends or cso_cache module would make all state
183     * canonical, but until that happens it's better to be safe than sorry here.
184     *
185     * XXX: Actually there's much more than can be done here, especially
186     * regarding 1D/2D/3D/CUBE textures, wrap modes, etc.
187     */
188 
189    state->wrap_s            = sampler->wrap_s;
190    state->wrap_t            = sampler->wrap_t;
191    state->wrap_r            = sampler->wrap_r;
192    state->min_img_filter    = sampler->min_img_filter;
193    state->mag_img_filter    = sampler->mag_img_filter;
194    state->min_mip_filter    = sampler->min_mip_filter;
195    state->seamless_cube_map = sampler->seamless_cube_map;
196    state->reduction_mode    = sampler->reduction_mode;
197    state->aniso = sampler->max_anisotropy > 1.0f;
198 
199    if (sampler->max_lod > 0.0f) {
200       state->max_lod_pos = 1;
201    }
202 
203    if (sampler->lod_bias != 0.0f) {
204       state->lod_bias_non_zero = 1;
205    }
206 
207    if (state->min_mip_filter != PIPE_TEX_MIPFILTER_NONE ||
208        state->min_img_filter != state->mag_img_filter) {
209 
210       /* If min_lod == max_lod we can greatly simplify mipmap selection.
211        * This is a case that occurs during automatic mipmap generation.
212        */
213       if (sampler->min_lod == sampler->max_lod) {
214          state->min_max_lod_equal = 1;
215       } else {
216          if (sampler->min_lod > 0.0f) {
217             state->apply_min_lod = 1;
218          }
219 
220          /*
221           * XXX this won't do anything with the mesa state tracker which always
222           * sets max_lod to not more than actually present mip maps...
223           */
224          if (sampler->max_lod < (PIPE_MAX_TEXTURE_LEVELS - 1)) {
225             state->apply_max_lod = 1;
226          }
227       }
228    }
229 
230    state->compare_mode      = sampler->compare_mode;
231    if (sampler->compare_mode != PIPE_TEX_COMPARE_NONE) {
232       state->compare_func   = sampler->compare_func;
233    }
234 
235    state->normalized_coords = sampler->normalized_coords;
236 }
237 
238 /* build aniso pmin value */
239 static LLVMValueRef
lp_build_pmin(struct lp_build_sample_context * bld,unsigned texture_unit,LLVMValueRef s,LLVMValueRef t,LLVMValueRef max_aniso)240 lp_build_pmin(struct lp_build_sample_context *bld,
241               unsigned texture_unit,
242               LLVMValueRef s,
243               LLVMValueRef t,
244               LLVMValueRef max_aniso)
245 {
246    struct gallivm_state *gallivm = bld->gallivm;
247    LLVMBuilderRef builder = bld->gallivm->builder;
248    struct lp_build_context *coord_bld = &bld->coord_bld;
249    struct lp_build_context *int_size_bld = &bld->int_size_in_bld;
250    struct lp_build_context *float_size_bld = &bld->float_size_in_bld;
251    struct lp_build_context *pmin_bld = &bld->lodf_bld;
252    LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
253    LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0);
254    LLVMValueRef index1 = LLVMConstInt(i32t, 1, 0);
255    LLVMValueRef ddx_ddy = lp_build_packed_ddx_ddy_twocoord(coord_bld, s, t);
256    LLVMValueRef int_size, float_size;
257    LLVMValueRef first_level, first_level_vec;
258    unsigned length = coord_bld->type.length;
259    unsigned num_quads = length / 4;
260    boolean pmin_per_quad = pmin_bld->type.length != length;
261    unsigned i;
262 
263    first_level = bld->dynamic_state->first_level(bld->dynamic_state, bld->gallivm,
264                                                  bld->context_ptr, texture_unit, NULL);
265    first_level_vec = lp_build_broadcast_scalar(int_size_bld, first_level);
266    int_size = lp_build_minify(int_size_bld, bld->int_size, first_level_vec, TRUE);
267    float_size = lp_build_int_to_float(float_size_bld, int_size);
268    max_aniso = lp_build_broadcast_scalar(coord_bld, max_aniso);
269    max_aniso = lp_build_mul(coord_bld, max_aniso, max_aniso);
270 
271    static const unsigned char swizzle01[] = { /* no-op swizzle */
272       0, 1,
273       LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
274    };
275    static const unsigned char swizzle23[] = {
276       2, 3,
277       LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
278    };
279    LLVMValueRef ddx_ddys, ddx_ddyt, floatdim, shuffles[LP_MAX_VECTOR_LENGTH / 4];
280 
281    for (i = 0; i < num_quads; i++) {
282       shuffles[i*4+0] = shuffles[i*4+1] = index0;
283       shuffles[i*4+2] = shuffles[i*4+3] = index1;
284    }
285    floatdim = LLVMBuildShuffleVector(builder, float_size, float_size,
286                                      LLVMConstVector(shuffles, length), "");
287    ddx_ddy = lp_build_mul(coord_bld, ddx_ddy, floatdim);
288 
289    ddx_ddy = lp_build_mul(coord_bld, ddx_ddy, ddx_ddy);
290 
291    ddx_ddys = lp_build_swizzle_aos(coord_bld, ddx_ddy, swizzle01);
292    ddx_ddyt = lp_build_swizzle_aos(coord_bld, ddx_ddy, swizzle23);
293 
294    LLVMValueRef px2_py2 = lp_build_add(coord_bld, ddx_ddys, ddx_ddyt);
295 
296    static const unsigned char swizzle0[] = { /* no-op swizzle */
297      0, LP_BLD_SWIZZLE_DONTCARE,
298      LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
299    };
300    static const unsigned char swizzle1[] = {
301      1, LP_BLD_SWIZZLE_DONTCARE,
302      LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
303    };
304    LLVMValueRef px2 = lp_build_swizzle_aos(coord_bld, px2_py2, swizzle0);
305    LLVMValueRef py2 = lp_build_swizzle_aos(coord_bld, px2_py2, swizzle1);
306 
307    LLVMValueRef pmax2 = lp_build_max(coord_bld, px2, py2);
308    LLVMValueRef pmin2 = lp_build_min(coord_bld, px2, py2);
309 
310    LLVMValueRef temp = lp_build_mul(coord_bld, pmin2, max_aniso);
311 
312    LLVMValueRef comp = lp_build_compare(gallivm, coord_bld->type, PIPE_FUNC_GREATER,
313                                         pmin2, temp);
314 
315    LLVMValueRef pmin2_alt = lp_build_div(coord_bld, pmax2, max_aniso);
316 
317    pmin2 = lp_build_select(coord_bld, comp, pmin2_alt, pmin2);
318 
319    if (pmin_per_quad)
320       pmin2 = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
321                                         pmin_bld->type, pmin2, 0);
322    else
323       pmin2 = lp_build_swizzle_scalar_aos(pmin_bld, pmin2, 0, 4);
324    return pmin2;
325 }
326 
327 /**
328  * Generate code to compute coordinate gradient (rho).
329  * \param derivs  partial derivatives of (s, t, r, q) with respect to X and Y
330  *
331  * The resulting rho has bld->levelf format (per quad or per element).
332  */
333 static LLVMValueRef
lp_build_rho(struct lp_build_sample_context * bld,unsigned texture_unit,LLVMValueRef s,LLVMValueRef t,LLVMValueRef r,LLVMValueRef cube_rho,const struct lp_derivatives * derivs)334 lp_build_rho(struct lp_build_sample_context *bld,
335              unsigned texture_unit,
336              LLVMValueRef s,
337              LLVMValueRef t,
338              LLVMValueRef r,
339              LLVMValueRef cube_rho,
340              const struct lp_derivatives *derivs)
341 {
342    struct gallivm_state *gallivm = bld->gallivm;
343    struct lp_build_context *int_size_bld = &bld->int_size_in_bld;
344    struct lp_build_context *float_size_bld = &bld->float_size_in_bld;
345    struct lp_build_context *float_bld = &bld->float_bld;
346    struct lp_build_context *coord_bld = &bld->coord_bld;
347    struct lp_build_context *rho_bld = &bld->lodf_bld;
348    const unsigned dims = bld->dims;
349    LLVMValueRef ddx_ddy[2] = {NULL};
350    LLVMBuilderRef builder = bld->gallivm->builder;
351    LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
352    LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0);
353    LLVMValueRef index1 = LLVMConstInt(i32t, 1, 0);
354    LLVMValueRef index2 = LLVMConstInt(i32t, 2, 0);
355    LLVMValueRef rho_vec;
356    LLVMValueRef int_size, float_size;
357    LLVMValueRef rho;
358    LLVMValueRef first_level, first_level_vec;
359    unsigned length = coord_bld->type.length;
360    unsigned num_quads = length / 4;
361    boolean rho_per_quad = rho_bld->type.length != length;
362    boolean no_rho_opt = bld->no_rho_approx && (dims > 1);
363    unsigned i;
364    LLVMValueRef i32undef = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
365    LLVMValueRef rho_xvec, rho_yvec;
366 
367    /* Note that all simplified calculations will only work for isotropic filtering */
368 
369    /*
370     * rho calcs are always per quad except for explicit derivs (excluding
371     * the messy cube maps for now) when requested.
372     */
373 
374    first_level = bld->dynamic_state->first_level(bld->dynamic_state, bld->gallivm,
375                                                  bld->context_ptr, texture_unit, NULL);
376    first_level_vec = lp_build_broadcast_scalar(int_size_bld, first_level);
377    int_size = lp_build_minify(int_size_bld, bld->int_size, first_level_vec, TRUE);
378    float_size = lp_build_int_to_float(float_size_bld, int_size);
379 
380    if (cube_rho) {
381       LLVMValueRef cubesize;
382       LLVMValueRef index0 = lp_build_const_int32(gallivm, 0);
383 
384       /*
385        * Cube map code did already everything except size mul and per-quad extraction.
386        * Luckily cube maps are always quadratic!
387        */
388       if (rho_per_quad) {
389          rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
390                                          rho_bld->type, cube_rho, 0);
391       }
392       else {
393          rho = lp_build_swizzle_scalar_aos(coord_bld, cube_rho, 0, 4);
394       }
395       /* Could optimize this for single quad just skip the broadcast */
396       cubesize = lp_build_extract_broadcast(gallivm, bld->float_size_in_type,
397                                             rho_bld->type, float_size, index0);
398       /* skipping sqrt hence returning rho squared */
399       cubesize = lp_build_mul(rho_bld, cubesize, cubesize);
400       rho = lp_build_mul(rho_bld, cubesize, rho);
401    }
402    else if (derivs) {
403       LLVMValueRef ddmax[3] = { NULL }, ddx[3] = { NULL }, ddy[3] = { NULL };
404       for (i = 0; i < dims; i++) {
405          LLVMValueRef floatdim;
406          LLVMValueRef indexi = lp_build_const_int32(gallivm, i);
407 
408          floatdim = lp_build_extract_broadcast(gallivm, bld->float_size_in_type,
409                                                coord_bld->type, float_size, indexi);
410 
411          /*
412           * note that for rho_per_quad case could reduce math (at some shuffle
413           * cost), but for now use same code to per-pixel lod case.
414           */
415          if (no_rho_opt) {
416             ddx[i] = lp_build_mul(coord_bld, floatdim, derivs->ddx[i]);
417             ddy[i] = lp_build_mul(coord_bld, floatdim, derivs->ddy[i]);
418             ddx[i] = lp_build_mul(coord_bld, ddx[i], ddx[i]);
419             ddy[i] = lp_build_mul(coord_bld, ddy[i], ddy[i]);
420          }
421          else {
422             LLVMValueRef tmpx, tmpy;
423             tmpx = lp_build_abs(coord_bld, derivs->ddx[i]);
424             tmpy = lp_build_abs(coord_bld, derivs->ddy[i]);
425             ddmax[i] = lp_build_max(coord_bld, tmpx, tmpy);
426             ddmax[i] = lp_build_mul(coord_bld, floatdim, ddmax[i]);
427          }
428       }
429       if (no_rho_opt) {
430          rho_xvec = lp_build_add(coord_bld, ddx[0], ddx[1]);
431          rho_yvec = lp_build_add(coord_bld, ddy[0], ddy[1]);
432          if (dims > 2) {
433             rho_xvec = lp_build_add(coord_bld, rho_xvec, ddx[2]);
434             rho_yvec = lp_build_add(coord_bld, rho_yvec, ddy[2]);
435          }
436          rho = lp_build_max(coord_bld, rho_xvec, rho_yvec);
437          /* skipping sqrt hence returning rho squared */
438      }
439       else {
440          rho = ddmax[0];
441          if (dims > 1) {
442             rho = lp_build_max(coord_bld, rho, ddmax[1]);
443             if (dims > 2) {
444                rho = lp_build_max(coord_bld, rho, ddmax[2]);
445             }
446          }
447       }
448       if (rho_per_quad) {
449          /*
450           * rho_vec contains per-pixel rho, convert to scalar per quad.
451           */
452          rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
453                                          rho_bld->type, rho, 0);
454       }
455    }
456    else {
457       /*
458        * This looks all a bit complex, but it's not that bad
459        * (the shuffle code makes it look worse than it is).
460        * Still, might not be ideal for all cases.
461        */
462       static const unsigned char swizzle0[] = { /* no-op swizzle */
463          0, LP_BLD_SWIZZLE_DONTCARE,
464          LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
465       };
466       static const unsigned char swizzle1[] = {
467          1, LP_BLD_SWIZZLE_DONTCARE,
468          LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
469       };
470       static const unsigned char swizzle2[] = {
471          2, LP_BLD_SWIZZLE_DONTCARE,
472          LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
473       };
474 
475       if (dims < 2) {
476          ddx_ddy[0] = lp_build_packed_ddx_ddy_onecoord(coord_bld, s);
477       }
478       else if (dims >= 2) {
479          ddx_ddy[0] = lp_build_packed_ddx_ddy_twocoord(coord_bld, s, t);
480          if (dims > 2) {
481             ddx_ddy[1] = lp_build_packed_ddx_ddy_onecoord(coord_bld, r);
482          }
483       }
484 
485       if (no_rho_opt) {
486          static const unsigned char swizzle01[] = { /* no-op swizzle */
487             0, 1,
488             LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
489          };
490          static const unsigned char swizzle23[] = {
491             2, 3,
492             LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
493          };
494          LLVMValueRef ddx_ddys, ddx_ddyt, floatdim, shuffles[LP_MAX_VECTOR_LENGTH / 4];
495 
496          for (i = 0; i < num_quads; i++) {
497             shuffles[i*4+0] = shuffles[i*4+1] = index0;
498             shuffles[i*4+2] = shuffles[i*4+3] = index1;
499          }
500          floatdim = LLVMBuildShuffleVector(builder, float_size, float_size,
501                                            LLVMConstVector(shuffles, length), "");
502          ddx_ddy[0] = lp_build_mul(coord_bld, ddx_ddy[0], floatdim);
503          ddx_ddy[0] = lp_build_mul(coord_bld, ddx_ddy[0], ddx_ddy[0]);
504          ddx_ddys = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle01);
505          ddx_ddyt = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle23);
506          rho_vec = lp_build_add(coord_bld, ddx_ddys, ddx_ddyt);
507 
508          if (dims > 2) {
509             static const unsigned char swizzle02[] = {
510                0, 2,
511                LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
512             };
513             floatdim = lp_build_extract_broadcast(gallivm, bld->float_size_in_type,
514                                                   coord_bld->type, float_size, index2);
515             ddx_ddy[1] = lp_build_mul(coord_bld, ddx_ddy[1], floatdim);
516             ddx_ddy[1] = lp_build_mul(coord_bld, ddx_ddy[1], ddx_ddy[1]);
517             ddx_ddy[1] = lp_build_swizzle_aos(coord_bld, ddx_ddy[1], swizzle02);
518             rho_vec = lp_build_add(coord_bld, rho_vec, ddx_ddy[1]);
519          }
520 
521          rho_xvec = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle0);
522          rho_yvec = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle1);
523          rho = lp_build_max(coord_bld, rho_xvec, rho_yvec);
524 
525          if (rho_per_quad) {
526             rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
527                                             rho_bld->type, rho, 0);
528          }
529          else {
530             rho = lp_build_swizzle_scalar_aos(coord_bld, rho, 0, 4);
531          }
532          /* skipping sqrt hence returning rho squared */
533       }
534       else {
535          ddx_ddy[0] = lp_build_abs(coord_bld, ddx_ddy[0]);
536          if (dims > 2) {
537             ddx_ddy[1] = lp_build_abs(coord_bld, ddx_ddy[1]);
538          }
539          else {
540             ddx_ddy[1] = NULL; /* silence compiler warning */
541          }
542 
543          if (dims < 2) {
544             rho_xvec = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle0);
545             rho_yvec = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle2);
546          }
547          else if (dims == 2) {
548             static const unsigned char swizzle02[] = {
549                0, 2,
550                LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
551             };
552             static const unsigned char swizzle13[] = {
553                1, 3,
554                LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
555             };
556             rho_xvec = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle02);
557             rho_yvec = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle13);
558          }
559          else {
560             LLVMValueRef shuffles1[LP_MAX_VECTOR_LENGTH];
561             LLVMValueRef shuffles2[LP_MAX_VECTOR_LENGTH];
562             assert(dims == 3);
563             for (i = 0; i < num_quads; i++) {
564                shuffles1[4*i + 0] = lp_build_const_int32(gallivm, 4*i);
565                shuffles1[4*i + 1] = lp_build_const_int32(gallivm, 4*i + 2);
566                shuffles1[4*i + 2] = lp_build_const_int32(gallivm, length + 4*i);
567                shuffles1[4*i + 3] = i32undef;
568                shuffles2[4*i + 0] = lp_build_const_int32(gallivm, 4*i + 1);
569                shuffles2[4*i + 1] = lp_build_const_int32(gallivm, 4*i + 3);
570                shuffles2[4*i + 2] = lp_build_const_int32(gallivm, length + 4*i + 2);
571                shuffles2[4*i + 3] = i32undef;
572             }
573             rho_xvec = LLVMBuildShuffleVector(builder, ddx_ddy[0], ddx_ddy[1],
574                                               LLVMConstVector(shuffles1, length), "");
575             rho_yvec = LLVMBuildShuffleVector(builder, ddx_ddy[0], ddx_ddy[1],
576                                               LLVMConstVector(shuffles2, length), "");
577          }
578 
579          rho_vec = lp_build_max(coord_bld, rho_xvec, rho_yvec);
580 
581          if (bld->coord_type.length > 4) {
582             /* expand size to each quad */
583             if (dims > 1) {
584                /* could use some broadcast_vector helper for this? */
585                LLVMValueRef src[LP_MAX_VECTOR_LENGTH/4];
586                for (i = 0; i < num_quads; i++) {
587                   src[i] = float_size;
588                }
589                float_size = lp_build_concat(bld->gallivm, src, float_size_bld->type, num_quads);
590             }
591             else {
592                float_size = lp_build_broadcast_scalar(coord_bld, float_size);
593             }
594             rho_vec = lp_build_mul(coord_bld, rho_vec, float_size);
595 
596             if (dims <= 1) {
597                rho = rho_vec;
598             }
599             else {
600                if (dims >= 2) {
601                   LLVMValueRef rho_s, rho_t, rho_r;
602 
603                   rho_s = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle0);
604                   rho_t = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle1);
605 
606                   rho = lp_build_max(coord_bld, rho_s, rho_t);
607 
608                   if (dims >= 3) {
609                      rho_r = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle2);
610                      rho = lp_build_max(coord_bld, rho, rho_r);
611                   }
612                }
613             }
614             if (rho_per_quad) {
615                rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
616                                                rho_bld->type, rho, 0);
617             }
618             else {
619                rho = lp_build_swizzle_scalar_aos(coord_bld, rho, 0, 4);
620             }
621          }
622          else {
623             if (dims <= 1) {
624                rho_vec = LLVMBuildExtractElement(builder, rho_vec, index0, "");
625             }
626             rho_vec = lp_build_mul(float_size_bld, rho_vec, float_size);
627 
628             if (dims <= 1) {
629                rho = rho_vec;
630             }
631             else {
632                if (dims >= 2) {
633                   LLVMValueRef rho_s, rho_t, rho_r;
634 
635                   rho_s = LLVMBuildExtractElement(builder, rho_vec, index0, "");
636                   rho_t = LLVMBuildExtractElement(builder, rho_vec, index1, "");
637 
638                   rho = lp_build_max(float_bld, rho_s, rho_t);
639 
640                   if (dims >= 3) {
641                      rho_r = LLVMBuildExtractElement(builder, rho_vec, index2, "");
642                      rho = lp_build_max(float_bld, rho, rho_r);
643                   }
644                }
645             }
646             if (!rho_per_quad) {
647                rho = lp_build_broadcast_scalar(rho_bld, rho);
648             }
649          }
650       }
651    }
652 
653    return rho;
654 }
655 
656 
657 /*
658  * Bri-linear lod computation
659  *
660  * Use a piece-wise linear approximation of log2 such that:
661  * - round to nearest, for values in the neighborhood of -1, 0, 1, 2, etc.
662  * - linear approximation for values in the neighborhood of 0.5, 1.5., etc,
663  *   with the steepness specified in 'factor'
664  * - exact result for 0.5, 1.5, etc.
665  *
666  *
667  *   1.0 -              /----*
668  *                     /
669  *                    /
670  *                   /
671  *   0.5 -          *
672  *                 /
673  *                /
674  *               /
675  *   0.0 - *----/
676  *
677  *         |                 |
678  *        2^0               2^1
679  *
680  * This is a technique also commonly used in hardware:
681  * - http://ixbtlabs.com/articles2/gffx/nv40-rx800-3.html
682  *
683  * TODO: For correctness, this should only be applied when texture is known to
684  * have regular mipmaps, i.e., mipmaps derived from the base level.
685  *
686  * TODO: This could be done in fixed point, where applicable.
687  */
688 static void
lp_build_brilinear_lod(struct lp_build_context * bld,LLVMValueRef lod,double factor,LLVMValueRef * out_lod_ipart,LLVMValueRef * out_lod_fpart)689 lp_build_brilinear_lod(struct lp_build_context *bld,
690                        LLVMValueRef lod,
691                        double factor,
692                        LLVMValueRef *out_lod_ipart,
693                        LLVMValueRef *out_lod_fpart)
694 {
695    LLVMValueRef lod_fpart;
696    double pre_offset = (factor - 0.5)/factor - 0.5;
697    double post_offset = 1 - factor;
698 
699    if (0) {
700       lp_build_printf(bld->gallivm, "lod = %f\n", lod);
701    }
702 
703    lod = lp_build_add(bld, lod,
704                       lp_build_const_vec(bld->gallivm, bld->type, pre_offset));
705 
706    lp_build_ifloor_fract(bld, lod, out_lod_ipart, &lod_fpart);
707 
708    lod_fpart = lp_build_mad(bld, lod_fpart,
709                             lp_build_const_vec(bld->gallivm, bld->type, factor),
710                             lp_build_const_vec(bld->gallivm, bld->type, post_offset));
711 
712    /*
713     * It's not necessary to clamp lod_fpart since:
714     * - the above expression will never produce numbers greater than one.
715     * - the mip filtering branch is only taken if lod_fpart is positive
716     */
717 
718    *out_lod_fpart = lod_fpart;
719 
720    if (0) {
721       lp_build_printf(bld->gallivm, "lod_ipart = %i\n", *out_lod_ipart);
722       lp_build_printf(bld->gallivm, "lod_fpart = %f\n\n", *out_lod_fpart);
723    }
724 }
725 
726 
727 /*
728  * Combined log2 and brilinear lod computation.
729  *
730  * It's in all identical to calling lp_build_fast_log2() and
731  * lp_build_brilinear_lod() above, but by combining we can compute the integer
732  * and fractional part independently.
733  */
734 static void
lp_build_brilinear_rho(struct lp_build_context * bld,LLVMValueRef rho,double factor,LLVMValueRef * out_lod_ipart,LLVMValueRef * out_lod_fpart)735 lp_build_brilinear_rho(struct lp_build_context *bld,
736                        LLVMValueRef rho,
737                        double factor,
738                        LLVMValueRef *out_lod_ipart,
739                        LLVMValueRef *out_lod_fpart)
740 {
741    LLVMValueRef lod_ipart;
742    LLVMValueRef lod_fpart;
743 
744    const double pre_factor = (2*factor - 0.5)/(M_SQRT2*factor);
745    const double post_offset = 1 - 2*factor;
746 
747    assert(bld->type.floating);
748 
749    assert(lp_check_value(bld->type, rho));
750 
751    /*
752     * The pre factor will make the intersections with the exact powers of two
753     * happen precisely where we want them to be, which means that the integer
754     * part will not need any post adjustments.
755     */
756    rho = lp_build_mul(bld, rho,
757                       lp_build_const_vec(bld->gallivm, bld->type, pre_factor));
758 
759    /* ipart = ifloor(log2(rho)) */
760    lod_ipart = lp_build_extract_exponent(bld, rho, 0);
761 
762    /* fpart = rho / 2**ipart */
763    lod_fpart = lp_build_extract_mantissa(bld, rho);
764 
765    lod_fpart = lp_build_mad(bld, lod_fpart,
766                             lp_build_const_vec(bld->gallivm, bld->type, factor),
767                             lp_build_const_vec(bld->gallivm, bld->type, post_offset));
768 
769    /*
770     * Like lp_build_brilinear_lod, it's not necessary to clamp lod_fpart since:
771     * - the above expression will never produce numbers greater than one.
772     * - the mip filtering branch is only taken if lod_fpart is positive
773     */
774 
775    *out_lod_ipart = lod_ipart;
776    *out_lod_fpart = lod_fpart;
777 }
778 
779 
780 /**
781  * Fast implementation of iround(log2(sqrt(x))), based on
782  * log2(x^n) == n*log2(x).
783  *
784  * Gives accurate results all the time.
785  * (Could be trivially extended to handle other power-of-two roots.)
786  */
787 static LLVMValueRef
lp_build_ilog2_sqrt(struct lp_build_context * bld,LLVMValueRef x)788 lp_build_ilog2_sqrt(struct lp_build_context *bld,
789                     LLVMValueRef x)
790 {
791    LLVMBuilderRef builder = bld->gallivm->builder;
792    LLVMValueRef ipart;
793    struct lp_type i_type = lp_int_type(bld->type);
794    LLVMValueRef one = lp_build_const_int_vec(bld->gallivm, i_type, 1);
795 
796    assert(bld->type.floating);
797 
798    assert(lp_check_value(bld->type, x));
799 
800    /* ipart = log2(x) + 0.5 = 0.5*(log2(x^2) + 1.0) */
801    ipart = lp_build_extract_exponent(bld, x, 1);
802    ipart = LLVMBuildAShr(builder, ipart, one, "");
803 
804    return ipart;
805 }
806 
807 
808 /**
809  * Generate code to compute texture level of detail (lambda).
810  * \param derivs  partial derivatives of (s, t, r, q) with respect to X and Y
811  * \param lod_bias  optional float vector with the shader lod bias
812  * \param explicit_lod  optional float vector with the explicit lod
813  * \param cube_rho  rho calculated by cube coord mapping (optional)
814  * \param out_lod_ipart  integer part of lod
815  * \param out_lod_fpart  float part of lod (never larger than 1 but may be negative)
816  * \param out_lod_positive  (mask) if lod is positive (i.e. texture is minified)
817  *
818  * The resulting lod can be scalar per quad or be per element.
819  */
820 void
lp_build_lod_selector(struct lp_build_sample_context * bld,boolean is_lodq,unsigned texture_unit,unsigned sampler_unit,LLVMValueRef s,LLVMValueRef t,LLVMValueRef r,LLVMValueRef cube_rho,const struct lp_derivatives * derivs,LLVMValueRef lod_bias,LLVMValueRef explicit_lod,unsigned mip_filter,LLVMValueRef max_aniso,LLVMValueRef * out_lod,LLVMValueRef * out_lod_ipart,LLVMValueRef * out_lod_fpart,LLVMValueRef * out_lod_positive)821 lp_build_lod_selector(struct lp_build_sample_context *bld,
822                       boolean is_lodq,
823                       unsigned texture_unit,
824                       unsigned sampler_unit,
825                       LLVMValueRef s,
826                       LLVMValueRef t,
827                       LLVMValueRef r,
828                       LLVMValueRef cube_rho,
829                       const struct lp_derivatives *derivs,
830                       LLVMValueRef lod_bias, /* optional */
831                       LLVMValueRef explicit_lod, /* optional */
832                       unsigned mip_filter,
833                       LLVMValueRef max_aniso,
834                       LLVMValueRef *out_lod,
835                       LLVMValueRef *out_lod_ipart,
836                       LLVMValueRef *out_lod_fpart,
837                       LLVMValueRef *out_lod_positive)
838 
839 {
840    LLVMBuilderRef builder = bld->gallivm->builder;
841    struct lp_sampler_dynamic_state *dynamic_state = bld->dynamic_state;
842    struct lp_build_context *lodf_bld = &bld->lodf_bld;
843    LLVMValueRef lod;
844 
845    *out_lod_ipart = bld->lodi_bld.zero;
846    *out_lod_positive = bld->lodi_bld.zero;
847    *out_lod_fpart = lodf_bld->zero;
848 
849    /*
850     * For determining min/mag, we follow GL 4.1 spec, 3.9.12 Texture Magnification:
851     * "Implementations may either unconditionally assume c = 0 for the minification
852     * vs. magnification switch-over point, or may choose to make c depend on the
853     * combination of minification and magnification modes as follows: if the
854     * magnification filter is given by LINEAR and the minification filter is given
855     * by NEAREST_MIPMAP_NEAREST or NEAREST_MIPMAP_LINEAR, then c = 0.5. This is
856     * done to ensure that a minified texture does not appear "sharper" than a
857     * magnified texture. Otherwise c = 0."
858     * And 3.9.11 Texture Minification:
859     * "If lod is less than or equal to the constant c (see section 3.9.12) the
860     * texture is said to be magnified; if it is greater, the texture is minified."
861     * So, using 0 as switchover point always, and using magnification for lod == 0.
862     * Note that the always c = 0 behavior is new (first appearing in GL 3.1 spec),
863     * old GL versions required 0.5 for the modes listed above.
864     * I have no clue about the (undocumented) wishes of d3d9/d3d10 here!
865     */
866 
867    if (bld->static_sampler_state->min_max_lod_equal && !is_lodq) {
868       /* User is forcing sampling from a particular mipmap level.
869        * This is hit during mipmap generation.
870        */
871       LLVMValueRef min_lod =
872          dynamic_state->min_lod(dynamic_state, bld->gallivm,
873                                 bld->context_ptr, sampler_unit);
874 
875       lod = lp_build_broadcast_scalar(lodf_bld, min_lod);
876    }
877    else {
878       if (explicit_lod) {
879          if (bld->num_lods != bld->coord_type.length)
880             lod = lp_build_pack_aos_scalars(bld->gallivm, bld->coord_bld.type,
881                                             lodf_bld->type, explicit_lod, 0);
882          else
883             lod = explicit_lod;
884       }
885       else {
886          LLVMValueRef rho;
887          boolean rho_squared = (bld->no_rho_approx &&
888                                 (bld->dims > 1)) || cube_rho;
889 
890          if (bld->static_sampler_state->aniso &&
891              !explicit_lod) {
892             rho = lp_build_pmin(bld, texture_unit, s, t, max_aniso);
893             rho_squared = true;
894          } else
895             rho = lp_build_rho(bld, texture_unit, s, t, r, cube_rho, derivs);
896 
897          /*
898           * Compute lod = log2(rho)
899           */
900 
901          if (!lod_bias && !is_lodq &&
902              !bld->static_sampler_state->aniso &&
903              !bld->static_sampler_state->lod_bias_non_zero &&
904              !bld->static_sampler_state->apply_max_lod &&
905              !bld->static_sampler_state->apply_min_lod) {
906             /*
907              * Special case when there are no post-log2 adjustments, which
908              * saves instructions but keeping the integer and fractional lod
909              * computations separate from the start.
910              */
911 
912             if (mip_filter == PIPE_TEX_MIPFILTER_NONE ||
913                 mip_filter == PIPE_TEX_MIPFILTER_NEAREST) {
914                /*
915                 * Don't actually need both values all the time, lod_ipart is
916                 * needed for nearest mipfilter, lod_positive if min != mag.
917                 */
918                if (rho_squared) {
919                   *out_lod_ipart = lp_build_ilog2_sqrt(lodf_bld, rho);
920                }
921                else {
922                   *out_lod_ipart = lp_build_ilog2(lodf_bld, rho);
923                }
924                *out_lod_positive = lp_build_cmp(lodf_bld, PIPE_FUNC_GREATER,
925                                                 rho, lodf_bld->one);
926                return;
927             }
928             if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR &&
929                 !bld->no_brilinear && !rho_squared &&
930                 !bld->static_sampler_state->aniso) {
931                /*
932                 * This can't work if rho is squared. Not sure if it could be
933                 * fixed while keeping it worthwile, could also do sqrt here
934                 * but brilinear and no_rho_opt seems like a combination not
935                 * making much sense anyway so just use ordinary path below.
936                 */
937                lp_build_brilinear_rho(lodf_bld, rho, BRILINEAR_FACTOR,
938                                       out_lod_ipart, out_lod_fpart);
939                *out_lod_positive = lp_build_cmp(lodf_bld, PIPE_FUNC_GREATER,
940                                                 rho, lodf_bld->one);
941                return;
942             }
943          }
944 
945          if (0) {
946             lod = lp_build_log2(lodf_bld, rho);
947          }
948          else {
949             /* get more accurate results if we just sqaure rho always */
950             if (!rho_squared)
951                rho = lp_build_mul(lodf_bld, rho, rho);
952             lod = lp_build_fast_log2(lodf_bld, rho);
953          }
954 
955          /* log2(x^2) == 0.5*log2(x) */
956          lod = lp_build_mul(lodf_bld, lod,
957                             lp_build_const_vec(bld->gallivm, lodf_bld->type, 0.5F));
958 
959          /* add shader lod bias */
960          if (lod_bias) {
961             if (bld->num_lods != bld->coord_type.length)
962                lod_bias = lp_build_pack_aos_scalars(bld->gallivm, bld->coord_bld.type,
963                                                     lodf_bld->type, lod_bias, 0);
964             lod = LLVMBuildFAdd(builder, lod, lod_bias, "shader_lod_bias");
965          }
966       }
967 
968       /* add sampler lod bias */
969       if (bld->static_sampler_state->lod_bias_non_zero) {
970          LLVMValueRef sampler_lod_bias =
971             dynamic_state->lod_bias(dynamic_state, bld->gallivm,
972                                     bld->context_ptr, sampler_unit);
973          sampler_lod_bias = lp_build_broadcast_scalar(lodf_bld,
974                                                       sampler_lod_bias);
975          lod = LLVMBuildFAdd(builder, lod, sampler_lod_bias, "sampler_lod_bias");
976       }
977 
978       if (is_lodq) {
979          *out_lod = lod;
980       }
981 
982       /* clamp lod */
983       if (bld->static_sampler_state->apply_max_lod) {
984          LLVMValueRef max_lod =
985             dynamic_state->max_lod(dynamic_state, bld->gallivm,
986                                    bld->context_ptr, sampler_unit);
987          max_lod = lp_build_broadcast_scalar(lodf_bld, max_lod);
988 
989          lod = lp_build_min(lodf_bld, lod, max_lod);
990       }
991       if (bld->static_sampler_state->apply_min_lod) {
992          LLVMValueRef min_lod =
993             dynamic_state->min_lod(dynamic_state, bld->gallivm,
994                                    bld->context_ptr, sampler_unit);
995          min_lod = lp_build_broadcast_scalar(lodf_bld, min_lod);
996 
997          lod = lp_build_max(lodf_bld, lod, min_lod);
998       }
999 
1000       if (is_lodq) {
1001          *out_lod_fpart = lod;
1002          return;
1003       }
1004    }
1005 
1006    *out_lod_positive = lp_build_cmp(lodf_bld, PIPE_FUNC_GREATER,
1007                                     lod, lodf_bld->zero);
1008 
1009    if (bld->static_sampler_state->aniso) {
1010       *out_lod_ipart = lp_build_itrunc(lodf_bld, lod);
1011    } else if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1012       if (!bld->no_brilinear) {
1013          lp_build_brilinear_lod(lodf_bld, lod, BRILINEAR_FACTOR,
1014                                 out_lod_ipart, out_lod_fpart);
1015       }
1016       else {
1017          lp_build_ifloor_fract(lodf_bld, lod, out_lod_ipart, out_lod_fpart);
1018       }
1019 
1020       lp_build_name(*out_lod_fpart, "lod_fpart");
1021    }
1022    else {
1023       *out_lod_ipart = lp_build_iround(lodf_bld, lod);
1024    }
1025 
1026    lp_build_name(*out_lod_ipart, "lod_ipart");
1027 
1028    return;
1029 }
1030 
1031 
1032 /**
1033  * For PIPE_TEX_MIPFILTER_NEAREST, convert int part of lod
1034  * to actual mip level.
1035  * Note: this is all scalar per quad code.
1036  * \param lod_ipart  int texture level of detail
1037  * \param level_out  returns integer
1038  * \param out_of_bounds returns per coord out_of_bounds mask if provided
1039  */
1040 void
lp_build_nearest_mip_level(struct lp_build_sample_context * bld,unsigned texture_unit,LLVMValueRef lod_ipart,LLVMValueRef * level_out,LLVMValueRef * out_of_bounds)1041 lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
1042                            unsigned texture_unit,
1043                            LLVMValueRef lod_ipart,
1044                            LLVMValueRef *level_out,
1045                            LLVMValueRef *out_of_bounds)
1046 {
1047    struct lp_build_context *leveli_bld = &bld->leveli_bld;
1048    struct lp_sampler_dynamic_state *dynamic_state = bld->dynamic_state;
1049    LLVMValueRef first_level, last_level, level;
1050 
1051    first_level = dynamic_state->first_level(dynamic_state, bld->gallivm,
1052                                             bld->context_ptr, texture_unit, NULL);
1053    last_level = dynamic_state->last_level(dynamic_state, bld->gallivm,
1054                                           bld->context_ptr, texture_unit, NULL);
1055    first_level = lp_build_broadcast_scalar(leveli_bld, first_level);
1056    last_level = lp_build_broadcast_scalar(leveli_bld, last_level);
1057 
1058    level = lp_build_add(leveli_bld, lod_ipart, first_level);
1059 
1060    if (out_of_bounds) {
1061       LLVMValueRef out, out1;
1062       out = lp_build_cmp(leveli_bld, PIPE_FUNC_LESS, level, first_level);
1063       out1 = lp_build_cmp(leveli_bld, PIPE_FUNC_GREATER, level, last_level);
1064       out = lp_build_or(leveli_bld, out, out1);
1065       if (bld->num_mips == bld->coord_bld.type.length) {
1066          *out_of_bounds = out;
1067       }
1068       else if (bld->num_mips == 1) {
1069          *out_of_bounds = lp_build_broadcast_scalar(&bld->int_coord_bld, out);
1070       }
1071       else {
1072          assert(bld->num_mips == bld->coord_bld.type.length / 4);
1073          *out_of_bounds = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
1074                                                                 leveli_bld->type,
1075                                                                 bld->int_coord_bld.type,
1076                                                                 out);
1077       }
1078       level = lp_build_andnot(&bld->int_coord_bld, level, *out_of_bounds);
1079       *level_out = level;
1080    }
1081    else {
1082       /* clamp level to legal range of levels */
1083       *level_out = lp_build_clamp(leveli_bld, level, first_level, last_level);
1084 
1085    }
1086 }
1087 
1088 
1089 /**
1090  * For PIPE_TEX_MIPFILTER_LINEAR, convert per-quad (or per element) int LOD(s)
1091  * to two (per-quad) (adjacent) mipmap level indexes, and fix up float lod
1092  * part accordingly.
1093  * Later, we'll sample from those two mipmap levels and interpolate between them.
1094  */
1095 void
lp_build_linear_mip_levels(struct lp_build_sample_context * bld,unsigned texture_unit,LLVMValueRef lod_ipart,LLVMValueRef * lod_fpart_inout,LLVMValueRef * level0_out,LLVMValueRef * level1_out)1096 lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
1097                            unsigned texture_unit,
1098                            LLVMValueRef lod_ipart,
1099                            LLVMValueRef *lod_fpart_inout,
1100                            LLVMValueRef *level0_out,
1101                            LLVMValueRef *level1_out)
1102 {
1103    LLVMBuilderRef builder = bld->gallivm->builder;
1104    struct lp_sampler_dynamic_state *dynamic_state = bld->dynamic_state;
1105    struct lp_build_context *leveli_bld = &bld->leveli_bld;
1106    struct lp_build_context *levelf_bld = &bld->levelf_bld;
1107    LLVMValueRef first_level, last_level;
1108    LLVMValueRef clamp_min;
1109    LLVMValueRef clamp_max;
1110 
1111    assert(bld->num_lods == bld->num_mips);
1112 
1113    first_level = dynamic_state->first_level(dynamic_state, bld->gallivm,
1114                                             bld->context_ptr, texture_unit, NULL);
1115    last_level = dynamic_state->last_level(dynamic_state, bld->gallivm,
1116                                           bld->context_ptr, texture_unit, NULL);
1117    first_level = lp_build_broadcast_scalar(leveli_bld, first_level);
1118    last_level = lp_build_broadcast_scalar(leveli_bld, last_level);
1119 
1120    *level0_out = lp_build_add(leveli_bld, lod_ipart, first_level);
1121    *level1_out = lp_build_add(leveli_bld, *level0_out, leveli_bld->one);
1122 
1123    /*
1124     * Clamp both *level0_out and *level1_out to [first_level, last_level], with
1125     * the minimum number of comparisons, and zeroing lod_fpart in the extreme
1126     * ends in the process.
1127     */
1128 
1129    /* *level0_out < first_level */
1130    clamp_min = LLVMBuildICmp(builder, LLVMIntSLT,
1131                              *level0_out, first_level,
1132                              "clamp_lod_to_first");
1133 
1134    *level0_out = LLVMBuildSelect(builder, clamp_min,
1135                                  first_level, *level0_out, "");
1136 
1137    *level1_out = LLVMBuildSelect(builder, clamp_min,
1138                                  first_level, *level1_out, "");
1139 
1140    *lod_fpart_inout = LLVMBuildSelect(builder, clamp_min,
1141                                       levelf_bld->zero, *lod_fpart_inout, "");
1142 
1143    /* *level0_out >= last_level */
1144    clamp_max = LLVMBuildICmp(builder, LLVMIntSGE,
1145                              *level0_out, last_level,
1146                              "clamp_lod_to_last");
1147 
1148    *level0_out = LLVMBuildSelect(builder, clamp_max,
1149                                  last_level, *level0_out, "");
1150 
1151    *level1_out = LLVMBuildSelect(builder, clamp_max,
1152                                  last_level, *level1_out, "");
1153 
1154    *lod_fpart_inout = LLVMBuildSelect(builder, clamp_max,
1155                                       levelf_bld->zero, *lod_fpart_inout, "");
1156 
1157    lp_build_name(*level0_out, "texture%u_miplevel0", texture_unit);
1158    lp_build_name(*level1_out, "texture%u_miplevel1", texture_unit);
1159    lp_build_name(*lod_fpart_inout, "texture%u_mipweight", texture_unit);
1160 }
1161 
1162 
1163 /**
1164  * Return pointer to a single mipmap level.
1165  * \param level  integer mipmap level
1166  */
1167 LLVMValueRef
lp_build_get_mipmap_level(struct lp_build_sample_context * bld,LLVMValueRef level)1168 lp_build_get_mipmap_level(struct lp_build_sample_context *bld,
1169                           LLVMValueRef level)
1170 {
1171    LLVMBuilderRef builder = bld->gallivm->builder;
1172    LLVMValueRef indexes[2], data_ptr, mip_offset;
1173 
1174    indexes[0] = lp_build_const_int32(bld->gallivm, 0);
1175    indexes[1] = level;
1176    mip_offset = LLVMBuildGEP(builder, bld->mip_offsets, indexes, 2, "");
1177    mip_offset = LLVMBuildLoad(builder, mip_offset, "");
1178    data_ptr = LLVMBuildGEP(builder, bld->base_ptr, &mip_offset, 1, "");
1179    return data_ptr;
1180 }
1181 
1182 /**
1183  * Return (per-pixel) offsets to mip levels.
1184  * \param level  integer mipmap level
1185  */
1186 LLVMValueRef
lp_build_get_mip_offsets(struct lp_build_sample_context * bld,LLVMValueRef level)1187 lp_build_get_mip_offsets(struct lp_build_sample_context *bld,
1188                          LLVMValueRef level)
1189 {
1190    LLVMBuilderRef builder = bld->gallivm->builder;
1191    LLVMValueRef indexes[2], offsets, offset1;
1192 
1193    indexes[0] = lp_build_const_int32(bld->gallivm, 0);
1194    if (bld->num_mips == 1) {
1195       indexes[1] = level;
1196       offset1 = LLVMBuildGEP(builder, bld->mip_offsets, indexes, 2, "");
1197       offset1 = LLVMBuildLoad(builder, offset1, "");
1198       offsets = lp_build_broadcast_scalar(&bld->int_coord_bld, offset1);
1199    }
1200    else if (bld->num_mips == bld->coord_bld.type.length / 4) {
1201       unsigned i;
1202 
1203       offsets = bld->int_coord_bld.undef;
1204       for (i = 0; i < bld->num_mips; i++) {
1205          LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
1206          LLVMValueRef indexo = lp_build_const_int32(bld->gallivm, 4 * i);
1207          indexes[1] = LLVMBuildExtractElement(builder, level, indexi, "");
1208          offset1 = LLVMBuildGEP(builder, bld->mip_offsets, indexes, 2, "");
1209          offset1 = LLVMBuildLoad(builder, offset1, "");
1210          offsets = LLVMBuildInsertElement(builder, offsets, offset1, indexo, "");
1211       }
1212       offsets = lp_build_swizzle_scalar_aos(&bld->int_coord_bld, offsets, 0, 4);
1213    }
1214    else {
1215       unsigned i;
1216 
1217       assert (bld->num_mips == bld->coord_bld.type.length);
1218 
1219       offsets = bld->int_coord_bld.undef;
1220       for (i = 0; i < bld->num_mips; i++) {
1221          LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
1222          indexes[1] = LLVMBuildExtractElement(builder, level, indexi, "");
1223          offset1 = LLVMBuildGEP(builder, bld->mip_offsets, indexes, 2, "");
1224          offset1 = LLVMBuildLoad(builder, offset1, "");
1225          offsets = LLVMBuildInsertElement(builder, offsets, offset1, indexi, "");
1226       }
1227    }
1228    return offsets;
1229 }
1230 
1231 
1232 /**
1233  * Codegen equivalent for u_minify().
1234  * @param lod_scalar  if lod is a (broadcasted) scalar
1235  * Return max(1, base_size >> level);
1236  */
1237 LLVMValueRef
lp_build_minify(struct lp_build_context * bld,LLVMValueRef base_size,LLVMValueRef level,boolean lod_scalar)1238 lp_build_minify(struct lp_build_context *bld,
1239                 LLVMValueRef base_size,
1240                 LLVMValueRef level,
1241                 boolean lod_scalar)
1242 {
1243    LLVMBuilderRef builder = bld->gallivm->builder;
1244    assert(lp_check_value(bld->type, base_size));
1245    assert(lp_check_value(bld->type, level));
1246 
1247    if (level == bld->zero) {
1248       /* if we're using mipmap level zero, no minification is needed */
1249       return base_size;
1250    }
1251    else {
1252       LLVMValueRef size;
1253       assert(bld->type.sign);
1254       if (lod_scalar ||
1255          (util_get_cpu_caps()->has_avx2 || !util_get_cpu_caps()->has_sse)) {
1256          size = LLVMBuildLShr(builder, base_size, level, "minify");
1257          size = lp_build_max(bld, size, bld->one);
1258       }
1259       else {
1260          /*
1261           * emulate shift with float mul, since intel "forgot" shifts with
1262           * per-element shift count until avx2, which results in terrible
1263           * scalar extraction (both count and value), scalar shift,
1264           * vector reinsertion. Should not be an issue on any non-x86 cpu
1265           * with a vector instruction set.
1266           * On cpus with AMD's XOP this should also be unnecessary but I'm
1267           * not sure if llvm would emit this with current flags.
1268           */
1269          LLVMValueRef const127, const23, lf;
1270          struct lp_type ftype;
1271          struct lp_build_context fbld;
1272          ftype = lp_type_float_vec(32, bld->type.length * bld->type.width);
1273          lp_build_context_init(&fbld, bld->gallivm, ftype);
1274          const127 = lp_build_const_int_vec(bld->gallivm, bld->type, 127);
1275          const23 = lp_build_const_int_vec(bld->gallivm, bld->type, 23);
1276 
1277          /* calculate 2^(-level) float */
1278          lf = lp_build_sub(bld, const127, level);
1279          lf = lp_build_shl(bld, lf, const23);
1280          lf = LLVMBuildBitCast(builder, lf, fbld.vec_type, "");
1281 
1282          /* finish shift operation by doing float mul */
1283          base_size = lp_build_int_to_float(&fbld, base_size);
1284          size = lp_build_mul(&fbld, base_size, lf);
1285          /*
1286           * do the max also with floats because
1287           * a) non-emulated int max requires sse41
1288           *    (this is actually a lie as we could cast to 16bit values
1289           *    as 16bit is sufficient and 16bit int max is sse2)
1290           * b) with avx we can do int max 4-wide but float max 8-wide
1291           */
1292          size = lp_build_max(&fbld, size, fbld.one);
1293          size = lp_build_itrunc(&fbld, size);
1294       }
1295       return size;
1296    }
1297 }
1298 
1299 
1300 /**
1301  * Dereference stride_array[mipmap_level] array to get a stride.
1302  * Return stride as a vector.
1303  */
1304 static LLVMValueRef
lp_build_get_level_stride_vec(struct lp_build_sample_context * bld,LLVMValueRef stride_array,LLVMValueRef level)1305 lp_build_get_level_stride_vec(struct lp_build_sample_context *bld,
1306                               LLVMValueRef stride_array, LLVMValueRef level)
1307 {
1308    LLVMBuilderRef builder = bld->gallivm->builder;
1309    LLVMValueRef indexes[2], stride, stride1;
1310    indexes[0] = lp_build_const_int32(bld->gallivm, 0);
1311    if (bld->num_mips == 1) {
1312       indexes[1] = level;
1313       stride1 = LLVMBuildGEP(builder, stride_array, indexes, 2, "");
1314       stride1 = LLVMBuildLoad(builder, stride1, "");
1315       stride = lp_build_broadcast_scalar(&bld->int_coord_bld, stride1);
1316    }
1317    else if (bld->num_mips == bld->coord_bld.type.length / 4) {
1318       LLVMValueRef stride1;
1319       unsigned i;
1320 
1321       stride = bld->int_coord_bld.undef;
1322       for (i = 0; i < bld->num_mips; i++) {
1323          LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
1324          LLVMValueRef indexo = lp_build_const_int32(bld->gallivm, 4 * i);
1325          indexes[1] = LLVMBuildExtractElement(builder, level, indexi, "");
1326          stride1 = LLVMBuildGEP(builder, stride_array, indexes, 2, "");
1327          stride1 = LLVMBuildLoad(builder, stride1, "");
1328          stride = LLVMBuildInsertElement(builder, stride, stride1, indexo, "");
1329       }
1330       stride = lp_build_swizzle_scalar_aos(&bld->int_coord_bld, stride, 0, 4);
1331    }
1332    else {
1333       LLVMValueRef stride1;
1334       unsigned i;
1335 
1336       assert (bld->num_mips == bld->coord_bld.type.length);
1337 
1338       stride = bld->int_coord_bld.undef;
1339       for (i = 0; i < bld->coord_bld.type.length; i++) {
1340          LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
1341          indexes[1] = LLVMBuildExtractElement(builder, level, indexi, "");
1342          stride1 = LLVMBuildGEP(builder, stride_array, indexes, 2, "");
1343          stride1 = LLVMBuildLoad(builder, stride1, "");
1344          stride = LLVMBuildInsertElement(builder, stride, stride1, indexi, "");
1345       }
1346    }
1347    return stride;
1348 }
1349 
1350 
1351 /**
1352  * When sampling a mipmap, we need to compute the width, height, depth
1353  * of the source levels from the level indexes.  This helper function
1354  * does that.
1355  */
1356 void
lp_build_mipmap_level_sizes(struct lp_build_sample_context * bld,LLVMValueRef ilevel,LLVMValueRef * out_size,LLVMValueRef * row_stride_vec,LLVMValueRef * img_stride_vec)1357 lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld,
1358                             LLVMValueRef ilevel,
1359                             LLVMValueRef *out_size,
1360                             LLVMValueRef *row_stride_vec,
1361                             LLVMValueRef *img_stride_vec)
1362 {
1363    const unsigned dims = bld->dims;
1364    LLVMValueRef ilevel_vec;
1365 
1366    /*
1367     * Compute width, height, depth at mipmap level 'ilevel'
1368     */
1369    if (bld->num_mips == 1) {
1370       ilevel_vec = lp_build_broadcast_scalar(&bld->int_size_bld, ilevel);
1371       *out_size = lp_build_minify(&bld->int_size_bld, bld->int_size, ilevel_vec, TRUE);
1372    }
1373    else {
1374       LLVMValueRef int_size_vec;
1375       LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH];
1376       unsigned num_quads = bld->coord_bld.type.length / 4;
1377       unsigned i;
1378 
1379       if (bld->num_mips == num_quads) {
1380          /*
1381           * XXX: this should be #ifndef SANE_INSTRUCTION_SET.
1382           * intel "forgot" the variable shift count instruction until avx2.
1383           * A harmless 8x32 shift gets translated into 32 instructions
1384           * (16 extracts, 8 scalar shifts, 8 inserts), llvm is apparently
1385           * unable to recognize if there are really just 2 different shift
1386           * count values. So do the shift 4-wide before expansion.
1387           */
1388          struct lp_build_context bld4;
1389          struct lp_type type4;
1390 
1391          type4 = bld->int_coord_bld.type;
1392          type4.length = 4;
1393 
1394          lp_build_context_init(&bld4, bld->gallivm, type4);
1395 
1396          if (bld->dims == 1) {
1397             assert(bld->int_size_in_bld.type.length == 1);
1398             int_size_vec = lp_build_broadcast_scalar(&bld4,
1399                                                      bld->int_size);
1400          }
1401          else {
1402             assert(bld->int_size_in_bld.type.length == 4);
1403             int_size_vec = bld->int_size;
1404          }
1405 
1406          for (i = 0; i < num_quads; i++) {
1407             LLVMValueRef ileveli;
1408             LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
1409 
1410             ileveli = lp_build_extract_broadcast(bld->gallivm,
1411                                                  bld->leveli_bld.type,
1412                                                  bld4.type,
1413                                                  ilevel,
1414                                                  indexi);
1415             tmp[i] = lp_build_minify(&bld4, int_size_vec, ileveli, TRUE);
1416          }
1417          /*
1418           * out_size is [w0, h0, d0, _, w1, h1, d1, _, ...] vector for dims > 1,
1419           * [w0, w0, w0, w0, w1, w1, w1, w1, ...] otherwise.
1420           */
1421          *out_size = lp_build_concat(bld->gallivm,
1422                                      tmp,
1423                                      bld4.type,
1424                                      num_quads);
1425       }
1426       else {
1427         /* FIXME: this is terrible and results in _huge_ vector
1428          * (for the dims > 1 case).
1429          * Should refactor this (together with extract_image_sizes) and do
1430          * something more useful. Could for instance if we have width,height
1431          * with 4-wide vector pack all elements into a 8xi16 vector
1432          * (on which we can still do useful math) instead of using a 16xi32
1433          * vector.
1434          * For dims == 1 this will create [w0, w1, w2, w3, ...] vector.
1435          * For dims > 1 this will create [w0, h0, d0, _, w1, h1, d1, _, ...] vector.
1436          */
1437          assert(bld->num_mips == bld->coord_bld.type.length);
1438          if (bld->dims == 1) {
1439             assert(bld->int_size_in_bld.type.length == 1);
1440             int_size_vec = lp_build_broadcast_scalar(&bld->int_coord_bld,
1441                                                      bld->int_size);
1442             *out_size = lp_build_minify(&bld->int_coord_bld, int_size_vec, ilevel, FALSE);
1443          }
1444          else {
1445             LLVMValueRef ilevel1;
1446             for (i = 0; i < bld->num_mips; i++) {
1447                LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
1448                ilevel1 = lp_build_extract_broadcast(bld->gallivm, bld->int_coord_type,
1449                                                     bld->int_size_in_bld.type, ilevel, indexi);
1450                tmp[i] = bld->int_size;
1451                tmp[i] = lp_build_minify(&bld->int_size_in_bld, tmp[i], ilevel1, TRUE);
1452             }
1453             *out_size = lp_build_concat(bld->gallivm, tmp,
1454                                         bld->int_size_in_bld.type,
1455                                         bld->num_mips);
1456          }
1457       }
1458    }
1459 
1460    if (dims >= 2) {
1461       *row_stride_vec = lp_build_get_level_stride_vec(bld,
1462                                                       bld->row_stride_array,
1463                                                       ilevel);
1464    }
1465    if (dims == 3 || has_layer_coord(bld->static_texture_state->target)) {
1466       *img_stride_vec = lp_build_get_level_stride_vec(bld,
1467                                                       bld->img_stride_array,
1468                                                       ilevel);
1469    }
1470 }
1471 
1472 
1473 /**
1474  * Extract and broadcast texture size.
1475  *
1476  * @param size_type   type of the texture size vector (either
1477  *                    bld->int_size_type or bld->float_size_type)
1478  * @param coord_type  type of the texture size vector (either
1479  *                    bld->int_coord_type or bld->coord_type)
1480  * @param size        vector with the texture size (width, height, depth)
1481  */
1482 void
lp_build_extract_image_sizes(struct lp_build_sample_context * bld,struct lp_build_context * size_bld,struct lp_type coord_type,LLVMValueRef size,LLVMValueRef * out_width,LLVMValueRef * out_height,LLVMValueRef * out_depth)1483 lp_build_extract_image_sizes(struct lp_build_sample_context *bld,
1484                              struct lp_build_context *size_bld,
1485                              struct lp_type coord_type,
1486                              LLVMValueRef size,
1487                              LLVMValueRef *out_width,
1488                              LLVMValueRef *out_height,
1489                              LLVMValueRef *out_depth)
1490 {
1491    const unsigned dims = bld->dims;
1492    LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
1493    struct lp_type size_type = size_bld->type;
1494 
1495    if (bld->num_mips == 1) {
1496       *out_width = lp_build_extract_broadcast(bld->gallivm,
1497                                               size_type,
1498                                               coord_type,
1499                                               size,
1500                                               LLVMConstInt(i32t, 0, 0));
1501       if (dims >= 2) {
1502          *out_height = lp_build_extract_broadcast(bld->gallivm,
1503                                                   size_type,
1504                                                   coord_type,
1505                                                   size,
1506                                                   LLVMConstInt(i32t, 1, 0));
1507          if (dims == 3) {
1508             *out_depth = lp_build_extract_broadcast(bld->gallivm,
1509                                                     size_type,
1510                                                     coord_type,
1511                                                     size,
1512                                                     LLVMConstInt(i32t, 2, 0));
1513          }
1514       }
1515    }
1516    else {
1517       unsigned num_quads = bld->coord_bld.type.length / 4;
1518 
1519       if (dims == 1) {
1520          *out_width = size;
1521       }
1522       else if (bld->num_mips == num_quads) {
1523          *out_width = lp_build_swizzle_scalar_aos(size_bld, size, 0, 4);
1524          if (dims >= 2) {
1525             *out_height = lp_build_swizzle_scalar_aos(size_bld, size, 1, 4);
1526             if (dims == 3) {
1527                *out_depth = lp_build_swizzle_scalar_aos(size_bld, size, 2, 4);
1528             }
1529          }
1530       }
1531       else {
1532          assert(bld->num_mips == bld->coord_type.length);
1533          *out_width = lp_build_pack_aos_scalars(bld->gallivm, size_type,
1534                                                 coord_type, size, 0);
1535          if (dims >= 2) {
1536             *out_height = lp_build_pack_aos_scalars(bld->gallivm, size_type,
1537                                                     coord_type, size, 1);
1538             if (dims == 3) {
1539                *out_depth = lp_build_pack_aos_scalars(bld->gallivm, size_type,
1540                                                       coord_type, size, 2);
1541             }
1542          }
1543       }
1544    }
1545 }
1546 
1547 
1548 /**
1549  * Unnormalize coords.
1550  *
1551  * @param flt_size  vector with the integer texture size (width, height, depth)
1552  */
1553 void
lp_build_unnormalized_coords(struct lp_build_sample_context * bld,LLVMValueRef flt_size,LLVMValueRef * s,LLVMValueRef * t,LLVMValueRef * r)1554 lp_build_unnormalized_coords(struct lp_build_sample_context *bld,
1555                              LLVMValueRef flt_size,
1556                              LLVMValueRef *s,
1557                              LLVMValueRef *t,
1558                              LLVMValueRef *r)
1559 {
1560    const unsigned dims = bld->dims;
1561    LLVMValueRef width;
1562    LLVMValueRef height = NULL;
1563    LLVMValueRef depth = NULL;
1564 
1565    lp_build_extract_image_sizes(bld,
1566                                 &bld->float_size_bld,
1567                                 bld->coord_type,
1568                                 flt_size,
1569                                 &width,
1570                                 &height,
1571                                 &depth);
1572 
1573    /* s = s * width, t = t * height */
1574    *s = lp_build_mul(&bld->coord_bld, *s, width);
1575    if (dims >= 2) {
1576       *t = lp_build_mul(&bld->coord_bld, *t, height);
1577       if (dims >= 3) {
1578          *r = lp_build_mul(&bld->coord_bld, *r, depth);
1579       }
1580    }
1581 }
1582 
1583 /**
1584  * Generate new coords and faces for cubemap texels falling off the face.
1585  *
1586  * @param face   face (center) of the pixel
1587  * @param x0     lower x coord
1588  * @param x1     higher x coord (must be x0 + 1)
1589  * @param y0     lower y coord
1590  * @param y1     higher y coord (must be x0 + 1)
1591  * @param max_coord     texture cube (level) size - 1
1592  * @param next_faces    new face values when falling off
1593  * @param next_xcoords  new x coord values when falling off
1594  * @param next_ycoords  new y coord values when falling off
1595  *
1596  * The arrays hold the new values when under/overflow of
1597  * lower x, higher x, lower y, higher y coord would occur (in this order).
1598  * next_xcoords/next_ycoords have two entries each (for both new lower and
1599  * higher coord).
1600  */
1601 void
lp_build_cube_new_coords(struct lp_build_context * ivec_bld,LLVMValueRef face,LLVMValueRef x0,LLVMValueRef x1,LLVMValueRef y0,LLVMValueRef y1,LLVMValueRef max_coord,LLVMValueRef next_faces[4],LLVMValueRef next_xcoords[4][2],LLVMValueRef next_ycoords[4][2])1602 lp_build_cube_new_coords(struct lp_build_context *ivec_bld,
1603                         LLVMValueRef face,
1604                         LLVMValueRef x0,
1605                         LLVMValueRef x1,
1606                         LLVMValueRef y0,
1607                         LLVMValueRef y1,
1608                         LLVMValueRef max_coord,
1609                         LLVMValueRef next_faces[4],
1610                         LLVMValueRef next_xcoords[4][2],
1611                         LLVMValueRef next_ycoords[4][2])
1612 {
1613    /*
1614     * Lookup tables aren't nice for simd code hence try some logic here.
1615     * (Note that while it would not be necessary to do per-sample (4) lookups
1616     * when using a LUT as it's impossible that texels fall off of positive
1617     * and negative edges simultaneously, it would however be necessary to
1618     * do 2 lookups for corner handling as in this case texels both fall off
1619     * of x and y axes.)
1620     */
1621    /*
1622     * Next faces (for face 012345):
1623     * x < 0.0  : 451110
1624     * x >= 1.0 : 540001
1625     * y < 0.0  : 225422
1626     * y >= 1.0 : 334533
1627     * Hence nfx+ (and nfy+) == nfx- (nfy-) xor 1
1628     * nfx-: face > 1 ? (face == 5 ? 0 : 1) : (4 + face & 1)
1629     * nfy+: face & ~4 > 1 ? face + 2 : 3;
1630     * This could also use pshufb instead, but would need (manually coded)
1631     * ssse3 intrinsic (llvm won't do non-constant shuffles).
1632     */
1633    struct gallivm_state *gallivm = ivec_bld->gallivm;
1634    LLVMValueRef sel, sel_f2345, sel_f23, sel_f2, tmpsel, tmp;
1635    LLVMValueRef faceand1, sel_fand1, maxmx0, maxmx1, maxmy0, maxmy1;
1636    LLVMValueRef c2 = lp_build_const_int_vec(gallivm, ivec_bld->type, 2);
1637    LLVMValueRef c3 = lp_build_const_int_vec(gallivm, ivec_bld->type, 3);
1638    LLVMValueRef c4 = lp_build_const_int_vec(gallivm, ivec_bld->type, 4);
1639    LLVMValueRef c5 = lp_build_const_int_vec(gallivm, ivec_bld->type, 5);
1640 
1641    sel = lp_build_cmp(ivec_bld, PIPE_FUNC_EQUAL, face, c5);
1642    tmpsel = lp_build_select(ivec_bld, sel, ivec_bld->zero, ivec_bld->one);
1643    sel_f2345 = lp_build_cmp(ivec_bld, PIPE_FUNC_GREATER, face, ivec_bld->one);
1644    faceand1 = lp_build_and(ivec_bld, face, ivec_bld->one);
1645    tmp = lp_build_add(ivec_bld, faceand1, c4);
1646    next_faces[0] = lp_build_select(ivec_bld, sel_f2345, tmpsel, tmp);
1647    next_faces[1] = lp_build_xor(ivec_bld, next_faces[0], ivec_bld->one);
1648 
1649    tmp = lp_build_andnot(ivec_bld, face, c4);
1650    sel_f23 = lp_build_cmp(ivec_bld, PIPE_FUNC_GREATER, tmp, ivec_bld->one);
1651    tmp = lp_build_add(ivec_bld, face, c2);
1652    next_faces[3] = lp_build_select(ivec_bld, sel_f23, tmp, c3);
1653    next_faces[2] = lp_build_xor(ivec_bld, next_faces[3], ivec_bld->one);
1654 
1655    /*
1656     * new xcoords (for face 012345):
1657     * x < 0.0  : max   max   t     max-t max  max
1658     * x >= 1.0 : 0     0     max-t t     0    0
1659     * y < 0.0  : max   0     max-s s     s    max-s
1660     * y >= 1.0 : max   0     s     max-s s    max-s
1661     *
1662     * ncx[1] = face & ~4 > 1 ? (face == 2 ? max-t : t) : 0
1663     * ncx[0] = max - ncx[1]
1664     * ncx[3] = face > 1 ? (face & 1 ? max-s : s) : (face & 1) ? 0 : max
1665     * ncx[2] = face & ~4 > 1 ? max - ncx[3] : ncx[3]
1666     */
1667    sel_f2 = lp_build_cmp(ivec_bld, PIPE_FUNC_EQUAL, face, c2);
1668    maxmy0 = lp_build_sub(ivec_bld, max_coord, y0);
1669    tmp = lp_build_select(ivec_bld, sel_f2, maxmy0, y0);
1670    next_xcoords[1][0] = lp_build_select(ivec_bld, sel_f23, tmp, ivec_bld->zero);
1671    next_xcoords[0][0] = lp_build_sub(ivec_bld, max_coord, next_xcoords[1][0]);
1672    maxmy1 = lp_build_sub(ivec_bld, max_coord, y1);
1673    tmp = lp_build_select(ivec_bld, sel_f2, maxmy1, y1);
1674    next_xcoords[1][1] = lp_build_select(ivec_bld, sel_f23, tmp, ivec_bld->zero);
1675    next_xcoords[0][1] = lp_build_sub(ivec_bld, max_coord, next_xcoords[1][1]);
1676 
1677    sel_fand1 = lp_build_cmp(ivec_bld, PIPE_FUNC_EQUAL, faceand1, ivec_bld->one);
1678 
1679    tmpsel = lp_build_select(ivec_bld, sel_fand1, ivec_bld->zero, max_coord);
1680    maxmx0 = lp_build_sub(ivec_bld, max_coord, x0);
1681    tmp = lp_build_select(ivec_bld, sel_fand1, maxmx0, x0);
1682    next_xcoords[3][0] = lp_build_select(ivec_bld, sel_f2345, tmp, tmpsel);
1683    tmp = lp_build_sub(ivec_bld, max_coord, next_xcoords[3][0]);
1684    next_xcoords[2][0] = lp_build_select(ivec_bld, sel_f23, tmp, next_xcoords[3][0]);
1685    maxmx1 = lp_build_sub(ivec_bld, max_coord, x1);
1686    tmp = lp_build_select(ivec_bld, sel_fand1, maxmx1, x1);
1687    next_xcoords[3][1] = lp_build_select(ivec_bld, sel_f2345, tmp, tmpsel);
1688    tmp = lp_build_sub(ivec_bld, max_coord, next_xcoords[3][1]);
1689    next_xcoords[2][1] = lp_build_select(ivec_bld, sel_f23, tmp, next_xcoords[3][1]);
1690 
1691    /*
1692     * new ycoords (for face 012345):
1693     * x < 0.0  : t     t     0     max   t    t
1694     * x >= 1.0 : t     t     0     max   t    t
1695     * y < 0.0  : max-s s     0     max   max  0
1696     * y >= 1.0 : s     max-s 0     max   0    max
1697     *
1698     * ncy[0] = face & ~4 > 1 ? (face == 2 ? 0 : max) : t
1699     * ncy[1] = ncy[0]
1700     * ncy[3] = face > 1 ? (face & 1 ? max : 0) : (face & 1) ? max-s : max
1701     * ncx[2] = face & ~4 > 1 ? max - ncx[3] : ncx[3]
1702     */
1703    tmp = lp_build_select(ivec_bld, sel_f2, ivec_bld->zero, max_coord);
1704    next_ycoords[0][0] = lp_build_select(ivec_bld, sel_f23, tmp, y0);
1705    next_ycoords[1][0] = next_ycoords[0][0];
1706    next_ycoords[0][1] = lp_build_select(ivec_bld, sel_f23, tmp, y1);
1707    next_ycoords[1][1] = next_ycoords[0][1];
1708 
1709    tmpsel = lp_build_select(ivec_bld, sel_fand1, maxmx0, x0);
1710    tmp = lp_build_select(ivec_bld, sel_fand1, max_coord, ivec_bld->zero);
1711    next_ycoords[3][0] = lp_build_select(ivec_bld, sel_f2345, tmp, tmpsel);
1712    tmp = lp_build_sub(ivec_bld, max_coord, next_ycoords[3][0]);
1713    next_ycoords[2][0] = lp_build_select(ivec_bld, sel_f23, next_ycoords[3][0], tmp);
1714    tmpsel = lp_build_select(ivec_bld, sel_fand1, maxmx1, x1);
1715    tmp = lp_build_select(ivec_bld, sel_fand1, max_coord, ivec_bld->zero);
1716    next_ycoords[3][1] = lp_build_select(ivec_bld, sel_f2345, tmp, tmpsel);
1717    tmp = lp_build_sub(ivec_bld, max_coord, next_ycoords[3][1]);
1718    next_ycoords[2][1] = lp_build_select(ivec_bld, sel_f23, next_ycoords[3][1], tmp);
1719 }
1720 
1721 
1722 /** Helper used by lp_build_cube_lookup() */
1723 static LLVMValueRef
lp_build_cube_imapos(struct lp_build_context * coord_bld,LLVMValueRef coord)1724 lp_build_cube_imapos(struct lp_build_context *coord_bld, LLVMValueRef coord)
1725 {
1726    /* ima = +0.5 / abs(coord); */
1727    LLVMValueRef posHalf = lp_build_const_vec(coord_bld->gallivm, coord_bld->type, 0.5);
1728    LLVMValueRef absCoord = lp_build_abs(coord_bld, coord);
1729    LLVMValueRef ima = lp_build_div(coord_bld, posHalf, absCoord);
1730    return ima;
1731 }
1732 
1733 
1734 /** Helper for doing 3-wise selection.
1735  * Returns sel1 ? val2 : (sel0 ? val0 : val1).
1736  */
1737 static LLVMValueRef
lp_build_select3(struct lp_build_context * sel_bld,LLVMValueRef sel0,LLVMValueRef sel1,LLVMValueRef val0,LLVMValueRef val1,LLVMValueRef val2)1738 lp_build_select3(struct lp_build_context *sel_bld,
1739                  LLVMValueRef sel0,
1740                  LLVMValueRef sel1,
1741                  LLVMValueRef val0,
1742                  LLVMValueRef val1,
1743                  LLVMValueRef val2)
1744 {
1745    LLVMValueRef tmp;
1746    tmp = lp_build_select(sel_bld, sel0, val0, val1);
1747    return lp_build_select(sel_bld, sel1, val2, tmp);
1748 }
1749 
1750 
1751 /**
1752  * Generate code to do cube face selection and compute per-face texcoords.
1753  */
1754 void
lp_build_cube_lookup(struct lp_build_sample_context * bld,LLVMValueRef * coords,const struct lp_derivatives * derivs_in,LLVMValueRef * rho,struct lp_derivatives * derivs_out,boolean need_derivs)1755 lp_build_cube_lookup(struct lp_build_sample_context *bld,
1756                      LLVMValueRef *coords,
1757                      const struct lp_derivatives *derivs_in, /* optional */
1758                      LLVMValueRef *rho,
1759                      struct lp_derivatives *derivs_out, /* optional */
1760                      boolean need_derivs)
1761 {
1762    struct lp_build_context *coord_bld = &bld->coord_bld;
1763    LLVMBuilderRef builder = bld->gallivm->builder;
1764    struct gallivm_state *gallivm = bld->gallivm;
1765    LLVMValueRef si, ti, ri;
1766 
1767    /*
1768     * Do per-pixel face selection. We cannot however (as we used to do)
1769     * simply calculate the derivs afterwards (which is very bogus for
1770     * explicit derivs btw) because the values would be "random" when
1771     * not all pixels lie on the same face. So what we do here is just
1772     * calculate the derivatives after scaling the coords by the absolute
1773     * value of the inverse major axis, and essentially do rho calculation
1774     * steps as if it were a 3d texture. This is perfect if all pixels hit
1775     * the same face, but not so great at edges, I believe the max error
1776     * should be sqrt(2) with no_rho_approx or 2 otherwise (essentially measuring
1777     * the 3d distance between 2 points on the cube instead of measuring up/down
1778     * the edge). Still this is possibly a win over just selecting the same face
1779     * for all pixels. Unfortunately, something like that doesn't work for
1780     * explicit derivatives.
1781     */
1782    struct lp_build_context *cint_bld = &bld->int_coord_bld;
1783    struct lp_type intctype = cint_bld->type;
1784    LLVMTypeRef coord_vec_type = coord_bld->vec_type;
1785    LLVMTypeRef cint_vec_type = cint_bld->vec_type;
1786    LLVMValueRef as, at, ar, face, face_s, face_t;
1787    LLVMValueRef as_ge_at, maxasat, ar_ge_as_at;
1788    LLVMValueRef snewx, tnewx, snewy, tnewy, snewz, tnewz;
1789    LLVMValueRef tnegi, rnegi;
1790    LLVMValueRef ma, mai, signma, signmabit, imahalfpos;
1791    LLVMValueRef posHalf = lp_build_const_vec(gallivm, coord_bld->type, 0.5);
1792    LLVMValueRef signmask = lp_build_const_int_vec(gallivm, intctype,
1793                                                   1LL << (intctype.width - 1));
1794    LLVMValueRef signshift = lp_build_const_int_vec(gallivm, intctype,
1795                                                    intctype.width -1);
1796    LLVMValueRef facex = lp_build_const_int_vec(gallivm, intctype, PIPE_TEX_FACE_POS_X);
1797    LLVMValueRef facey = lp_build_const_int_vec(gallivm, intctype, PIPE_TEX_FACE_POS_Y);
1798    LLVMValueRef facez = lp_build_const_int_vec(gallivm, intctype, PIPE_TEX_FACE_POS_Z);
1799    LLVMValueRef s = coords[0];
1800    LLVMValueRef t = coords[1];
1801    LLVMValueRef r = coords[2];
1802 
1803    assert(PIPE_TEX_FACE_NEG_X == PIPE_TEX_FACE_POS_X + 1);
1804    assert(PIPE_TEX_FACE_NEG_Y == PIPE_TEX_FACE_POS_Y + 1);
1805    assert(PIPE_TEX_FACE_NEG_Z == PIPE_TEX_FACE_POS_Z + 1);
1806 
1807    /*
1808     * get absolute value (for x/y/z face selection) and sign bit
1809     * (for mirroring minor coords and pos/neg face selection)
1810     * of the original coords.
1811     */
1812    as = lp_build_abs(&bld->coord_bld, s);
1813    at = lp_build_abs(&bld->coord_bld, t);
1814    ar = lp_build_abs(&bld->coord_bld, r);
1815 
1816    /*
1817     * major face determination: select x if x > y else select y
1818     * select z if z >= max(x,y) else select previous result
1819     * if some axis are the same we chose z over y, y over x - the
1820     * dx10 spec seems to ask for it while OpenGL doesn't care (if we
1821     * wouldn't care could save a select or two if using different
1822     * compares and doing at_g_as_ar last since tnewx and tnewz are the
1823     * same).
1824     */
1825    as_ge_at = lp_build_cmp(coord_bld, PIPE_FUNC_GREATER, as, at);
1826    maxasat = lp_build_max(coord_bld, as, at);
1827    ar_ge_as_at = lp_build_cmp(coord_bld, PIPE_FUNC_GEQUAL, ar, maxasat);
1828 
1829    if (need_derivs) {
1830       /*
1831        * XXX: This is really really complex.
1832        * It is a bit overkill to use this for implicit derivatives as well,
1833        * no way this is worth the cost in practice, but seems to be the
1834        * only way for getting accurate and per-pixel lod values.
1835        */
1836       LLVMValueRef ima, imahalf, tmp, ddx[3], ddy[3];
1837       LLVMValueRef madx, mady, madxdivma, madydivma;
1838       LLVMValueRef sdxi, tdxi, rdxi, sdyi, tdyi, rdyi;
1839       LLVMValueRef tdxnegi, rdxnegi, tdynegi, rdynegi;
1840       LLVMValueRef sdxnewx, sdxnewy, sdxnewz, tdxnewx, tdxnewy, tdxnewz;
1841       LLVMValueRef sdynewx, sdynewy, sdynewz, tdynewx, tdynewy, tdynewz;
1842       LLVMValueRef face_sdx, face_tdx, face_sdy, face_tdy;
1843       /*
1844        * s = 1/2 * ( sc / ma + 1)
1845        * t = 1/2 * ( tc / ma + 1)
1846        *
1847        * s' = 1/2 * (sc' * ma - sc * ma') / ma^2
1848        * t' = 1/2 * (tc' * ma - tc * ma') / ma^2
1849        *
1850        * dx.s = 0.5 * (dx.sc - sc * dx.ma / ma) / ma
1851        * dx.t = 0.5 * (dx.tc - tc * dx.ma / ma) / ma
1852        * dy.s = 0.5 * (dy.sc - sc * dy.ma / ma) / ma
1853        * dy.t = 0.5 * (dy.tc - tc * dy.ma / ma) / ma
1854        */
1855 
1856       /* select ma, calculate ima */
1857       ma = lp_build_select3(coord_bld, as_ge_at, ar_ge_as_at, s, t, r);
1858       mai = LLVMBuildBitCast(builder, ma, cint_vec_type, "");
1859       signmabit = LLVMBuildAnd(builder, mai, signmask, "");
1860       ima = lp_build_div(coord_bld, coord_bld->one, ma);
1861       imahalf = lp_build_mul(coord_bld, posHalf, ima);
1862       imahalfpos = lp_build_abs(coord_bld, imahalf);
1863 
1864       if (!derivs_in) {
1865          ddx[0] = lp_build_ddx(coord_bld, s);
1866          ddx[1] = lp_build_ddx(coord_bld, t);
1867          ddx[2] = lp_build_ddx(coord_bld, r);
1868          ddy[0] = lp_build_ddy(coord_bld, s);
1869          ddy[1] = lp_build_ddy(coord_bld, t);
1870          ddy[2] = lp_build_ddy(coord_bld, r);
1871       }
1872       else {
1873          ddx[0] = derivs_in->ddx[0];
1874          ddx[1] = derivs_in->ddx[1];
1875          ddx[2] = derivs_in->ddx[2];
1876          ddy[0] = derivs_in->ddy[0];
1877          ddy[1] = derivs_in->ddy[1];
1878          ddy[2] = derivs_in->ddy[2];
1879       }
1880 
1881       /* select major derivatives */
1882       madx = lp_build_select3(coord_bld, as_ge_at, ar_ge_as_at, ddx[0], ddx[1], ddx[2]);
1883       mady = lp_build_select3(coord_bld, as_ge_at, ar_ge_as_at, ddy[0], ddy[1], ddy[2]);
1884 
1885       si = LLVMBuildBitCast(builder, s, cint_vec_type, "");
1886       ti = LLVMBuildBitCast(builder, t, cint_vec_type, "");
1887       ri = LLVMBuildBitCast(builder, r, cint_vec_type, "");
1888 
1889       sdxi = LLVMBuildBitCast(builder, ddx[0], cint_vec_type, "");
1890       tdxi = LLVMBuildBitCast(builder, ddx[1], cint_vec_type, "");
1891       rdxi = LLVMBuildBitCast(builder, ddx[2], cint_vec_type, "");
1892 
1893       sdyi = LLVMBuildBitCast(builder, ddy[0], cint_vec_type, "");
1894       tdyi = LLVMBuildBitCast(builder, ddy[1], cint_vec_type, "");
1895       rdyi = LLVMBuildBitCast(builder, ddy[2], cint_vec_type, "");
1896 
1897       /*
1898        * compute all possible new s/t coords, which does the mirroring,
1899        * and do the same for derivs minor axes.
1900        * snewx = signma * -r;
1901        * tnewx = -t;
1902        * snewy = s;
1903        * tnewy = signma * r;
1904        * snewz = signma * s;
1905        * tnewz = -t;
1906        */
1907       tnegi = LLVMBuildXor(builder, ti, signmask, "");
1908       rnegi = LLVMBuildXor(builder, ri, signmask, "");
1909       tdxnegi = LLVMBuildXor(builder, tdxi, signmask, "");
1910       rdxnegi = LLVMBuildXor(builder, rdxi, signmask, "");
1911       tdynegi = LLVMBuildXor(builder, tdyi, signmask, "");
1912       rdynegi = LLVMBuildXor(builder, rdyi, signmask, "");
1913 
1914       snewx = LLVMBuildXor(builder, signmabit, rnegi, "");
1915       tnewx = tnegi;
1916       sdxnewx = LLVMBuildXor(builder, signmabit, rdxnegi, "");
1917       tdxnewx = tdxnegi;
1918       sdynewx = LLVMBuildXor(builder, signmabit, rdynegi, "");
1919       tdynewx = tdynegi;
1920 
1921       snewy = si;
1922       tnewy = LLVMBuildXor(builder, signmabit, ri, "");
1923       sdxnewy = sdxi;
1924       tdxnewy = LLVMBuildXor(builder, signmabit, rdxi, "");
1925       sdynewy = sdyi;
1926       tdynewy = LLVMBuildXor(builder, signmabit, rdyi, "");
1927 
1928       snewz = LLVMBuildXor(builder, signmabit, si, "");
1929       tnewz = tnegi;
1930       sdxnewz = LLVMBuildXor(builder, signmabit, sdxi, "");
1931       tdxnewz = tdxnegi;
1932       sdynewz = LLVMBuildXor(builder, signmabit, sdyi, "");
1933       tdynewz = tdynegi;
1934 
1935       /* select the mirrored values */
1936       face = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, facex, facey, facez);
1937       face_s = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, snewx, snewy, snewz);
1938       face_t = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, tnewx, tnewy, tnewz);
1939       face_sdx = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, sdxnewx, sdxnewy, sdxnewz);
1940       face_tdx = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, tdxnewx, tdxnewy, tdxnewz);
1941       face_sdy = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, sdynewx, sdynewy, sdynewz);
1942       face_tdy = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, tdynewx, tdynewy, tdynewz);
1943 
1944       face_s = LLVMBuildBitCast(builder, face_s, coord_vec_type, "");
1945       face_t = LLVMBuildBitCast(builder, face_t, coord_vec_type, "");
1946       face_sdx = LLVMBuildBitCast(builder, face_sdx, coord_vec_type, "");
1947       face_tdx = LLVMBuildBitCast(builder, face_tdx, coord_vec_type, "");
1948       face_sdy = LLVMBuildBitCast(builder, face_sdy, coord_vec_type, "");
1949       face_tdy = LLVMBuildBitCast(builder, face_tdy, coord_vec_type, "");
1950 
1951       /* deriv math, dx.s = 0.5 * (dx.sc - sc * dx.ma / ma) / ma */
1952       madxdivma = lp_build_mul(coord_bld, madx, ima);
1953       tmp = lp_build_mul(coord_bld, madxdivma, face_s);
1954       tmp = lp_build_sub(coord_bld, face_sdx, tmp);
1955       derivs_out->ddx[0] = lp_build_mul(coord_bld, tmp, imahalf);
1956 
1957       /* dx.t = 0.5 * (dx.tc - tc * dx.ma / ma) / ma */
1958       tmp = lp_build_mul(coord_bld, madxdivma, face_t);
1959       tmp = lp_build_sub(coord_bld, face_tdx, tmp);
1960       derivs_out->ddx[1] = lp_build_mul(coord_bld, tmp, imahalf);
1961 
1962       /* dy.s = 0.5 * (dy.sc - sc * dy.ma / ma) / ma */
1963       madydivma = lp_build_mul(coord_bld, mady, ima);
1964       tmp = lp_build_mul(coord_bld, madydivma, face_s);
1965       tmp = lp_build_sub(coord_bld, face_sdy, tmp);
1966       derivs_out->ddy[0] = lp_build_mul(coord_bld, tmp, imahalf);
1967 
1968       /* dy.t = 0.5 * (dy.tc - tc * dy.ma / ma) / ma */
1969       tmp = lp_build_mul(coord_bld, madydivma, face_t);
1970       tmp = lp_build_sub(coord_bld, face_tdy, tmp);
1971       derivs_out->ddy[1] = lp_build_mul(coord_bld, tmp, imahalf);
1972 
1973       signma = LLVMBuildLShr(builder, mai, signshift, "");
1974       coords[2] = LLVMBuildOr(builder, face, signma, "face");
1975 
1976       /* project coords */
1977       face_s = lp_build_mul(coord_bld, face_s, imahalfpos);
1978       face_t = lp_build_mul(coord_bld, face_t, imahalfpos);
1979 
1980       coords[0] = lp_build_add(coord_bld, face_s, posHalf);
1981       coords[1] = lp_build_add(coord_bld, face_t, posHalf);
1982 
1983       return;
1984    }
1985 
1986    ma = lp_build_select3(coord_bld, as_ge_at, ar_ge_as_at, s, t, r);
1987    mai = LLVMBuildBitCast(builder, ma, cint_vec_type, "");
1988    signmabit = LLVMBuildAnd(builder, mai, signmask, "");
1989 
1990    si = LLVMBuildBitCast(builder, s, cint_vec_type, "");
1991    ti = LLVMBuildBitCast(builder, t, cint_vec_type, "");
1992    ri = LLVMBuildBitCast(builder, r, cint_vec_type, "");
1993 
1994    /*
1995     * compute all possible new s/t coords, which does the mirroring
1996     * snewx = signma * -r;
1997     * tnewx = -t;
1998     * snewy = s;
1999     * tnewy = signma * r;
2000     * snewz = signma * s;
2001     * tnewz = -t;
2002     */
2003    tnegi = LLVMBuildXor(builder, ti, signmask, "");
2004    rnegi = LLVMBuildXor(builder, ri, signmask, "");
2005 
2006    snewx = LLVMBuildXor(builder, signmabit, rnegi, "");
2007    tnewx = tnegi;
2008 
2009    snewy = si;
2010    tnewy = LLVMBuildXor(builder, signmabit, ri, "");
2011 
2012    snewz = LLVMBuildXor(builder, signmabit, si, "");
2013    tnewz = tnegi;
2014 
2015    /* select the mirrored values */
2016    face_s = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, snewx, snewy, snewz);
2017    face_t = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, tnewx, tnewy, tnewz);
2018    face = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, facex, facey, facez);
2019 
2020    face_s = LLVMBuildBitCast(builder, face_s, coord_vec_type, "");
2021    face_t = LLVMBuildBitCast(builder, face_t, coord_vec_type, "");
2022 
2023    /* add +1 for neg face */
2024    /* XXX with AVX probably want to use another select here -
2025     * as long as we ensure vblendvps gets used we can actually
2026     * skip the comparison and just use sign as a "mask" directly.
2027     */
2028    signma = LLVMBuildLShr(builder, mai, signshift, "");
2029    coords[2] = LLVMBuildOr(builder, face, signma, "face");
2030 
2031    /* project coords */
2032    if (!need_derivs) {
2033       imahalfpos = lp_build_cube_imapos(coord_bld, ma);
2034       face_s = lp_build_mul(coord_bld, face_s, imahalfpos);
2035       face_t = lp_build_mul(coord_bld, face_t, imahalfpos);
2036    }
2037 
2038    coords[0] = lp_build_add(coord_bld, face_s, posHalf);
2039    coords[1] = lp_build_add(coord_bld, face_t, posHalf);
2040 }
2041 
2042 
2043 /**
2044  * Compute the partial offset of a pixel block along an arbitrary axis.
2045  *
2046  * @param coord   coordinate in pixels
2047  * @param stride  number of bytes between rows of successive pixel blocks
2048  * @param block_length  number of pixels in a pixels block along the coordinate
2049  *                      axis
2050  * @param out_offset    resulting relative offset of the pixel block in bytes
2051  * @param out_subcoord  resulting sub-block pixel coordinate
2052  */
2053 void
lp_build_sample_partial_offset(struct lp_build_context * bld,unsigned block_length,LLVMValueRef coord,LLVMValueRef stride,LLVMValueRef * out_offset,LLVMValueRef * out_subcoord)2054 lp_build_sample_partial_offset(struct lp_build_context *bld,
2055                                unsigned block_length,
2056                                LLVMValueRef coord,
2057                                LLVMValueRef stride,
2058                                LLVMValueRef *out_offset,
2059                                LLVMValueRef *out_subcoord)
2060 {
2061    LLVMBuilderRef builder = bld->gallivm->builder;
2062    LLVMValueRef offset;
2063    LLVMValueRef subcoord;
2064 
2065    if (block_length == 1) {
2066       subcoord = bld->zero;
2067    }
2068    else {
2069       /*
2070        * Pixel blocks have power of two dimensions. LLVM should convert the
2071        * rem/div to bit arithmetic.
2072        * TODO: Verify this.
2073        * It does indeed BUT it does transform it to scalar (and back) when doing so
2074        * (using roughly extract, shift/and, mov, unpack) (llvm 2.7).
2075        * The generated code looks seriously unfunny and is quite expensive.
2076        */
2077 #if 0
2078       LLVMValueRef block_width = lp_build_const_int_vec(bld->type, block_length);
2079       subcoord = LLVMBuildURem(builder, coord, block_width, "");
2080       coord    = LLVMBuildUDiv(builder, coord, block_width, "");
2081 #else
2082       unsigned logbase2 = util_logbase2(block_length);
2083       LLVMValueRef block_shift = lp_build_const_int_vec(bld->gallivm, bld->type, logbase2);
2084       LLVMValueRef block_mask = lp_build_const_int_vec(bld->gallivm, bld->type, block_length - 1);
2085       subcoord = LLVMBuildAnd(builder, coord, block_mask, "");
2086       coord = LLVMBuildLShr(builder, coord, block_shift, "");
2087 #endif
2088    }
2089 
2090    offset = lp_build_mul(bld, coord, stride);
2091 
2092    assert(out_offset);
2093    assert(out_subcoord);
2094 
2095    *out_offset = offset;
2096    *out_subcoord = subcoord;
2097 }
2098 
2099 
2100 /**
2101  * Compute the offset of a pixel block.
2102  *
2103  * x, y, z, y_stride, z_stride are vectors, and they refer to pixels.
2104  *
2105  * Returns the relative offset and i,j sub-block coordinates
2106  */
2107 void
lp_build_sample_offset(struct lp_build_context * bld,const struct util_format_description * format_desc,LLVMValueRef x,LLVMValueRef y,LLVMValueRef z,LLVMValueRef y_stride,LLVMValueRef z_stride,LLVMValueRef * out_offset,LLVMValueRef * out_i,LLVMValueRef * out_j)2108 lp_build_sample_offset(struct lp_build_context *bld,
2109                        const struct util_format_description *format_desc,
2110                        LLVMValueRef x,
2111                        LLVMValueRef y,
2112                        LLVMValueRef z,
2113                        LLVMValueRef y_stride,
2114                        LLVMValueRef z_stride,
2115                        LLVMValueRef *out_offset,
2116                        LLVMValueRef *out_i,
2117                        LLVMValueRef *out_j)
2118 {
2119    LLVMValueRef x_stride;
2120    LLVMValueRef offset;
2121 
2122    x_stride = lp_build_const_vec(bld->gallivm, bld->type,
2123                                  format_desc->block.bits/8);
2124 
2125    lp_build_sample_partial_offset(bld,
2126                                   format_desc->block.width,
2127                                   x, x_stride,
2128                                   &offset, out_i);
2129 
2130    if (y && y_stride) {
2131       LLVMValueRef y_offset;
2132       lp_build_sample_partial_offset(bld,
2133                                      format_desc->block.height,
2134                                      y, y_stride,
2135                                      &y_offset, out_j);
2136       offset = lp_build_add(bld, offset, y_offset);
2137    }
2138    else {
2139       *out_j = bld->zero;
2140    }
2141 
2142    if (z && z_stride) {
2143       LLVMValueRef z_offset;
2144       LLVMValueRef k;
2145       lp_build_sample_partial_offset(bld,
2146                                      1, /* pixel blocks are always 2D */
2147                                      z, z_stride,
2148                                      &z_offset, &k);
2149       offset = lp_build_add(bld, offset, z_offset);
2150    }
2151 
2152    *out_offset = offset;
2153 }
2154 
2155 static LLVMValueRef
lp_build_sample_min(struct lp_build_context * bld,LLVMValueRef x,LLVMValueRef v0,LLVMValueRef v1)2156 lp_build_sample_min(struct lp_build_context *bld,
2157                     LLVMValueRef x,
2158                     LLVMValueRef v0,
2159                     LLVMValueRef v1)
2160 {
2161    /* if the incoming LERP weight is 0 then the min/max
2162     * should ignore that value. */
2163    LLVMValueRef mask = lp_build_compare(bld->gallivm,
2164                                         bld->type,
2165                                         PIPE_FUNC_NOTEQUAL,
2166                                         x, bld->zero);
2167    LLVMValueRef min = lp_build_min(bld, v0, v1);
2168 
2169    return lp_build_select(bld, mask, min, v0);
2170 }
2171 
2172 static LLVMValueRef
lp_build_sample_max(struct lp_build_context * bld,LLVMValueRef x,LLVMValueRef v0,LLVMValueRef v1)2173 lp_build_sample_max(struct lp_build_context *bld,
2174                     LLVMValueRef x,
2175                     LLVMValueRef v0,
2176                     LLVMValueRef v1)
2177 {
2178    /* if the incoming LERP weight is 0 then the min/max
2179     * should ignore that value. */
2180    LLVMValueRef mask = lp_build_compare(bld->gallivm,
2181                                         bld->type,
2182                                         PIPE_FUNC_NOTEQUAL,
2183                                         x, bld->zero);
2184    LLVMValueRef max = lp_build_max(bld, v0, v1);
2185 
2186    return lp_build_select(bld, mask, max, v0);
2187 }
2188 
2189 static LLVMValueRef
lp_build_sample_min_2d(struct lp_build_context * bld,LLVMValueRef x,LLVMValueRef y,LLVMValueRef a,LLVMValueRef b,LLVMValueRef c,LLVMValueRef d)2190 lp_build_sample_min_2d(struct lp_build_context *bld,
2191                        LLVMValueRef x,
2192                        LLVMValueRef y,
2193                        LLVMValueRef a,
2194                        LLVMValueRef b,
2195                        LLVMValueRef c,
2196                        LLVMValueRef d)
2197 {
2198    LLVMValueRef v0 = lp_build_sample_min(bld, x, a, b);
2199    LLVMValueRef v1 = lp_build_sample_min(bld, x, c, d);
2200    return lp_build_sample_min(bld, y, v0, v1);
2201 }
2202 
2203 static LLVMValueRef
lp_build_sample_max_2d(struct lp_build_context * bld,LLVMValueRef x,LLVMValueRef y,LLVMValueRef a,LLVMValueRef b,LLVMValueRef c,LLVMValueRef d)2204 lp_build_sample_max_2d(struct lp_build_context *bld,
2205                        LLVMValueRef x,
2206                        LLVMValueRef y,
2207                        LLVMValueRef a,
2208                        LLVMValueRef b,
2209                        LLVMValueRef c,
2210                        LLVMValueRef d)
2211 {
2212    LLVMValueRef v0 = lp_build_sample_max(bld, x, a, b);
2213    LLVMValueRef v1 = lp_build_sample_max(bld, x, c, d);
2214    return lp_build_sample_max(bld, y, v0, v1);
2215 }
2216 
2217 static LLVMValueRef
lp_build_sample_min_3d(struct lp_build_context * bld,LLVMValueRef x,LLVMValueRef y,LLVMValueRef z,LLVMValueRef a,LLVMValueRef b,LLVMValueRef c,LLVMValueRef d,LLVMValueRef e,LLVMValueRef f,LLVMValueRef g,LLVMValueRef h)2218 lp_build_sample_min_3d(struct lp_build_context *bld,
2219                 LLVMValueRef x,
2220                 LLVMValueRef y,
2221                 LLVMValueRef z,
2222                 LLVMValueRef a, LLVMValueRef b,
2223                 LLVMValueRef c, LLVMValueRef d,
2224                 LLVMValueRef e, LLVMValueRef f,
2225                 LLVMValueRef g, LLVMValueRef h)
2226 {
2227    LLVMValueRef v0 = lp_build_sample_min_2d(bld, x, y, a, b, c, d);
2228    LLVMValueRef v1 = lp_build_sample_min_2d(bld, x, y, e, f, g, h);
2229    return lp_build_sample_min(bld, z, v0, v1);
2230 }
2231 
2232 static LLVMValueRef
lp_build_sample_max_3d(struct lp_build_context * bld,LLVMValueRef x,LLVMValueRef y,LLVMValueRef z,LLVMValueRef a,LLVMValueRef b,LLVMValueRef c,LLVMValueRef d,LLVMValueRef e,LLVMValueRef f,LLVMValueRef g,LLVMValueRef h)2233 lp_build_sample_max_3d(struct lp_build_context *bld,
2234                        LLVMValueRef x,
2235                        LLVMValueRef y,
2236                        LLVMValueRef z,
2237                        LLVMValueRef a, LLVMValueRef b,
2238                        LLVMValueRef c, LLVMValueRef d,
2239                        LLVMValueRef e, LLVMValueRef f,
2240                        LLVMValueRef g, LLVMValueRef h)
2241 {
2242    LLVMValueRef v0 = lp_build_sample_max_2d(bld, x, y, a, b, c, d);
2243    LLVMValueRef v1 = lp_build_sample_max_2d(bld, x, y, e, f, g, h);
2244    return lp_build_sample_max(bld, z, v0, v1);
2245 }
2246 
2247 void
lp_build_reduce_filter(struct lp_build_context * bld,enum pipe_tex_reduction_mode mode,unsigned flags,unsigned num_chan,LLVMValueRef x,LLVMValueRef * v00,LLVMValueRef * v01,LLVMValueRef * out)2248 lp_build_reduce_filter(struct lp_build_context *bld,
2249                        enum pipe_tex_reduction_mode mode,
2250                        unsigned flags,
2251                        unsigned num_chan,
2252                        LLVMValueRef x,
2253                        LLVMValueRef *v00,
2254                        LLVMValueRef *v01,
2255                        LLVMValueRef *out)
2256 {
2257    unsigned chan;
2258    switch (mode) {
2259    case PIPE_TEX_REDUCTION_MIN:
2260       for (chan = 0; chan < num_chan; chan++)
2261          out[chan] = lp_build_sample_min(bld, x, v00[chan], v01[chan]);
2262       break;
2263    case PIPE_TEX_REDUCTION_MAX:
2264       for (chan = 0; chan < num_chan; chan++)
2265          out[chan] = lp_build_sample_max(bld, x, v00[chan], v01[chan]);
2266       break;
2267    case PIPE_TEX_REDUCTION_WEIGHTED_AVERAGE:
2268    default:
2269       for (chan = 0; chan < num_chan; chan++)
2270          out[chan] = lp_build_lerp(bld, x, v00[chan], v01[chan], flags);
2271       break;
2272    }
2273 }
2274 
2275 void
lp_build_reduce_filter_2d(struct lp_build_context * bld,enum pipe_tex_reduction_mode mode,unsigned flags,unsigned num_chan,LLVMValueRef x,LLVMValueRef y,LLVMValueRef * v00,LLVMValueRef * v01,LLVMValueRef * v10,LLVMValueRef * v11,LLVMValueRef * out)2276 lp_build_reduce_filter_2d(struct lp_build_context *bld,
2277                           enum pipe_tex_reduction_mode mode,
2278                           unsigned flags,
2279                           unsigned num_chan,
2280                           LLVMValueRef x,
2281                           LLVMValueRef y,
2282                           LLVMValueRef *v00,
2283                           LLVMValueRef *v01,
2284                           LLVMValueRef *v10,
2285                           LLVMValueRef *v11,
2286                           LLVMValueRef *out)
2287 {
2288    unsigned chan;
2289    switch (mode) {
2290    case PIPE_TEX_REDUCTION_MIN:
2291       for (chan = 0; chan < num_chan; chan++)
2292          out[chan] = lp_build_sample_min_2d(bld, x, y, v00[chan], v01[chan], v10[chan], v11[chan]);
2293       break;
2294    case PIPE_TEX_REDUCTION_MAX:
2295       for (chan = 0; chan < num_chan; chan++)
2296          out[chan] = lp_build_sample_max_2d(bld, x, y, v00[chan], v01[chan], v10[chan], v11[chan]);
2297       break;
2298    case PIPE_TEX_REDUCTION_WEIGHTED_AVERAGE:
2299    default:
2300       for (chan = 0; chan < num_chan; chan++)
2301          out[chan] = lp_build_lerp_2d(bld, x, y, v00[chan], v01[chan], v10[chan], v11[chan], flags);
2302       break;
2303    }
2304 }
2305 
2306 void
lp_build_reduce_filter_3d(struct lp_build_context * bld,enum pipe_tex_reduction_mode mode,unsigned flags,unsigned num_chan,LLVMValueRef x,LLVMValueRef y,LLVMValueRef z,LLVMValueRef * v000,LLVMValueRef * v001,LLVMValueRef * v010,LLVMValueRef * v011,LLVMValueRef * v100,LLVMValueRef * v101,LLVMValueRef * v110,LLVMValueRef * v111,LLVMValueRef * out)2307 lp_build_reduce_filter_3d(struct lp_build_context *bld,
2308                           enum pipe_tex_reduction_mode mode,
2309                           unsigned flags,
2310                           unsigned num_chan,
2311                           LLVMValueRef x,
2312                           LLVMValueRef y,
2313                           LLVMValueRef z,
2314                           LLVMValueRef *v000,
2315                           LLVMValueRef *v001,
2316                           LLVMValueRef *v010,
2317                           LLVMValueRef *v011,
2318                           LLVMValueRef *v100,
2319                           LLVMValueRef *v101,
2320                           LLVMValueRef *v110,
2321                           LLVMValueRef *v111,
2322                           LLVMValueRef *out)
2323 {
2324    unsigned chan;
2325    switch (mode) {
2326    case PIPE_TEX_REDUCTION_MIN:
2327       for (chan = 0; chan < num_chan; chan++)
2328          out[chan] = lp_build_sample_min_3d(bld, x, y, z,
2329                                      v000[chan], v001[chan], v010[chan], v011[chan],
2330                                      v100[chan], v101[chan], v110[chan], v111[chan]);
2331       break;
2332    case PIPE_TEX_REDUCTION_MAX:
2333       for (chan = 0; chan < num_chan; chan++)
2334          out[chan] = lp_build_sample_max_3d(bld, x, y, z,
2335                                      v000[chan], v001[chan], v010[chan], v011[chan],
2336                                      v100[chan], v101[chan], v110[chan], v111[chan]);
2337       break;
2338    case PIPE_TEX_REDUCTION_WEIGHTED_AVERAGE:
2339    default:
2340       for (chan = 0; chan < num_chan; chan++)
2341          out[chan] = lp_build_lerp_3d(bld, x, y, z,
2342                                       v000[chan], v001[chan], v010[chan], v011[chan],
2343                                       v100[chan], v101[chan], v110[chan], v111[chan],
2344                                       flags);
2345       break;
2346    }
2347 }
2348 
/*
 * Precomputed filter weights: exp(-alpha * r2) with alpha = 2, sampled at
 * 1024 evenly spaced values of r2 from 0 to 1 inclusive and written with
 * six decimal places.  The first entry is 1.0 and the last is exp(-2).
 *
 * Generated from (WEIGHT_LUT_SIZE being the table length, 1024):
 *
 *    const float alpha = 2;
 *    for (unsigned i = 0; i < WEIGHT_LUT_SIZE; i++) {
 *       const float r2 = (float) i / (float) (WEIGHT_LUT_SIZE - 1);
 *       const float weight = (float)expf(-alpha * r2);
 *       table[i] = weight;
 *    }
 *
 * Do not edit the values by hand; regenerate them instead.
 */
static const float aniso_filter_table[1024] = {
   1.000000, 0.998047, 0.996098, 0.994152, 0.992210, 0.990272, 0.988338, 0.986408,
   0.984481, 0.982559, 0.980640, 0.978724, 0.976813, 0.974905, 0.973001, 0.971100,
   0.969204, 0.967311, 0.965421, 0.963536, 0.961654, 0.959776, 0.957901, 0.956030,
   0.954163, 0.952299, 0.950439, 0.948583, 0.946730, 0.944881, 0.943036, 0.941194,
   0.939356, 0.937521, 0.935690, 0.933862, 0.932038, 0.930218, 0.928401, 0.926588,
   0.924778, 0.922972, 0.921169, 0.919370, 0.917575, 0.915782, 0.913994, 0.912209,
   0.910427, 0.908649, 0.906874, 0.905103, 0.903335, 0.901571, 0.899810, 0.898052,
   0.896298, 0.894548, 0.892801, 0.891057, 0.889317, 0.887580, 0.885846, 0.884116,
   0.882389, 0.880666, 0.878946, 0.877229, 0.875516, 0.873806, 0.872099, 0.870396,
   0.868696, 0.866999, 0.865306, 0.863616, 0.861929, 0.860245, 0.858565, 0.856888,
   0.855215, 0.853544, 0.851877, 0.850213, 0.848553, 0.846896, 0.845241, 0.843591,
   0.841943, 0.840299, 0.838657, 0.837019, 0.835385, 0.833753, 0.832124, 0.830499,
   0.828877, 0.827258, 0.825643, 0.824030, 0.822421, 0.820814, 0.819211, 0.817611,
   0.816014, 0.814420, 0.812830, 0.811242, 0.809658, 0.808076, 0.806498, 0.804923,
   0.803351, 0.801782, 0.800216, 0.798653, 0.797093, 0.795536, 0.793982, 0.792432,
   0.790884, 0.789339, 0.787798, 0.786259, 0.784723, 0.783191, 0.781661, 0.780134,
   0.778610, 0.777090, 0.775572, 0.774057, 0.772545, 0.771037, 0.769531, 0.768028,
   0.766528, 0.765030, 0.763536, 0.762045, 0.760557, 0.759071, 0.757589, 0.756109,
   0.754632, 0.753158, 0.751687, 0.750219, 0.748754, 0.747291, 0.745832, 0.744375,
   0.742921, 0.741470, 0.740022, 0.738577, 0.737134, 0.735694, 0.734258, 0.732823,
   0.731392, 0.729964, 0.728538, 0.727115, 0.725695, 0.724278, 0.722863, 0.721451,
   0.720042, 0.718636, 0.717232, 0.715831, 0.714433, 0.713038, 0.711645, 0.710255,
   0.708868, 0.707483, 0.706102, 0.704723, 0.703346, 0.701972, 0.700601, 0.699233,
   0.697867, 0.696504, 0.695144, 0.693786, 0.692431, 0.691079, 0.689729, 0.688382,
   0.687037, 0.685696, 0.684356, 0.683020, 0.681686, 0.680354, 0.679025, 0.677699,
   0.676376, 0.675054, 0.673736, 0.672420, 0.671107, 0.669796, 0.668488, 0.667182,
   0.665879, 0.664579, 0.663281, 0.661985, 0.660692, 0.659402, 0.658114, 0.656828,
   0.655546, 0.654265, 0.652987, 0.651712, 0.650439, 0.649169, 0.647901, 0.646635,
   0.645372, 0.644112, 0.642854, 0.641598, 0.640345, 0.639095, 0.637846, 0.636601,
   0.635357, 0.634116, 0.632878, 0.631642, 0.630408, 0.629177, 0.627948, 0.626721,
   0.625497, 0.624276, 0.623056, 0.621839, 0.620625, 0.619413, 0.618203, 0.616996,
   0.615790, 0.614588, 0.613387, 0.612189, 0.610994, 0.609800, 0.608609, 0.607421,
   0.606234, 0.605050, 0.603868, 0.602689, 0.601512, 0.600337, 0.599165, 0.597994,
   0.596826, 0.595661, 0.594497, 0.593336, 0.592177, 0.591021, 0.589866, 0.588714,
   0.587564, 0.586417, 0.585272, 0.584128, 0.582988, 0.581849, 0.580712, 0.579578,
   0.578446, 0.577317, 0.576189, 0.575064, 0.573940, 0.572819, 0.571701, 0.570584,
   0.569470, 0.568357, 0.567247, 0.566139, 0.565034, 0.563930, 0.562829, 0.561729,
   0.560632, 0.559537, 0.558444, 0.557354, 0.556265, 0.555179, 0.554094, 0.553012,
   0.551932, 0.550854, 0.549778, 0.548704, 0.547633, 0.546563, 0.545496, 0.544430,
   0.543367, 0.542306, 0.541246, 0.540189, 0.539134, 0.538081, 0.537030, 0.535981,
   0.534935, 0.533890, 0.532847, 0.531806, 0.530768, 0.529731, 0.528696, 0.527664,
   0.526633, 0.525604, 0.524578, 0.523553, 0.522531, 0.521510, 0.520492, 0.519475,
   0.518460, 0.517448, 0.516437, 0.515429, 0.514422, 0.513417, 0.512414, 0.511414,
   0.510415, 0.509418, 0.508423, 0.507430, 0.506439, 0.505450, 0.504462, 0.503477,
   0.502494, 0.501512, 0.500533, 0.499555, 0.498580, 0.497606, 0.496634, 0.495664,
   0.494696, 0.493730, 0.492765, 0.491803, 0.490842, 0.489884, 0.488927, 0.487972,
   0.487019, 0.486068, 0.485118, 0.484171, 0.483225, 0.482281, 0.481339, 0.480399,
   0.479461, 0.478524, 0.477590, 0.476657, 0.475726, 0.474797, 0.473870, 0.472944,
   0.472020, 0.471098, 0.470178, 0.469260, 0.468343, 0.467429, 0.466516, 0.465605,
   0.464695, 0.463788, 0.462882, 0.461978, 0.461075, 0.460175, 0.459276, 0.458379,
   0.457484, 0.456590, 0.455699, 0.454809, 0.453920, 0.453034, 0.452149, 0.451266,
   0.450384, 0.449505, 0.448627, 0.447751, 0.446876, 0.446003, 0.445132, 0.444263,
   0.443395, 0.442529, 0.441665, 0.440802, 0.439941, 0.439082, 0.438224, 0.437368,
   0.436514, 0.435662, 0.434811, 0.433961, 0.433114, 0.432268, 0.431424, 0.430581,
   0.429740, 0.428901, 0.428063, 0.427227, 0.426393, 0.425560, 0.424729, 0.423899,
   0.423071, 0.422245, 0.421420, 0.420597, 0.419776, 0.418956, 0.418137, 0.417321,
   0.416506, 0.415692, 0.414880, 0.414070, 0.413261, 0.412454, 0.411648, 0.410844,
   0.410042, 0.409241, 0.408442, 0.407644, 0.406848, 0.406053, 0.405260, 0.404469,
   0.403679, 0.402890, 0.402103, 0.401318, 0.400534, 0.399752, 0.398971, 0.398192,
   0.397414, 0.396638, 0.395863, 0.395090, 0.394319, 0.393548, 0.392780, 0.392013,
   0.391247, 0.390483, 0.389720, 0.388959, 0.388199, 0.387441, 0.386684, 0.385929,
   0.385175, 0.384423, 0.383672, 0.382923, 0.382175, 0.381429, 0.380684, 0.379940,
   0.379198, 0.378457, 0.377718, 0.376980, 0.376244, 0.375509, 0.374776, 0.374044,
   0.373313, 0.372584, 0.371856, 0.371130, 0.370405, 0.369682, 0.368960, 0.368239,
   0.367520, 0.366802, 0.366086, 0.365371, 0.364657, 0.363945, 0.363234, 0.362525,
   0.361817, 0.361110, 0.360405, 0.359701, 0.358998, 0.358297, 0.357597, 0.356899,
   0.356202, 0.355506, 0.354812, 0.354119, 0.353427, 0.352737, 0.352048, 0.351360,
   0.350674, 0.349989, 0.349306, 0.348623, 0.347942, 0.347263, 0.346585, 0.345908,
   0.345232, 0.344558, 0.343885, 0.343213, 0.342543, 0.341874, 0.341206, 0.340540,
   0.339874, 0.339211, 0.338548, 0.337887, 0.337227, 0.336568, 0.335911, 0.335255,
   0.334600, 0.333947, 0.333294, 0.332643, 0.331994, 0.331345, 0.330698, 0.330052,
   0.329408, 0.328764, 0.328122, 0.327481, 0.326842, 0.326203, 0.325566, 0.324930,
   0.324296, 0.323662, 0.323030, 0.322399, 0.321770, 0.321141, 0.320514, 0.319888,
   0.319263, 0.318639, 0.318017, 0.317396, 0.316776, 0.316157, 0.315540, 0.314924,
   0.314309, 0.313695, 0.313082, 0.312470, 0.311860, 0.311251, 0.310643, 0.310036,
   0.309431, 0.308827, 0.308223, 0.307621, 0.307021, 0.306421, 0.305822, 0.305225,
   0.304629, 0.304034, 0.303440, 0.302847, 0.302256, 0.301666, 0.301076, 0.300488,
   0.299902, 0.299316, 0.298731, 0.298148, 0.297565, 0.296984, 0.296404, 0.295825,
   0.295247, 0.294671, 0.294095, 0.293521, 0.292948, 0.292375, 0.291804, 0.291234,
   0.290666, 0.290098, 0.289531, 0.288966, 0.288401, 0.287838, 0.287276, 0.286715,
   0.286155, 0.285596, 0.285038, 0.284482, 0.283926, 0.283371, 0.282818, 0.282266,
   0.281714, 0.281164, 0.280615, 0.280067, 0.279520, 0.278974, 0.278429, 0.277885,
   0.277342, 0.276801, 0.276260, 0.275721, 0.275182, 0.274645, 0.274108, 0.273573,
   0.273038, 0.272505, 0.271973, 0.271442, 0.270912, 0.270382, 0.269854, 0.269327,
   0.268801, 0.268276, 0.267752, 0.267229, 0.266707, 0.266186, 0.265667, 0.265148,
   0.264630, 0.264113, 0.263597, 0.263082, 0.262568, 0.262056, 0.261544, 0.261033,
   0.260523, 0.260014, 0.259506, 0.259000, 0.258494, 0.257989, 0.257485, 0.256982,
   0.256480, 0.255979, 0.255479, 0.254980, 0.254482, 0.253985, 0.253489, 0.252994,
   0.252500, 0.252007, 0.251515, 0.251023, 0.250533, 0.250044, 0.249555, 0.249068,
   0.248582, 0.248096, 0.247611, 0.247128, 0.246645, 0.246163, 0.245683, 0.245203,
   0.244724, 0.244246, 0.243769, 0.243293, 0.242818, 0.242343, 0.241870, 0.241398,
   0.240926, 0.240456, 0.239986, 0.239517, 0.239049, 0.238583, 0.238117, 0.237651,
   0.237187, 0.236724, 0.236262, 0.235800, 0.235340, 0.234880, 0.234421, 0.233963,
   0.233506, 0.233050, 0.232595, 0.232141, 0.231688, 0.231235, 0.230783, 0.230333,
   0.229883, 0.229434, 0.228986, 0.228538, 0.228092, 0.227647, 0.227202, 0.226758,
   0.226315, 0.225873, 0.225432, 0.224992, 0.224552, 0.224114, 0.223676, 0.223239,
   0.222803, 0.222368, 0.221934, 0.221500, 0.221068, 0.220636, 0.220205, 0.219775,
   0.219346, 0.218917, 0.218490, 0.218063, 0.217637, 0.217212, 0.216788, 0.216364,
   0.215942, 0.215520, 0.215099, 0.214679, 0.214260, 0.213841, 0.213423, 0.213007,
   0.212591, 0.212175, 0.211761, 0.211347, 0.210935, 0.210523, 0.210111, 0.209701,
   0.209291, 0.208883, 0.208475, 0.208068, 0.207661, 0.207256, 0.206851, 0.206447,
   0.206044, 0.205641, 0.205239, 0.204839, 0.204439, 0.204039, 0.203641, 0.203243,
   0.202846, 0.202450, 0.202054, 0.201660, 0.201266, 0.200873, 0.200481, 0.200089,
   0.199698, 0.199308, 0.198919, 0.198530, 0.198143, 0.197756, 0.197369, 0.196984,
   0.196599, 0.196215, 0.195832, 0.195449, 0.195068, 0.194687, 0.194306, 0.193927,
   0.193548, 0.193170, 0.192793, 0.192416, 0.192041, 0.191665, 0.191291, 0.190917,
   0.190545, 0.190172, 0.189801, 0.189430, 0.189060, 0.188691, 0.188323, 0.187955,
   0.187588, 0.187221, 0.186856, 0.186491, 0.186126, 0.185763, 0.185400, 0.185038,
   0.184676, 0.184316, 0.183956, 0.183597, 0.183238, 0.182880, 0.182523, 0.182166,
   0.181811, 0.181455, 0.181101, 0.180747, 0.180394, 0.180042, 0.179690, 0.179339,
   0.178989, 0.178640, 0.178291, 0.177942, 0.177595, 0.177248, 0.176902, 0.176556,
   0.176211, 0.175867, 0.175524, 0.175181, 0.174839, 0.174497, 0.174157, 0.173816,
   0.173477, 0.173138, 0.172800, 0.172462, 0.172126, 0.171789, 0.171454, 0.171119,
   0.170785, 0.170451, 0.170118, 0.169786, 0.169454, 0.169124, 0.168793, 0.168463,
   0.168134, 0.167806, 0.167478, 0.167151, 0.166825, 0.166499, 0.166174, 0.165849,
   0.165525, 0.165202, 0.164879, 0.164557, 0.164236, 0.163915, 0.163595, 0.163275,
   0.162957, 0.162638, 0.162321, 0.162004, 0.161687, 0.161371, 0.161056, 0.160742,
   0.160428, 0.160114, 0.159802, 0.159489, 0.159178, 0.158867, 0.158557, 0.158247,
   0.157938, 0.157630, 0.157322, 0.157014, 0.156708, 0.156402, 0.156096, 0.155791,
   0.155487, 0.155183, 0.154880, 0.154578, 0.154276, 0.153975, 0.153674, 0.153374,
   0.153074, 0.152775, 0.152477, 0.152179, 0.151882, 0.151585, 0.151289, 0.150994,
   0.150699, 0.150404, 0.150111, 0.149817, 0.149525, 0.149233, 0.148941, 0.148650,
   0.148360, 0.148070, 0.147781, 0.147492, 0.147204, 0.146917, 0.146630, 0.146344,
   0.146058, 0.145772, 0.145488, 0.145204, 0.144920, 0.144637, 0.144354, 0.144072,
   0.143791, 0.143510, 0.143230, 0.142950, 0.142671, 0.142392, 0.142114, 0.141837,
   0.141560, 0.141283, 0.141007, 0.140732, 0.140457, 0.140183, 0.139909, 0.139636,
   0.139363, 0.139091, 0.138819, 0.138548, 0.138277, 0.138007, 0.137738, 0.137469,
   0.137200, 0.136932, 0.136665, 0.136398, 0.136131, 0.135865, 0.135600, 0.135335,
};
2486 
2487 const float *
lp_build_sample_aniso_filter_table(void)2488 lp_build_sample_aniso_filter_table(void)
2489 {
2490    return aniso_filter_table;
2491 }
2492