1 /**************************************************************************
2  *
3  * Copyright 2010-2021 VMware, Inc.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sub license, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
18  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
19  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20  * USE OR OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * The above copyright notice and this permission notice (including the
23  * next paragraph) shall be included in all copies or substantial portions
24  * of the Software.
25  *
26  **************************************************************************/
27 
28 
29 #include "pipe/p_config.h"
30 
31 #include "util/u_math.h"
32 #include "util/u_cpu_detect.h"
33 #include "util/u_pack_color.h"
34 #include "util/u_surface.h"
35 #include "util/u_sse.h"
36 
37 #include "lp_jit.h"
38 #include "lp_rast.h"
39 #include "lp_debug.h"
40 #include "lp_state_fs.h"
41 #include "lp_linear_priv.h"
42 
43 
44 #if defined(PIPE_ARCH_SSE)
45 
46 #include <emmintrin.h>
47 
48 
struct nearest_sampler {
   /* One fetched row of texels; 16-byte aligned so SSE code can read it
    * with aligned loads.  64 entries = the widest span handled here.
    */
   PIPE_ALIGN_VAR(16) uint32_t out[64];

   const struct lp_jit_texture *texture;
   float fsrc_x;                /* texel-space start x (src_x0) */
   float fsrc_y;                /* texel-space start y (src_y0) */
   float fdsdx;              /* texel x step per screen x */
   float fdsdy;              /* texel x step per screen y */
   float fdtdx;              /* texel y step per screen x */
   float fdtdy;              /* texel y step per screen y */
   int width;
   int y;                       /* current row, advanced by fetch() */

   /* Row-fetch callback, chosen in init_nearest_sampler() depending on
    * whether clamping / non-axis-aligned lookup is needed.
    */
   const uint32_t *(*fetch)(struct nearest_sampler *samp);
};
64 
65 
/* Per-channel linear interpolant state.
 * NOTE(review): not referenced elsewhere in this chunk — presumably used
 * by other parts of the linear path; verify before removing.
 */
struct linear_interp {
   PIPE_ALIGN_VAR(16) uint32_t out[64]; /* interpolated output row, SSE aligned */
   __m128i a0;                  /* value at the start position */
   __m128i dadx;                /* step per pixel in x */
   __m128i dady;                /* step per row in y */
   int width;                   /* rounded up to multiple of 4 */
   boolean is_constant;         /* true if dadx/dady are zero */
};
74 
75 /* Organize all the information needed for blending in one place.
76  * Could have blend function pointer here, but we currently always
77  * know which one we want to call.
78  */
struct color_blend {
   const uint32_t *src;         /* incoming (shaded) row, 16-byte aligned */
   uint8_t *color;              /* current destination row; blend fns advance it */
   int stride;                  /* destination row stride in bytes */
   int width;                   /* the exact width */
};
85 
86 
87 /* Organize all the information needed for running each of the shaders
88  * in one place.
89  */
/* Organize all the information needed for running each of the shaders
 * in one place.
 */
struct shader {
   PIPE_ALIGN_VAR(16) uint32_t out0[64]; /* shader output row, SSE aligned */
   const uint32_t *src0;        /* first input row (e.g. fetched texels) */
   const uint32_t *src1;        /* optional second input row */
   __m128i const0;              /* shader constant, if any */
   int width;                   /* rounded up to multiple of 4 */
};
97 
98 
99 /* For a row of pixels, perform add/one/inv_src_alpha (ie
100  * premultiplied alpha) blending between the incoming pixels and the
101  * destination buffer.
102  *
103  * Used to implement the BLIT_RGBA + blend shader, there are no
104  * operations from the pixel shader left to implement at this level -
105  * effectively the pixel shader was just a texture fetch which has
106  * already been performed.  This routine then purely implements
107  * blending.
108  */
109 static void
blend_premul(struct color_blend * blend)110 blend_premul(struct color_blend *blend)
111 {
112    const uint32_t *src = blend->src;  /* aligned */
113    uint32_t *dst = (uint32_t *)blend->color;      /* unaligned */
114    int width = blend->width;
115    int i;
116    __m128i tmp;
117    union { __m128i m128; uint ui[4]; } dstreg;
118 
119    blend->color += blend->stride;
120 
121    for (i = 0; i + 3 < width; i += 4) {
122       tmp = _mm_loadu_si128((const __m128i *)&dst[i]);  /* UNALIGNED READ */
123       dstreg.m128 = util_sse2_blend_premul_4(*(const __m128i *)&src[i],
124                                              tmp);
125       _mm_storeu_si128((__m128i *)&dst[i], dstreg.m128); /* UNALIGNED WRITE */
126    }
127 
128    if (i < width) {
129       int j;
130       for (j = 0; j < width - i ; j++) {
131          dstreg.ui[j] = dst[i+j];
132       }
133       dstreg.m128 = util_sse2_blend_premul_4(*(const __m128i *)&src[i],
134                                              dstreg.m128);
135       for (; i < width; i++)
136          dst[i] = dstreg.ui[i&3];
137    }
138 }
139 
140 
141 static void
blend_noop(struct color_blend * blend)142 blend_noop(struct color_blend *blend)
143 {
144    memcpy(blend->color, blend->src, blend->width * sizeof(unsigned));
145    blend->color += blend->stride;
146 }
147 
148 
149 static void
init_blend(struct color_blend * blend,int x,int y,int width,int height,uint8_t * color,int stride)150 init_blend(struct color_blend *blend,
151            int x, int y, int width, int height,
152            uint8_t *color,
153            int stride)
154 {
155    blend->color = color + x * 4 + y * stride;
156    blend->stride = stride;
157    blend->width = width;
158 }
159 
160 
161 /*
162  * Perform nearest filtered lookup of a row of texels.  Texture lookup
163  * is assumed to be axis aligned but with arbitrary scaling.
164  *
165  * Texture coordinate interpolation is performed in 24.8 fixed point.
166  * Note that the longest span we will encounter is 64 pixels long,
167  * meaning that 8 fractional bits is more than sufficient to represent
168  * the shallowest gradient possible within this span.
169  *
170  * After 64 pixels (ie. in the next tile), the starting point will be
171  * recalculated with floating point arithmetic.
172  *
173  * XXX: migrate this to use Jose's quad blitter texture fetch routines.
174  */
175 static const uint32_t *
fetch_row(struct nearest_sampler * samp)176 fetch_row(struct nearest_sampler *samp)
177 {
178    int y = samp->y++;
179    uint32_t *row = samp->out;
180    const struct lp_jit_texture *texture = samp->texture;
181    int yy = util_iround(samp->fsrc_y + samp->fdtdy * y);
182    const uint32_t *src_row =
183       (const uint32_t *)((const uint8_t *)texture->base +
184                          yy * texture->row_stride[0]);
185    int iscale_x = samp->fdsdx * 256;
186    int acc      = samp->fsrc_x * 256 + 128;
187    int width    = samp->width;
188    int i;
189 
190    for (i = 0; i < width; i++) {
191       row[i] = src_row[acc>>8];
192       acc += iscale_x;
193    }
194 
195    return row;
196 }
197 
/* Version of fetch_row which can cope with texture edges.  In
 * practice, aero never triggers this.
 */
201 static const uint32_t *
fetch_row_clamped(struct nearest_sampler * samp)202 fetch_row_clamped(struct nearest_sampler *samp)
203 {
204    int y = samp->y++;
205    uint32_t *row = samp->out;
206    const struct lp_jit_texture *texture = samp->texture;
207 
208    int yy = util_iround(samp->fsrc_y + samp->fdtdy * y);
209 
210    const uint32_t *src_row =
211       (const uint32_t *)((const uint8_t *)texture->base +
212                          CLAMP(yy, 0, texture->height-1) *
213                          texture->row_stride[0]);
214    float src_x0 = samp->fsrc_x;
215    float scale_x = samp->fdsdx;
216    int width    = samp->width;
217    int i;
218 
219    for (i = 0; i < width; i++) {
220       row[i] = src_row[CLAMP(util_iround(src_x0 + i*scale_x),0,texture->width-1)];
221    }
222 
223    return row;
224 }
225 
/* It very rarely happens that some non-axis-aligned texturing creeps
 * into the linear path.  Handle it here.  The alternative would be
 * more pre-checking or an option to fall back by returning false from
 * jit_linear.
 */
231 static const uint32_t *
fetch_row_xy_clamped(struct nearest_sampler * samp)232 fetch_row_xy_clamped(struct nearest_sampler *samp)
233 {
234    int y = samp->y++;
235    uint32_t *row = samp->out;
236    const struct lp_jit_texture *texture = samp->texture;
237    float yrow = samp->fsrc_y + samp->fdtdy * y;
238    float xrow = samp->fsrc_x + samp->fdsdy * y;
239    int width  = samp->width;
240    int i;
241 
242    for (i = 0; i < width; i++) {
243       int yy = util_iround(yrow + samp->fdtdx * i);
244       int xx = util_iround(xrow + samp->fdsdx * i);
245 
246       const uint32_t *src_row =
247          (const uint32_t *)((const uint8_t *)texture->base +
248                             CLAMP(yy, 0, texture->height-1) *
249                             texture->row_stride[0]);
250 
251       row[i] = src_row[CLAMP(xx,0,texture->width-1)];
252    }
253 
254    return row;
255 }
256 
257 
/**
 * Set up the nearest sampler for a width x height span starting at
 * screen position (x0, y0).
 *
 * Coordinates are converted to texel space up front (scaled by the
 * texture dimensions, shifted by -0.5 to sample at texel centres) and
 * the cheapest fetch routine that is still correct is selected.
 *
 * Returns FALSE if perspective-correct interpolation would be needed
 * (w varies across the span) — this fast path only handles constant w.
 */
static boolean
init_nearest_sampler(struct nearest_sampler *samp,
                     const struct lp_jit_texture *texture,
                     int x0, int y0,
                     int width, int height,
                     float s0, float dsdx, float dsdy,
                     float t0, float dtdx, float dtdy,
                     float w0, float dwdx, float dwdy)
{
   int i;
   float oow = 1.0f / w0;

   /* Only non-perspective (constant w) interpolation is supported. */
   if (dwdx != 0.0 || dwdy != 0.0)
      return FALSE;

   samp->texture = texture;
   samp->width = width;
   /* Texel-space gradients: normalized coords scaled by texture size. */
   samp->fdsdx = dsdx * texture->width * oow;
   samp->fdsdy = dsdy * texture->width * oow;
   samp->fdtdx = dtdx * texture->height * oow;
   samp->fdtdy = dtdy * texture->height * oow;
   /* Start position in texel space; -0.5 centres the nearest lookup. */
   samp->fsrc_x = (samp->fdsdx * x0 +
                   samp->fdsdy * y0 +
                   s0 * texture->width * oow - 0.5f);

   samp->fsrc_y = (samp->fdtdx * x0 +
                   samp->fdtdy * y0 +
                   t0 * texture->height * oow - 0.5f);
   samp->y = 0;

   /* Because we want to permit consumers of this data to round up to
    * the next multiple of 4, and because we don't want valgrind to
    * complain about uninitialized reads, set the last bit of the
    * buffer to zero:
    */
   for (i = width; i & 3; i++)
      samp->out[i] = 0;

   if (dsdy != 0 || dtdx != 0)
   {
      /* Arbitrary texture lookup:
       */
      samp->fetch = fetch_row_xy_clamped;
   }
   else
   {
      /* Axis aligned stretch blit, arbitrary scaling factors including
       * flipped, minifying and magnifying:
       */
      int isrc_x = util_iround(samp->fsrc_x);
      int isrc_y = util_iround(samp->fsrc_y);
      int isrc_x1 = util_iround(samp->fsrc_x + width * samp->fdsdx);
      int isrc_y1 = util_iround(samp->fsrc_y + height * samp->fdtdy);

      /* Look at the maximum and minimum texture coordinates we will be
       * fetching and figure out if we need to use clamping.  There is
       * similar code in u_blit_sw.c which takes a better approach to
       * this which could be substituted later.
       */
      if (isrc_x  <= texture->width  && isrc_x  >= 0 &&
          isrc_y  <= texture->height && isrc_y  >= 0 &&
          isrc_x1 <= texture->width  && isrc_x1 >= 0 &&
          isrc_y1 <= texture->height && isrc_y1 >= 0)
      {
         samp->fetch = fetch_row;
      }
      else {
         samp->fetch = fetch_row_clamped;
      }
   }

   return TRUE;
}
331 
332 
333 static const uint32_t *
shade_rgb1(struct shader * shader)334 shade_rgb1(struct shader *shader)
335 {
336    const __m128i rgb1 = _mm_set1_epi32(0xff000000);
337    const uint32_t *src0 = shader->src0;
338    uint32_t *dst = shader->out0;
339    int width = shader->width;
340    int i;
341 
342    for (i = 0; i + 3 < width; i += 4) {
343       __m128i s = *(const __m128i *)&src0[i];
344       *(__m128i *)&dst[i] = _mm_or_si128(s, rgb1);
345    }
346 
347    return shader->out0;
348 }
349 
350 
351 static void
init_shader(struct shader * shader,int x,int y,int width,int height)352 init_shader(struct shader *shader,
353            int x, int y, int width, int height)
354 {
355    shader->width = align(width, 4);
356 }
357 
358 
359 /* Linear shader which implements the BLIT_RGBA shader with the
360  * additional constraints imposed by lp_setup_is_blit().
361  */
362 static boolean
blit_rgba_blit(const struct lp_rast_state * state,unsigned x,unsigned y,unsigned width,unsigned height,const float (* a0)[4],const float (* dadx)[4],const float (* dady)[4],uint8_t * color,unsigned stride)363 blit_rgba_blit(const struct lp_rast_state *state,
364                unsigned x, unsigned y,
365                unsigned width, unsigned height,
366                const float (*a0)[4],
367                const float (*dadx)[4],
368                const float (*dady)[4],
369                uint8_t *color,
370                unsigned stride)
371 {
372    const struct lp_jit_context *context = &state->jit_context;
373    const struct lp_jit_texture *texture = &context->textures[0];
374    const uint8_t *src;
375    unsigned src_stride;
376    int src_x, src_y;
377 
378    LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
379 
380    /* Require w==1.0:
381     */
382    if (a0[0][3] != 1.0 ||
383        dadx[0][3] != 0.0 ||
384        dady[0][3] != 0.0)
385       return FALSE;
386 
387    src_x = x + util_iround(a0[1][0]*texture->width - 0.5f);
388    src_y = y + util_iround(a0[1][1]*texture->height - 0.5f);
389 
390    src = texture->base;
391    src_stride = texture->row_stride[0];
392 
393    /* Fall back to blit_rgba() if clamping required:
394     */
395    if (src_x < 0 ||
396        src_y < 0 ||
397        src_x + width > texture->width ||
398        src_y + height > texture->height)
399       return FALSE;
400 
401    util_copy_rect(color, PIPE_FORMAT_B8G8R8A8_UNORM, stride,
402                   x, y,
403                   width, height,
404                   src, src_stride,
405                   src_x, src_y);
406 
407    return TRUE;
408 }
409 
410 
411 /* Linear shader which implements the BLIT_RGB1 shader, with the
412  * additional constraints imposed by lp_setup_is_blit().
413  */
414 static boolean
blit_rgb1_blit(const struct lp_rast_state * state,unsigned x,unsigned y,unsigned width,unsigned height,const float (* a0)[4],const float (* dadx)[4],const float (* dady)[4],uint8_t * color,unsigned stride)415 blit_rgb1_blit(const struct lp_rast_state *state,
416                unsigned x, unsigned y,
417                unsigned width, unsigned height,
418                const float (*a0)[4],
419                const float (*dadx)[4],
420                const float (*dady)[4],
421                uint8_t *color,
422                unsigned stride)
423 {
424    const struct lp_jit_context *context = &state->jit_context;
425    const struct lp_jit_texture *texture = &context->textures[0];
426    const uint8_t *src;
427    unsigned src_stride;
428    int src_x, src_y;
429 
430    LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
431 
432    /* Require w==1.0:
433     */
434    if (a0[0][3] != 1.0 ||
435        dadx[0][3] != 0.0 ||
436        dady[0][3] != 0.0)
437       return FALSE;
438 
439    color += x * 4 + y * stride;
440 
441    src_x = x + util_iround(a0[1][0]*texture->width - 0.5f);
442    src_y = y + util_iround(a0[1][1]*texture->height - 0.5f);
443 
444    src = texture->base;
445    src_stride = texture->row_stride[0];
446    src += src_x * 4;
447    src += src_y * src_stride;
448 
449    if (src_x < 0 ||
450        src_y < 0 ||
451        src_x + width > texture->width ||
452        src_y + height > texture->height)
453       return FALSE;
454 
455    for (y = 0; y < height; y++) {
456       const uint32_t *src_row = (const uint32_t *)src;
457       uint32_t *dst_row = (uint32_t *)color;
458 
459       for (x = 0; x < width; x++) {
460          *dst_row++ = *src_row++ | 0xff000000;
461       }
462 
463       color += stride;
464       src += src_stride;
465    }
466 
467    return TRUE;
468 }
469 
470 
471 /* Linear shader variant implementing the BLIT_RGBA shader without
472  * blending.
473  */
474 static boolean
blit_rgba(const struct lp_rast_state * state,unsigned x,unsigned y,unsigned width,unsigned height,const float (* a0)[4],const float (* dadx)[4],const float (* dady)[4],uint8_t * color,unsigned stride)475 blit_rgba(const struct lp_rast_state *state,
476           unsigned x, unsigned y,
477           unsigned width, unsigned height,
478           const float (*a0)[4],
479           const float (*dadx)[4],
480           const float (*dady)[4],
481           uint8_t *color,
482           unsigned stride)
483 {
484    const struct lp_jit_context *context = &state->jit_context;
485    struct nearest_sampler samp;
486    struct color_blend blend;
487 
488    LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
489 
490    if (!init_nearest_sampler(&samp,
491                              &context->textures[0],
492                              x, y, width, height,
493                              a0[1][0], dadx[1][0], dady[1][0],
494                              a0[1][1], dadx[1][1], dady[1][1],
495                              a0[0][3], dadx[0][3], dady[0][3]))
496       return FALSE;
497 
498    init_blend(&blend,
499               x, y, width, height,
500               color, stride);
501 
502    /* Rasterize the rectangle and run the shader:
503     */
504    for (y = 0; y < height; y++) {
505       blend.src = samp.fetch(&samp);
506       blend_noop(&blend);
507    }
508 
509    return TRUE;
510 }
511 
512 
513 static boolean
blit_rgb1(const struct lp_rast_state * state,unsigned x,unsigned y,unsigned width,unsigned height,const float (* a0)[4],const float (* dadx)[4],const float (* dady)[4],uint8_t * color,unsigned stride)514 blit_rgb1(const struct lp_rast_state *state,
515           unsigned x, unsigned y,
516           unsigned width, unsigned height,
517           const float (*a0)[4],
518           const float (*dadx)[4],
519           const float (*dady)[4],
520           uint8_t *color,
521           unsigned stride)
522 {
523    const struct lp_jit_context *context = &state->jit_context;
524    struct nearest_sampler samp;
525    struct color_blend blend;
526    struct shader shader;
527 
528    LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
529 
530    if (!init_nearest_sampler(&samp,
531                              &context->textures[0],
532                              x, y, width, height,
533                              a0[1][0], dadx[1][0], dady[1][0],
534                              a0[1][1], dadx[1][1], dady[1][1],
535                              a0[0][3], dadx[0][3], dady[0][3]))
536       return FALSE;
537 
538    init_blend(&blend,
539               x, y, width, height,
540               color, stride);
541 
542 
543    init_shader(&shader,
544                x, y, width, height);
545 
546    /* Rasterize the rectangle and run the shader:
547     */
548    for (y = 0; y < height; y++) {
549       shader.src0 = samp.fetch(&samp);
550       blend.src = shade_rgb1(&shader);
551       blend_noop(&blend);
552    }
553 
554    return TRUE;
555 }
556 
557 
558 /* Linear shader variant implementing the BLIT_RGBA shader with
559  * one/inv_src_alpha blending.
560  */
561 static boolean
blit_rgba_blend_premul(const struct lp_rast_state * state,unsigned x,unsigned y,unsigned width,unsigned height,const float (* a0)[4],const float (* dadx)[4],const float (* dady)[4],uint8_t * color,unsigned stride)562 blit_rgba_blend_premul(const struct lp_rast_state *state,
563                        unsigned x, unsigned y,
564                        unsigned width, unsigned height,
565                        const float (*a0)[4],
566                        const float (*dadx)[4],
567                        const float (*dady)[4],
568                        uint8_t *color,
569                        unsigned stride)
570 {
571    const struct lp_jit_context *context = &state->jit_context;
572    struct nearest_sampler samp;
573    struct color_blend blend;
574 
575    LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
576 
577    if (!init_nearest_sampler(&samp,
578                              &context->textures[0],
579                              x, y, width, height,
580                              a0[1][0], dadx[1][0], dady[1][0],
581                              a0[1][1], dadx[1][1], dady[1][1],
582                              a0[0][3], dadx[0][3], dady[0][3]))
583       return FALSE;
584 
585 
586    init_blend(&blend,
587               x, y, width, height,
588               color, stride);
589 
590    /* Rasterize the rectangle and run the shader:
591     */
592    for (y = 0; y < height; y++) {
593       blend.src = samp.fetch(&samp);
594       blend_premul(&blend);
595    }
596 
597    return TRUE;
598 }
599 
600 
601 /* Linear shader which always emits red.  Used for debugging.
602  */
603 static boolean
linear_red(const struct lp_rast_state * state,unsigned x,unsigned y,unsigned width,unsigned height,const float (* a0)[4],const float (* dadx)[4],const float (* dady)[4],uint8_t * color,unsigned stride)604 linear_red(const struct lp_rast_state *state,
605            unsigned x, unsigned y,
606            unsigned width, unsigned height,
607            const float (*a0)[4],
608            const float (*dadx)[4],
609            const float (*dady)[4],
610            uint8_t *color,
611            unsigned stride)
612 {
613    union util_color uc;
614 
615    util_pack_color_ub(0xff, 0, 0, 0xff,
616                       PIPE_FORMAT_B8G8R8A8_UNORM, &uc);
617 
618    util_fill_rect(color,
619                   PIPE_FORMAT_B8G8R8A8_UNORM,
620                   stride,
621                   x,
622                   y,
623                   width,
624                   height,
625                   &uc);
626 
627    return TRUE;
628 }
629 
630 
631 /* Noop linear shader variant, for debugging.
632  */
633 static boolean
linear_no_op(const struct lp_rast_state * state,unsigned x,unsigned y,unsigned width,unsigned height,const float (* a0)[4],const float (* dadx)[4],const float (* dady)[4],uint8_t * color,unsigned stride)634 linear_no_op(const struct lp_rast_state *state,
635              unsigned x, unsigned y,
636              unsigned width, unsigned height,
637              const float (*a0)[4],
638              const float (*dadx)[4],
639              const float (*dady)[4],
640              uint8_t *color,
641              unsigned stride)
642 {
643    return TRUE;
644 }
645 
646 /* Check for ADD/ONE/INV_SRC_ALPHA, ie premultiplied-alpha blending.
647  */
648 static boolean
is_one_inv_src_alpha_blend(const struct lp_fragment_shader_variant * variant)649 is_one_inv_src_alpha_blend(const struct lp_fragment_shader_variant *variant)
650 {
651    return
652       !variant->key.blend.logicop_enable &&
653       variant->key.blend.rt[0].blend_enable &&
654       variant->key.blend.rt[0].rgb_func == PIPE_BLEND_ADD &&
655       variant->key.blend.rt[0].rgb_src_factor == PIPE_BLENDFACTOR_ONE &&
656       variant->key.blend.rt[0].rgb_dst_factor == PIPE_BLENDFACTOR_INV_SRC_ALPHA &&
657       variant->key.blend.rt[0].alpha_func == PIPE_BLEND_ADD &&
658       variant->key.blend.rt[0].alpha_src_factor == PIPE_BLENDFACTOR_ONE &&
659       variant->key.blend.rt[0].alpha_dst_factor == PIPE_BLENDFACTOR_INV_SRC_ALPHA &&
660       variant->key.blend.rt[0].colormask == 0xf;
661 }
662 
663 
/* Examine the fragment shader variant and determine whether we can
 * substitute a fastpath linear shader implementation.
 */
667 void
llvmpipe_fs_variant_linear_fastpath(struct lp_fragment_shader_variant * variant)668 llvmpipe_fs_variant_linear_fastpath(struct lp_fragment_shader_variant *variant)
669 {
670    struct lp_sampler_static_state *samp0 = lp_fs_variant_key_sampler_idx(&variant->key, 0);
671 
672    if (LP_PERF & PERF_NO_SHADE) {
673       variant->jit_linear                   = linear_red;
674       return;
675    }
676 
677    if (!samp0)
678       return;
679 
680    enum pipe_format tex_format = samp0->texture_state.format;
681    if (variant->shader->kind == LP_FS_KIND_BLIT_RGBA &&
682        tex_format == PIPE_FORMAT_B8G8R8A8_UNORM &&
683        is_nearest_clamp_sampler(samp0)) {
684       if (variant->opaque) {
685          variant->jit_linear_blit             = blit_rgba_blit;
686          variant->jit_linear                  = blit_rgba;
687       }
688       else if (is_one_inv_src_alpha_blend(variant) &&
689                util_get_cpu_caps()->has_sse2) {
690          variant->jit_linear                  = blit_rgba_blend_premul;
691       }
692       return;
693    }
694 
695    if (variant->shader->kind == LP_FS_KIND_BLIT_RGB1 &&
696        variant->opaque &&
697        (tex_format == PIPE_FORMAT_B8G8R8A8_UNORM ||
698         tex_format == PIPE_FORMAT_B8G8R8X8_UNORM) &&
699        is_nearest_clamp_sampler(samp0)) {
700       variant->jit_linear_blit             = blit_rgb1_blit;
701       variant->jit_linear                  = blit_rgb1;
702       return;
703    }
704 
705    if (0) {
706       variant->jit_linear                   = linear_no_op;
707       return;
708    }
709 }
710 #else
void
llvmpipe_fs_variant_linear_fastpath(struct lp_fragment_shader_variant *variant)
{
   /* No SSE: leave variant->jit_linear unset so the generic path is used. */
}
716 #endif
717 
718