1 /**************************************************************************
2 *
3 * Copyright 2010-2021 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
18 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
19 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20 * USE OR OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * The above copyright notice and this permission notice (including the
23 * next paragraph) shall be included in all copies or substantial portions
24 * of the Software.
25 *
26 **************************************************************************/
27
28
29 #include "pipe/p_config.h"
30
31 #include "util/u_math.h"
32 #include "util/u_cpu_detect.h"
33 #include "util/u_pack_color.h"
34 #include "util/u_surface.h"
35 #include "util/u_sse.h"
36
37 #include "lp_jit.h"
38 #include "lp_rast.h"
39 #include "lp_debug.h"
40 #include "lp_state_fs.h"
41 #include "lp_linear_priv.h"
42
43
44 #if defined(PIPE_ARCH_SSE)
45
46 #include <emmintrin.h>
47
48
struct nearest_sampler {
   /* One fetched row of texels; spans are at most 64 pixels wide. */
   PIPE_ALIGN_VAR(16) uint32_t out[64];

   const struct lp_jit_texture *texture;
   float fsrc_x;                /* src_x0: texel-space x at the span origin */
   float fsrc_y;                /* src_y0: texel-space y at the span origin */
   float fdsdx;                 /* texel-space step of s per pixel in x */
   float fdsdy;                 /* texel-space step of s per pixel in y */
   float fdtdx;                 /* texel-space step of t per pixel in x */
   float fdtdy;                 /* texel-space step of t per pixel in y */
   int width;                   /* exact span width in pixels */
   int y;                       /* next row to fetch (incremented by fetch) */

   /* Row-fetch routine selected by init_nearest_sampler(); returns out[]. */
   const uint32_t *(*fetch)(struct nearest_sampler *samp);
};
64
65
struct linear_interp {
   /* One row of interpolated color values. */
   PIPE_ALIGN_VAR(16) uint32_t out[64];
   __m128i a0;            /* packed start value */
   __m128i dadx;          /* packed step per pixel in x */
   __m128i dady;          /* packed step per row */
   int width;             /* rounded up to multiple of 4 */
   /* NOTE(review): presumably set when dadx/dady are zero so the
    * interpolator can be skipped — users of this struct are outside
    * this chunk, so confirm against the rest of the file.
    */
   boolean is_constant;
};
74
75 /* Organize all the information needed for blending in one place.
76 * Could have blend function pointer here, but we currently always
77 * know which one we want to call.
78 */
struct color_blend {
   const uint32_t *src;   /* shaded/fetched source row (16-byte aligned) */
   uint8_t *color;        /* current destination row (may be unaligned) */
   int stride;            /* destination row stride in bytes */
   int width;             /* the exact width */
};
85
86
87 /* Organize all the information needed for running each of the shaders
88 * in one place.
89 */
struct shader {
   /* Shader output row, consumed by the blend stage. */
   PIPE_ALIGN_VAR(16) uint32_t out0[64];
   const uint32_t *src0;  /* input row from the sampler */
   const uint32_t *src1;  /* second input row (unused by shade_rgb1) */
   __m128i const0;        /* packed shader constant (unused by shade_rgb1) */
   int width;             /* rounded up to multiple of 4 */
};
97
98
99 /* For a row of pixels, perform add/one/inv_src_alpha (ie
100 * premultiplied alpha) blending between the incoming pixels and the
101 * destination buffer.
102 *
103 * Used to implement the BLIT_RGBA + blend shader, there are no
104 * operations from the pixel shader left to implement at this level -
105 * effectively the pixel shader was just a texture fetch which has
106 * already been performed. This routine then purely implements
107 * blending.
108 */
109 static void
blend_premul(struct color_blend * blend)110 blend_premul(struct color_blend *blend)
111 {
112 const uint32_t *src = blend->src; /* aligned */
113 uint32_t *dst = (uint32_t *)blend->color; /* unaligned */
114 int width = blend->width;
115 int i;
116 __m128i tmp;
117 union { __m128i m128; uint ui[4]; } dstreg;
118
119 blend->color += blend->stride;
120
121 for (i = 0; i + 3 < width; i += 4) {
122 tmp = _mm_loadu_si128((const __m128i *)&dst[i]); /* UNALIGNED READ */
123 dstreg.m128 = util_sse2_blend_premul_4(*(const __m128i *)&src[i],
124 tmp);
125 _mm_storeu_si128((__m128i *)&dst[i], dstreg.m128); /* UNALIGNED WRITE */
126 }
127
128 if (i < width) {
129 int j;
130 for (j = 0; j < width - i ; j++) {
131 dstreg.ui[j] = dst[i+j];
132 }
133 dstreg.m128 = util_sse2_blend_premul_4(*(const __m128i *)&src[i],
134 dstreg.m128);
135 for (; i < width; i++)
136 dst[i] = dstreg.ui[i&3];
137 }
138 }
139
140
141 static void
blend_noop(struct color_blend * blend)142 blend_noop(struct color_blend *blend)
143 {
144 memcpy(blend->color, blend->src, blend->width * sizeof(unsigned));
145 blend->color += blend->stride;
146 }
147
148
149 static void
init_blend(struct color_blend * blend,int x,int y,int width,int height,uint8_t * color,int stride)150 init_blend(struct color_blend *blend,
151 int x, int y, int width, int height,
152 uint8_t *color,
153 int stride)
154 {
155 blend->color = color + x * 4 + y * stride;
156 blend->stride = stride;
157 blend->width = width;
158 }
159
160
161 /*
162 * Perform nearest filtered lookup of a row of texels. Texture lookup
163 * is assumed to be axis aligned but with arbitrary scaling.
164 *
165 * Texture coordinate interpolation is performed in 24.8 fixed point.
166 * Note that the longest span we will encounter is 64 pixels long,
167 * meaning that 8 fractional bits is more than sufficient to represent
168 * the shallowest gradient possible within this span.
169 *
170 * After 64 pixels (ie. in the next tile), the starting point will be
171 * recalculated with floating point arithmetic.
172 *
173 * XXX: migrate this to use Jose's quad blitter texture fetch routines.
174 */
175 static const uint32_t *
fetch_row(struct nearest_sampler * samp)176 fetch_row(struct nearest_sampler *samp)
177 {
178 int y = samp->y++;
179 uint32_t *row = samp->out;
180 const struct lp_jit_texture *texture = samp->texture;
181 int yy = util_iround(samp->fsrc_y + samp->fdtdy * y);
182 const uint32_t *src_row =
183 (const uint32_t *)((const uint8_t *)texture->base +
184 yy * texture->row_stride[0]);
185 int iscale_x = samp->fdsdx * 256;
186 int acc = samp->fsrc_x * 256 + 128;
187 int width = samp->width;
188 int i;
189
190 for (i = 0; i < width; i++) {
191 row[i] = src_row[acc>>8];
192 acc += iscale_x;
193 }
194
195 return row;
196 }
197
/* Version of fetch_row which can cope with texture edges.  In
 * practice, aero never triggers this.
 */
201 static const uint32_t *
fetch_row_clamped(struct nearest_sampler * samp)202 fetch_row_clamped(struct nearest_sampler *samp)
203 {
204 int y = samp->y++;
205 uint32_t *row = samp->out;
206 const struct lp_jit_texture *texture = samp->texture;
207
208 int yy = util_iround(samp->fsrc_y + samp->fdtdy * y);
209
210 const uint32_t *src_row =
211 (const uint32_t *)((const uint8_t *)texture->base +
212 CLAMP(yy, 0, texture->height-1) *
213 texture->row_stride[0]);
214 float src_x0 = samp->fsrc_x;
215 float scale_x = samp->fdsdx;
216 int width = samp->width;
217 int i;
218
219 for (i = 0; i < width; i++) {
220 row[i] = src_row[CLAMP(util_iround(src_x0 + i*scale_x),0,texture->width-1)];
221 }
222
223 return row;
224 }
225
/* It very rarely happens that some non-axis-aligned texturing creeps
 * into the linear path.  Handle it here.  The alternative would be
 * more pre-checking or an option to fallback by returning false from
 * jit_linear.
 */
231 static const uint32_t *
fetch_row_xy_clamped(struct nearest_sampler * samp)232 fetch_row_xy_clamped(struct nearest_sampler *samp)
233 {
234 int y = samp->y++;
235 uint32_t *row = samp->out;
236 const struct lp_jit_texture *texture = samp->texture;
237 float yrow = samp->fsrc_y + samp->fdtdy * y;
238 float xrow = samp->fsrc_x + samp->fdsdy * y;
239 int width = samp->width;
240 int i;
241
242 for (i = 0; i < width; i++) {
243 int yy = util_iround(yrow + samp->fdtdx * i);
244 int xx = util_iround(xrow + samp->fdsdx * i);
245
246 const uint32_t *src_row =
247 (const uint32_t *)((const uint8_t *)texture->base +
248 CLAMP(yy, 0, texture->height-1) *
249 texture->row_stride[0]);
250
251 row[i] = src_row[CLAMP(xx,0,texture->width-1)];
252 }
253
254 return row;
255 }
256
257
/* Set up the nearest-filter sampler for a rectangle starting at pixel
 * (x0, y0): convert the normalized texcoord plane equations into
 * texel-space gradients and start coordinates, and pick the cheapest
 * fetch routine that is still correct for this mapping.
 *
 * Returns FALSE (caller must fall back) if w varies across the
 * rectangle, since this path has no perspective correction.
 */
static boolean
init_nearest_sampler(struct nearest_sampler *samp,
                     const struct lp_jit_texture *texture,
                     int x0, int y0,
                     int width, int height,
                     float s0, float dsdx, float dsdy,
                     float t0, float dtdx, float dtdy,
                     float w0, float dwdx, float dwdy)
{
   int i;
   float oow = 1.0f / w0;

   /* Reject perspective: require constant w across the rectangle. */
   if (dwdx != 0.0 || dwdy != 0.0)
      return FALSE;

   samp->texture = texture;
   samp->width = width;
   /* Scale normalized-coordinate gradients to texel units. */
   samp->fdsdx = dsdx * texture->width * oow;
   samp->fdsdy = dsdy * texture->width * oow;
   samp->fdtdx = dtdx * texture->height * oow;
   samp->fdtdy = dtdy * texture->height * oow;
   /* Texel-space start coordinates; the -0.5f shifts from pixel centers
    * to texel indices for nearest filtering.
    */
   samp->fsrc_x = (samp->fdsdx * x0 +
                   samp->fdsdy * y0 +
                   s0 * texture->width * oow - 0.5f);

   samp->fsrc_y = (samp->fdtdx * x0 +
                   samp->fdtdy * y0 +
                   t0 * texture->height * oow - 0.5f);
   samp->y = 0;

   /* Because we want to permit consumers of this data to round up to
    * the next multiple of 4, and because we don't want valgrind to
    * complain about uninitialized reads, set the last bit of the
    * buffer to zero:
    */
   for (i = width; i & 3; i++)
      samp->out[i] = 0;

   if (dsdy != 0 || dtdx != 0)
   {
      /* Arbitrary texture lookup:
       */
      samp->fetch = fetch_row_xy_clamped;
   }
   else
   {
      /* Axis aligned stretch blit, arbitrary scaling factors including
       * flipped, minifying and magnifying:
       */
      int isrc_x = util_iround(samp->fsrc_x);
      int isrc_y = util_iround(samp->fsrc_y);
      int isrc_x1 = util_iround(samp->fsrc_x + width * samp->fdsdx);
      int isrc_y1 = util_iround(samp->fsrc_y + height * samp->fdtdy);

      /* Look at the maximum and minimum texture coordinates we will be
       * fetching and figure out if we need to use clamping.  There is
       * similar code in u_blit_sw.c which takes a better approach to
       * this which could be substituted later.
       */
      if (isrc_x <= texture->width && isrc_x >= 0 &&
          isrc_y <= texture->height && isrc_y >= 0 &&
          isrc_x1 <= texture->width && isrc_x1 >= 0 &&
          isrc_y1 <= texture->height && isrc_y1 >= 0)
      {
         samp->fetch = fetch_row;
      }
      else {
         samp->fetch = fetch_row_clamped;
      }
   }

   return TRUE;
}
331
332
333 static const uint32_t *
shade_rgb1(struct shader * shader)334 shade_rgb1(struct shader *shader)
335 {
336 const __m128i rgb1 = _mm_set1_epi32(0xff000000);
337 const uint32_t *src0 = shader->src0;
338 uint32_t *dst = shader->out0;
339 int width = shader->width;
340 int i;
341
342 for (i = 0; i + 3 < width; i += 4) {
343 __m128i s = *(const __m128i *)&src0[i];
344 *(__m128i *)&dst[i] = _mm_or_si128(s, rgb1);
345 }
346
347 return shader->out0;
348 }
349
350
351 static void
init_shader(struct shader * shader,int x,int y,int width,int height)352 init_shader(struct shader *shader,
353 int x, int y, int width, int height)
354 {
355 shader->width = align(width, 4);
356 }
357
358
359 /* Linear shader which implements the BLIT_RGBA shader with the
360 * additional constraints imposed by lp_setup_is_blit().
361 */
static boolean
blit_rgba_blit(const struct lp_rast_state *state,
               unsigned x, unsigned y,
               unsigned width, unsigned height,
               const float (*a0)[4],
               const float (*dadx)[4],
               const float (*dady)[4],
               uint8_t *color,
               unsigned stride)
{
   const struct lp_jit_context *context = &state->jit_context;
   const struct lp_jit_texture *texture = &context->textures[0];
   const uint8_t *src;
   unsigned src_stride;
   int src_x, src_y;

   LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);

   /* Require w==1.0:
    */
   if (a0[0][3] != 1.0 ||
       dadx[0][3] != 0.0 ||
       dady[0][3] != 0.0)
      return FALSE;

   /* 1:1 blit: the source offset is the constant texcoord term scaled
    * to texels, minus the half-texel bias.
    */
   src_x = x + util_iround(a0[1][0]*texture->width - 0.5f);
   src_y = y + util_iround(a0[1][1]*texture->height - 0.5f);

   src = texture->base;
   src_stride = texture->row_stride[0];

   /* Fall back to blit_rgba() if clamping required:
    * (src_x/src_y < 0 is checked first, so the unsigned arithmetic in
    * the remaining comparisons only sees non-negative values.)
    */
   if (src_x < 0 ||
       src_y < 0 ||
       src_x + width > texture->width ||
       src_y + height > texture->height)
      return FALSE;

   /* Plain rectangle copy, no per-pixel work. */
   util_copy_rect(color, PIPE_FORMAT_B8G8R8A8_UNORM, stride,
                  x, y,
                  width, height,
                  src, src_stride,
                  src_x, src_y);

   return TRUE;
}
409
410
411 /* Linear shader which implements the BLIT_RGB1 shader, with the
412 * additional constraints imposed by lp_setup_is_blit().
413 */
414 static boolean
blit_rgb1_blit(const struct lp_rast_state * state,unsigned x,unsigned y,unsigned width,unsigned height,const float (* a0)[4],const float (* dadx)[4],const float (* dady)[4],uint8_t * color,unsigned stride)415 blit_rgb1_blit(const struct lp_rast_state *state,
416 unsigned x, unsigned y,
417 unsigned width, unsigned height,
418 const float (*a0)[4],
419 const float (*dadx)[4],
420 const float (*dady)[4],
421 uint8_t *color,
422 unsigned stride)
423 {
424 const struct lp_jit_context *context = &state->jit_context;
425 const struct lp_jit_texture *texture = &context->textures[0];
426 const uint8_t *src;
427 unsigned src_stride;
428 int src_x, src_y;
429
430 LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
431
432 /* Require w==1.0:
433 */
434 if (a0[0][3] != 1.0 ||
435 dadx[0][3] != 0.0 ||
436 dady[0][3] != 0.0)
437 return FALSE;
438
439 color += x * 4 + y * stride;
440
441 src_x = x + util_iround(a0[1][0]*texture->width - 0.5f);
442 src_y = y + util_iround(a0[1][1]*texture->height - 0.5f);
443
444 src = texture->base;
445 src_stride = texture->row_stride[0];
446 src += src_x * 4;
447 src += src_y * src_stride;
448
449 if (src_x < 0 ||
450 src_y < 0 ||
451 src_x + width > texture->width ||
452 src_y + height > texture->height)
453 return FALSE;
454
455 for (y = 0; y < height; y++) {
456 const uint32_t *src_row = (const uint32_t *)src;
457 uint32_t *dst_row = (uint32_t *)color;
458
459 for (x = 0; x < width; x++) {
460 *dst_row++ = *src_row++ | 0xff000000;
461 }
462
463 color += stride;
464 src += src_stride;
465 }
466
467 return TRUE;
468 }
469
470
471 /* Linear shader variant implementing the BLIT_RGBA shader without
472 * blending.
473 */
474 static boolean
blit_rgba(const struct lp_rast_state * state,unsigned x,unsigned y,unsigned width,unsigned height,const float (* a0)[4],const float (* dadx)[4],const float (* dady)[4],uint8_t * color,unsigned stride)475 blit_rgba(const struct lp_rast_state *state,
476 unsigned x, unsigned y,
477 unsigned width, unsigned height,
478 const float (*a0)[4],
479 const float (*dadx)[4],
480 const float (*dady)[4],
481 uint8_t *color,
482 unsigned stride)
483 {
484 const struct lp_jit_context *context = &state->jit_context;
485 struct nearest_sampler samp;
486 struct color_blend blend;
487
488 LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
489
490 if (!init_nearest_sampler(&samp,
491 &context->textures[0],
492 x, y, width, height,
493 a0[1][0], dadx[1][0], dady[1][0],
494 a0[1][1], dadx[1][1], dady[1][1],
495 a0[0][3], dadx[0][3], dady[0][3]))
496 return FALSE;
497
498 init_blend(&blend,
499 x, y, width, height,
500 color, stride);
501
502 /* Rasterize the rectangle and run the shader:
503 */
504 for (y = 0; y < height; y++) {
505 blend.src = samp.fetch(&samp);
506 blend_noop(&blend);
507 }
508
509 return TRUE;
510 }
511
512
513 static boolean
blit_rgb1(const struct lp_rast_state * state,unsigned x,unsigned y,unsigned width,unsigned height,const float (* a0)[4],const float (* dadx)[4],const float (* dady)[4],uint8_t * color,unsigned stride)514 blit_rgb1(const struct lp_rast_state *state,
515 unsigned x, unsigned y,
516 unsigned width, unsigned height,
517 const float (*a0)[4],
518 const float (*dadx)[4],
519 const float (*dady)[4],
520 uint8_t *color,
521 unsigned stride)
522 {
523 const struct lp_jit_context *context = &state->jit_context;
524 struct nearest_sampler samp;
525 struct color_blend blend;
526 struct shader shader;
527
528 LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
529
530 if (!init_nearest_sampler(&samp,
531 &context->textures[0],
532 x, y, width, height,
533 a0[1][0], dadx[1][0], dady[1][0],
534 a0[1][1], dadx[1][1], dady[1][1],
535 a0[0][3], dadx[0][3], dady[0][3]))
536 return FALSE;
537
538 init_blend(&blend,
539 x, y, width, height,
540 color, stride);
541
542
543 init_shader(&shader,
544 x, y, width, height);
545
546 /* Rasterize the rectangle and run the shader:
547 */
548 for (y = 0; y < height; y++) {
549 shader.src0 = samp.fetch(&samp);
550 blend.src = shade_rgb1(&shader);
551 blend_noop(&blend);
552 }
553
554 return TRUE;
555 }
556
557
558 /* Linear shader variant implementing the BLIT_RGBA shader with
559 * one/inv_src_alpha blending.
560 */
561 static boolean
blit_rgba_blend_premul(const struct lp_rast_state * state,unsigned x,unsigned y,unsigned width,unsigned height,const float (* a0)[4],const float (* dadx)[4],const float (* dady)[4],uint8_t * color,unsigned stride)562 blit_rgba_blend_premul(const struct lp_rast_state *state,
563 unsigned x, unsigned y,
564 unsigned width, unsigned height,
565 const float (*a0)[4],
566 const float (*dadx)[4],
567 const float (*dady)[4],
568 uint8_t *color,
569 unsigned stride)
570 {
571 const struct lp_jit_context *context = &state->jit_context;
572 struct nearest_sampler samp;
573 struct color_blend blend;
574
575 LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
576
577 if (!init_nearest_sampler(&samp,
578 &context->textures[0],
579 x, y, width, height,
580 a0[1][0], dadx[1][0], dady[1][0],
581 a0[1][1], dadx[1][1], dady[1][1],
582 a0[0][3], dadx[0][3], dady[0][3]))
583 return FALSE;
584
585
586 init_blend(&blend,
587 x, y, width, height,
588 color, stride);
589
590 /* Rasterize the rectangle and run the shader:
591 */
592 for (y = 0; y < height; y++) {
593 blend.src = samp.fetch(&samp);
594 blend_premul(&blend);
595 }
596
597 return TRUE;
598 }
599
600
601 /* Linear shader which always emits red. Used for debugging.
602 */
603 static boolean
linear_red(const struct lp_rast_state * state,unsigned x,unsigned y,unsigned width,unsigned height,const float (* a0)[4],const float (* dadx)[4],const float (* dady)[4],uint8_t * color,unsigned stride)604 linear_red(const struct lp_rast_state *state,
605 unsigned x, unsigned y,
606 unsigned width, unsigned height,
607 const float (*a0)[4],
608 const float (*dadx)[4],
609 const float (*dady)[4],
610 uint8_t *color,
611 unsigned stride)
612 {
613 union util_color uc;
614
615 util_pack_color_ub(0xff, 0, 0, 0xff,
616 PIPE_FORMAT_B8G8R8A8_UNORM, &uc);
617
618 util_fill_rect(color,
619 PIPE_FORMAT_B8G8R8A8_UNORM,
620 stride,
621 x,
622 y,
623 width,
624 height,
625 &uc);
626
627 return TRUE;
628 }
629
630
631 /* Noop linear shader variant, for debugging.
632 */
static boolean
linear_no_op(const struct lp_rast_state *state,
             unsigned x, unsigned y,
             unsigned width, unsigned height,
             const float (*a0)[4],
             const float (*dadx)[4],
             const float (*dady)[4],
             uint8_t *color,
             unsigned stride)
{
   /* Claim success without touching the color buffer: used for
    * debugging to measure everything except the shading itself.
    */
   return TRUE;
}
645
646 /* Check for ADD/ONE/INV_SRC_ALPHA, ie premultiplied-alpha blending.
647 */
648 static boolean
is_one_inv_src_alpha_blend(const struct lp_fragment_shader_variant * variant)649 is_one_inv_src_alpha_blend(const struct lp_fragment_shader_variant *variant)
650 {
651 return
652 !variant->key.blend.logicop_enable &&
653 variant->key.blend.rt[0].blend_enable &&
654 variant->key.blend.rt[0].rgb_func == PIPE_BLEND_ADD &&
655 variant->key.blend.rt[0].rgb_src_factor == PIPE_BLENDFACTOR_ONE &&
656 variant->key.blend.rt[0].rgb_dst_factor == PIPE_BLENDFACTOR_INV_SRC_ALPHA &&
657 variant->key.blend.rt[0].alpha_func == PIPE_BLEND_ADD &&
658 variant->key.blend.rt[0].alpha_src_factor == PIPE_BLENDFACTOR_ONE &&
659 variant->key.blend.rt[0].alpha_dst_factor == PIPE_BLENDFACTOR_INV_SRC_ALPHA &&
660 variant->key.blend.rt[0].colormask == 0xf;
661 }
662
663
664 /* Examine the fragment shader varient and determine whether we can
665 * substitute a fastpath linear shader implementation.
666 */
void
llvmpipe_fs_variant_linear_fastpath(struct lp_fragment_shader_variant *variant)
{
   struct lp_sampler_static_state *samp0 = lp_fs_variant_key_sampler_idx(&variant->key, 0);

   /* Debug mode: replace all shading with constant red. */
   if (LP_PERF & PERF_NO_SHADE) {
      variant->jit_linear = linear_red;
      return;
   }

   if (!samp0)
      return;

   enum pipe_format tex_format = samp0->texture_state.format;
   /* BLIT_RGBA: straight texel copy; blit_rgba_blit handles the 1:1
    * case, blit_rgba the scaled case, and the premul-blend variant
    * covers non-opaque one/inv_src_alpha blending (SSE2 required).
    */
   if (variant->shader->kind == LP_FS_KIND_BLIT_RGBA &&
       tex_format == PIPE_FORMAT_B8G8R8A8_UNORM &&
       is_nearest_clamp_sampler(samp0)) {
      if (variant->opaque) {
         variant->jit_linear_blit = blit_rgba_blit;
         variant->jit_linear = blit_rgba;
      }
      else if (is_one_inv_src_alpha_blend(variant) &&
               util_get_cpu_caps()->has_sse2) {
         variant->jit_linear = blit_rgba_blend_premul;
      }
      return;
   }

   /* BLIT_RGB1: texel copy that forces alpha to one; only valid for
    * opaque variants.
    */
   if (variant->shader->kind == LP_FS_KIND_BLIT_RGB1 &&
       variant->opaque &&
       (tex_format == PIPE_FORMAT_B8G8R8A8_UNORM ||
        tex_format == PIPE_FORMAT_B8G8R8X8_UNORM) &&
       is_nearest_clamp_sampler(samp0)) {
      variant->jit_linear_blit = blit_rgb1_blit;
      variant->jit_linear = blit_rgb1;
      return;
   }

   /* Flip to 1 to force the no-op linear shader, for debugging. */
   if (0) {
      variant->jit_linear = linear_no_op;
      return;
   }
}
710 #else
void
llvmpipe_fs_variant_linear_fastpath(struct lp_fragment_shader_variant *variant)
{
   /* Nothing to do: the linear fastpaths are SSE-only, so on non-SSE
    * builds no substitution ever happens and the variant keeps its
    * regular JIT path.
    */
}
716 #endif
717
718