1 /* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
2 /*
3 * Copyright © 2000 SuSE, Inc.
4 * Copyright © 2007 Red Hat, Inc.
5 *
6 * Permission to use, copy, modify, distribute, and sell this software and its
7 * documentation for any purpose is hereby granted without fee, provided that
8 * the above copyright notice appear in all copies and that both that
9 * copyright notice and this permission notice appear in supporting
10 * documentation, and that the name of SuSE not be used in advertising or
11 * publicity pertaining to distribution of the software without specific,
12 * written prior permission. SuSE makes no representations about the
13 * suitability of this software for any purpose. It is provided "as is"
14 * without express or implied warranty.
15 *
16 * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
18 * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
20 * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
21 * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
22 *
23 * Author: Keith Packard, SuSE, Inc.
24 */
25
26 #ifndef PIXMAN_FAST_PATH_H__
27 #define PIXMAN_FAST_PATH_H__
28
29 #include <stdlib.h>
30 #include "pixman-private.h"
31
32 #define PIXMAN_REPEAT_COVER -1
33
34 /* Flags describing input parameters to fast path macro template.
35 * Turning on some flag values may indicate that
36 * "some property X is available so template can use this" or
37 * "some property X should be handled by template".
38 *
39 * FLAG_HAVE_SOLID_MASK
40 * Input mask is solid so template should handle this.
41 *
42 * FLAG_HAVE_NON_SOLID_MASK
43 * Input mask is bits mask so template should handle this.
44 *
45 * FLAG_HAVE_SOLID_MASK and FLAG_HAVE_NON_SOLID_MASK are mutually
46 * exclusive. (It's not allowed to turn both flags on)
47 */
48 #define FLAG_NONE (0)
49 #define FLAG_HAVE_SOLID_MASK (1 << 1)
50 #define FLAG_HAVE_NON_SOLID_MASK (1 << 2)
51
52 /* To avoid too short repeated scanline function calls, extend source
53 * scanlines having width less than below constant value.
54 */
55 #define REPEAT_NORMAL_MIN_WIDTH 64
56
57 static force_inline pixman_bool_t
repeat(pixman_repeat_t repeat,int * c,int size)58 repeat (pixman_repeat_t repeat, int *c, int size)
59 {
60 if (repeat == PIXMAN_REPEAT_NONE)
61 {
62 if (*c < 0 || *c >= size)
63 return FALSE;
64 }
65 else if (repeat == PIXMAN_REPEAT_NORMAL)
66 {
67 while (*c >= size)
68 *c -= size;
69 while (*c < 0)
70 *c += size;
71 }
72 else if (repeat == PIXMAN_REPEAT_PAD)
73 {
74 *c = CLIP (*c, 0, size - 1);
75 }
76 else /* REFLECT */
77 {
78 *c = MOD (*c, size * 2);
79 if (*c >= size)
80 *c = size * 2 - *c - 1;
81 }
82 return TRUE;
83 }
84
85 static force_inline int
pixman_fixed_to_bilinear_weight(pixman_fixed_t x)86 pixman_fixed_to_bilinear_weight (pixman_fixed_t x)
87 {
88 return (x >> (16 - BILINEAR_INTERPOLATION_BITS)) &
89 ((1 << BILINEAR_INTERPOLATION_BITS) - 1);
90 }
91
92 #if BILINEAR_INTERPOLATION_BITS <= 4
93 /* Inspired by Filter_32_opaque from Skia */
/* Bilinearly interpolate four a8r8g8b8 corner pixels.  distx/disty are
 * fractional positions with BILINEAR_INTERPOLATION_BITS of precision.
 */
static force_inline uint32_t
bilinear_interpolation (uint32_t tl, uint32_t tr,
			uint32_t bl, uint32_t br,
			int distx, int disty)
{
    int distxy, distxiy, distixy, distixiy;
    uint32_t lo, hi;

    /* Scale the weights up to 4 fractional bits so the four corner
     * weights below always sum to 16 * 16 = 256.
     */
    distx <<= (4 - BILINEAR_INTERPOLATION_BITS);
    disty <<= (4 - BILINEAR_INTERPOLATION_BITS);

    distxy = distx * disty;
    distxiy = (distx << 4) - distxy; /* distx * (16 - disty) */
    distixy = (disty << 4) - distxy; /* disty * (16 - distx) */
    distixiy =
	16 * 16 - (disty << 4) -
	(distx << 4) + distxy; /* (16 - distx) * (16 - disty) */

    /* Process even (blue, red) and odd (green, alpha) channel bytes in
     * parallel: each 8-bit channel times an 8-bit weight fits in 16 bits,
     * so two channels can share one 32-bit accumulator without colliding.
     */
    lo = (tl & 0xff00ff) * distixiy;
    hi = ((tl >> 8) & 0xff00ff) * distixiy;

    lo += (tr & 0xff00ff) * distxiy;
    hi += ((tr >> 8) & 0xff00ff) * distxiy;

    lo += (bl & 0xff00ff) * distixy;
    hi += ((bl >> 8) & 0xff00ff) * distixy;

    lo += (br & 0xff00ff) * distxy;
    hi += ((br >> 8) & 0xff00ff) * distxy;

    /* Divide by the weight sum (256) and re-interleave the channels */
    return ((lo >> 8) & 0xff00ff) | (hi & ~0xff00ff);
}
126
127 #else
128 #if SIZEOF_LONG > 4
129
/* Bilinear interpolation using 64-bit arithmetic (builds where
 * SIZEOF_LONG > 4): all four channels are accumulated in parallel.
 */
static force_inline uint32_t
bilinear_interpolation (uint32_t tl, uint32_t tr,
			uint32_t bl, uint32_t br,
			int distx, int disty)
{
    uint64_t distxy, distxiy, distixy, distixiy;
    uint64_t tl64, tr64, bl64, br64;
    uint64_t f, r;

    /* Scale the weights up to 8 fractional bits; the four corner weights
     * below then sum to 256 * 256.
     */
    distx <<= (8 - BILINEAR_INTERPOLATION_BITS);
    disty <<= (8 - BILINEAR_INTERPOLATION_BITS);

    distxy = distx * disty;
    distxiy = distx * (256 - disty);
    distixy = (256 - distx) * disty;
    distixiy = (256 - distx) * (256 - disty);

    /* Alpha and Blue: these channels are 24 bits apart, and each weighted
     * sum needs at most 24 bits, so they can share one 64-bit accumulator.
     */
    tl64 = tl & 0xff0000ff;
    tr64 = tr & 0xff0000ff;
    bl64 = bl & 0xff0000ff;
    br64 = br & 0xff0000ff;

    f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy;
    /* Keep the rounded result bytes: blue at bits 16-23, alpha at 40-47 */
    r = f & 0x0000ff0000ff0000ull;

    /* Red and Green: move red up to bits 32-39 so that the two weighted
     * sums cannot collide either.
     */
    tl64 = tl;
    tl64 = ((tl64 << 16) & 0x000000ff00000000ull) | (tl64 & 0x0000ff00ull);

    tr64 = tr;
    tr64 = ((tr64 << 16) & 0x000000ff00000000ull) | (tr64 & 0x0000ff00ull);

    bl64 = bl;
    bl64 = ((bl64 << 16) & 0x000000ff00000000ull) | (bl64 & 0x0000ff00ull);

    br64 = br;
    br64 = ((br64 << 16) & 0x000000ff00000000ull) | (br64 & 0x0000ff00ull);

    f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy;
    /* Red result byte lands at bits 48-55 (shift back to 32-39),
     * green result byte at bits 24-31 */
    r |= ((f >> 16) & 0x000000ff00000000ull) | (f & 0xff000000ull);

    /* Every result byte now sits 16 bits above its final position */
    return (uint32_t)(r >> 16);
}
174
175 #else
176
177 #ifdef LOW_QUALITY_INTERPOLATION
178 /* Based on Filter_32_opaque_portable from Skia */
static force_inline uint32_t
bilinear_interpolation(uint32_t a00, uint32_t a01,
		       uint32_t a10, uint32_t a11,
		       int x, int y)
{
    /* NOTE(review): the four 'scale' weights below sum to 256 only when
     * x and y are 4-bit fractions (0..16) -- presumably
     * BILINEAR_INTERPOLATION_BITS is 4 whenever LOW_QUALITY_INTERPOLATION
     * is defined; confirm at the definition site.
     */
    int xy = x * y;
    static const uint32_t mask = 0xff00ff;

    /* weight of a00: (16 - x) * (16 - y) */
    int scale = 256 - 16*y - 16*x + xy;
    uint32_t lo = (a00 & mask) * scale;
    uint32_t hi = ((a00 >> 8) & mask) * scale;

    /* weight of a01: x * (16 - y) */
    scale = 16*x - xy;
    lo += (a01 & mask) * scale;
    hi += ((a01 >> 8) & mask) * scale;

    /* weight of a10: (16 - x) * y */
    scale = 16*y - xy;
    lo += (a10 & mask) * scale;
    hi += ((a10 >> 8) & mask) * scale;

    /* weight of a11: x * y */
    lo += (a11 & mask) * xy;
    hi += ((a11 >> 8) & mask) * xy;

    /* Divide by the weight sum (256) and re-interleave the even/odd
     * channel bytes */
    return ((lo >> 8) & mask) | (hi & ~mask);
}
204 #else
/* Bilinear interpolation using only 32-bit arithmetic: channels are
 * processed two at a time (blue+green in the low half of the pixel,
 * then red+alpha after shifting the inputs down by 16 bits).
 */
static force_inline uint32_t
bilinear_interpolation (uint32_t tl, uint32_t tr,
			uint32_t bl, uint32_t br,
			int distx, int disty)
{
    int distxy, distxiy, distixy, distixiy;
    uint32_t f, r;

    /* Scale the weights up to 8 fractional bits; the four corner weights
     * then sum to 256 * 256.
     */
    distx <<= (8 - BILINEAR_INTERPOLATION_BITS);
    disty <<= (8 - BILINEAR_INTERPOLATION_BITS);

    distxy = distx * disty;
    distxiy = (distx << 8) - distxy; /* distx * (256 - disty) */
    distixy = (disty << 8) - distxy; /* disty * (256 - distx) */
    distixiy =
	256 * 256 - (disty << 8) -
	(distx << 8) + distxy; /* (256 - distx) * (256 - disty) */

    /* Blue: rounded result byte lands at bits 16-23 of r */
    r = (tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy
      + (bl & 0x000000ff) * distixy + (br & 0x000000ff) * distxy;

    /* Green: rounded result byte lands at bits 24-31 */
    f = (tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy
      + (bl & 0x0000ff00) * distixy + (br & 0x0000ff00) * distxy;
    r |= f & 0xff000000;

    /* Shift the inputs so red/alpha occupy the low 16 bits, and shift the
     * partial result so blue/green drop into their final positions.
     */
    tl >>= 16;
    tr >>= 16;
    bl >>= 16;
    br >>= 16;
    r >>= 16;

    /* Red: rounded result byte lands at bits 16-23 */
    f = (tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy
      + (bl & 0x000000ff) * distixy + (br & 0x000000ff) * distxy;
    r |= f & 0x00ff0000;

    /* Alpha: rounded result byte lands at bits 24-31 */
    f = (tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy
      + (bl & 0x0000ff00) * distixy + (br & 0x0000ff00) * distxy;
    r |= f & 0xff000000;

    return r;
}
250 #endif
251 #endif
252 #endif // BILINEAR_INTERPOLATION_BITS <= 4
253
254 /*
255 * For each scanline fetched from source image with PAD repeat:
256 * - calculate how many pixels need to be padded on the left side
257 * - calculate how many pixels need to be padded on the right side
258 * - update width to only count pixels which are fetched from the image
259 * All this information is returned via 'width', 'left_pad', 'right_pad'
260 * arguments. The code is assuming that 'unit_x' is positive.
261 *
262 * Note: 64-bit math is used in order to avoid potential overflows, which
263 * is probably excessive in many cases. This particular function
264 * may need its own correctness test and performance tuning.
265 */
266 static force_inline void
pad_repeat_get_scanline_bounds(int32_t source_image_width,pixman_fixed_t vx,pixman_fixed_t unit_x,int32_t * width,int32_t * left_pad,int32_t * right_pad)267 pad_repeat_get_scanline_bounds (int32_t source_image_width,
268 pixman_fixed_t vx,
269 pixman_fixed_t unit_x,
270 int32_t * width,
271 int32_t * left_pad,
272 int32_t * right_pad)
273 {
274 int64_t max_vx = (int64_t) source_image_width << 16;
275 int64_t tmp;
276 if (vx < 0)
277 {
278 tmp = ((int64_t) unit_x - 1 - vx) / unit_x;
279 if (tmp > *width)
280 {
281 *left_pad = *width;
282 *width = 0;
283 }
284 else
285 {
286 *left_pad = (int32_t) tmp;
287 *width -= (int32_t) tmp;
288 }
289 }
290 else
291 {
292 *left_pad = 0;
293 }
294 tmp = ((int64_t) unit_x - 1 - vx + max_vx) / unit_x - *left_pad;
295 if (tmp < 0)
296 {
297 *right_pad = *width;
298 *width = 0;
299 }
300 else if (tmp >= *width)
301 {
302 *right_pad = 0;
303 }
304 else
305 {
306 *right_pad = *width - (int32_t) tmp;
307 *width = (int32_t) tmp;
308 }
309 }
310
311 /* A macroified version of specialized nearest scalers for some
312 * common 8888 and 565 formats. It supports SRC and OVER ops.
313 *
314 * There are two repeat versions, one that handles repeat normal,
315 * and one without repeat handling that only works if the src region
316 * used is completely covered by the pre-repeated source samples.
317 *
318 * The loops are unrolled to process two pixels per iteration for better
319 * performance on most CPU architectures (superscalar processors
320 * can issue several operations simultaneously, other processors can hide
321 * instructions latencies by pipelining operations). Unrolling more
322 * does not make much sense because the compiler will start running out
323 * of spare registers soon.
324 */
325
/* Extract the 8-bit alpha component from a pixel of the given format */
#define GET_8888_ALPHA(s) ((s) >> 24)
/* This is not actually used since we don't have an OVER with
   565 source, but it is needed to build. */
#define GET_0565_ALPHA(s) 0xff
/* x888 has no alpha channel, so it is treated as fully opaque */
#define GET_x888_ALPHA(s) 0xff
331
/* Expands to a nearest-neighbor scanline function named
 * 'scanline_func_name' for the given source/destination formats and
 * operator.  Only SRC and OVER are supported (anything else aborts at
 * run time).  The loop is unrolled two pixels per iteration; 'vx' steps
 * by 'unit_x' per pixel and, for NORMAL repeat, is repeatedly wrapped
 * below zero relative to the end of the source line (unit_x must be
 * positive).
 */
#define FAST_NEAREST_SCANLINE(scanline_func_name, SRC_FORMAT, DST_FORMAT, \
			      src_type_t, dst_type_t, OP, repeat_mode) \
static force_inline void \
scanline_func_name (dst_type_t *dst, \
		    const src_type_t *src, \
		    int32_t w, \
		    pixman_fixed_t vx, \
		    pixman_fixed_t unit_x, \
		    pixman_fixed_t src_width_fixed, \
		    pixman_bool_t fully_transparent_src) \
{ \
    uint32_t d; \
    src_type_t s1, s2; \
    uint8_t a1, a2; \
    int x1, x2; \
 \
    /* OVER with a fully transparent source is a no-op */ \
    if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER && fully_transparent_src) \
	return; \
 \
    if (PIXMAN_OP_ ## OP != PIXMAN_OP_SRC && PIXMAN_OP_ ## OP != PIXMAN_OP_OVER) \
	abort(); \
 \
    while ((w -= 2) >= 0) \
    { \
	x1 = pixman_fixed_to_int (vx); \
	vx += unit_x; \
	if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
	{ \
	    /* This works because we know that unit_x is positive */ \
	    while (vx >= 0) \
		vx -= src_width_fixed; \
	} \
	s1 = *(src + x1); \
 \
	x2 = pixman_fixed_to_int (vx); \
	vx += unit_x; \
	if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
	{ \
	    /* This works because we know that unit_x is positive */ \
	    while (vx >= 0) \
		vx -= src_width_fixed; \
	} \
	s2 = *(src + x2); \
 \
	if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER) \
	{ \
	    a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1); \
	    a2 = GET_ ## SRC_FORMAT ## _ALPHA(s2); \
 \
	    /* Opaque source: plain copy; transparent source: skip; \
	     * otherwise blend in 8888 space */ \
	    if (a1 == 0xff) \
	    { \
		*dst = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s1); \
	    } \
	    else if (s1) \
	    { \
		d = convert_ ## DST_FORMAT ## _to_8888 (*dst); \
		s1 = convert_ ## SRC_FORMAT ## _to_8888 (s1); \
		a1 ^= 0xff; \
		UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1); \
		*dst = convert_8888_to_ ## DST_FORMAT (d); \
	    } \
	    dst++; \
 \
	    if (a2 == 0xff) \
	    { \
		*dst = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s2); \
	    } \
	    else if (s2) \
	    { \
		d = convert_## DST_FORMAT ## _to_8888 (*dst); \
		s2 = convert_## SRC_FORMAT ## _to_8888 (s2); \
		a2 ^= 0xff; \
		UN8x4_MUL_UN8_ADD_UN8x4 (d, a2, s2); \
		*dst = convert_8888_to_ ## DST_FORMAT (d); \
	    } \
	    dst++; \
	} \
	else /* PIXMAN_OP_SRC */ \
	{ \
	    *dst++ = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s1); \
	    *dst++ = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s2); \
	} \
    } \
 \
    /* Handle the trailing odd pixel, if any */ \
    if (w & 1) \
    { \
	x1 = pixman_fixed_to_int (vx); \
	s1 = *(src + x1); \
 \
	if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER) \
	{ \
	    a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1); \
 \
	    if (a1 == 0xff) \
	    { \
		*dst = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s1); \
	    } \
	    else if (s1) \
	    { \
		d = convert_## DST_FORMAT ## _to_8888 (*dst); \
		s1 = convert_ ## SRC_FORMAT ## _to_8888 (s1); \
		a1 ^= 0xff; \
		UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1); \
		*dst = convert_8888_to_ ## DST_FORMAT (d); \
	    } \
	    dst++; \
	} \
	else /* PIXMAN_OP_SRC */ \
	{ \
	    *dst++ = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s1); \
	} \
    } \
}
445
/* Expands to a complete composite function named
 * 'fast_composite_scaled_nearest<scale_func_name>' that drives
 * 'scanline_func' over the destination.  It sets up the source transform,
 * steps 'vy' down the image applying the requested repeat mode, and for
 * PAD/NONE repeats splits every output scanline into left-pad / fetched /
 * right-pad segments before invoking the scanline function.
 */
#define FAST_NEAREST_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t, \
				  dst_type_t, repeat_mode, have_mask, mask_is_solid) \
static void \
fast_composite_scaled_nearest ## scale_func_name (pixman_implementation_t *imp, \
						  pixman_composite_info_t *info) \
{ \
    PIXMAN_COMPOSITE_ARGS (info); \
    dst_type_t *dst_line; \
    mask_type_t *mask_line; \
    src_type_t *src_first_line; \
    int y; \
    pixman_fixed_t src_width_fixed = pixman_int_to_fixed (src_image->bits.width); \
    pixman_fixed_t max_vy; \
    pixman_vector_t v; \
    pixman_fixed_t vx, vy; \
    pixman_fixed_t unit_x, unit_y; \
    int32_t left_pad, right_pad; \
 \
    src_type_t *src; \
    dst_type_t *dst; \
    mask_type_t solid_mask; \
    const mask_type_t *mask = &solid_mask; \
    int src_stride, mask_stride, dst_stride; \
 \
    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type_t, dst_stride, dst_line, 1); \
    if (have_mask) \
    { \
	if (mask_is_solid) \
	    solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); \
	else \
	    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type_t, \
				   mask_stride, mask_line, 1); \
    } \
    /* pass in 0 instead of src_x and src_y because src_x and src_y need to be \
     * transformed from destination space to source space */ \
    PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1); \
 \
    /* reference point is the center of the pixel */ \
    v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2; \
    v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2; \
    v.vector[2] = pixman_fixed_1; \
 \
    if (!pixman_transform_point_3d (src_image->common.transform, &v)) \
	return; \
 \
    unit_x = src_image->common.transform->matrix[0][0]; \
    unit_y = src_image->common.transform->matrix[1][1]; \
 \
    /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */ \
    v.vector[0] -= pixman_fixed_e; \
    v.vector[1] -= pixman_fixed_e; \
 \
    vx = v.vector[0]; \
    vy = v.vector[1]; \
 \
    if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
    { \
	max_vy = pixman_int_to_fixed (src_image->bits.height); \
 \
	/* Clamp repeating positions inside the actual samples */ \
	repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); \
	repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \
    } \
 \
    if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD || \
	PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
    { \
	pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x, \
					&width, &left_pad, &right_pad); \
	vx += left_pad * unit_x; \
    } \
 \
    while (--height >= 0) \
    { \
	dst = dst_line; \
	dst_line += dst_stride; \
	if (have_mask && !mask_is_solid) \
	{ \
	    mask = mask_line; \
	    mask_line += mask_stride; \
	} \
 \
	y = pixman_fixed_to_int (vy); \
	vy += unit_y; \
	if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
	    repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \
	if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \
	{ \
	    repeat (PIXMAN_REPEAT_PAD, &y, src_image->bits.height); \
	    src = src_first_line + src_stride * y; \
	    if (left_pad > 0) \
	    { \
		/* NOTE(review): unit_x == 0 keeps every sample at the same x; \
		 * the src + width - width + 1 expression is just src + 1, which \
		 * with vx = -pixman_fixed_e presumably replicates the first \
		 * pixel -- mirrors the right_pad call replicating the last. */ \
		scanline_func (mask, dst, \
			       src + src_image->bits.width - src_image->bits.width + 1, \
			       left_pad, -pixman_fixed_e, 0, src_width_fixed, FALSE); \
	    } \
	    if (width > 0) \
	    { \
		scanline_func (mask + (mask_is_solid ? 0 : left_pad), \
			       dst + left_pad, src + src_image->bits.width, width, \
			       vx - src_width_fixed, unit_x, src_width_fixed, FALSE); \
	    } \
	    if (right_pad > 0) \
	    { \
		scanline_func (mask + (mask_is_solid ? 0 : left_pad + width), \
			       dst + left_pad + width, src + src_image->bits.width, \
			       right_pad, -pixman_fixed_e, 0, src_width_fixed, FALSE); \
	    } \
	} \
	else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
	{ \
	    static const src_type_t zero[1] = { 0 }; \
	    /* Rows outside the source image are entirely transparent */ \
	    if (y < 0 || y >= src_image->bits.height) \
	    { \
		scanline_func (mask, dst, zero + 1, left_pad + width + right_pad, \
			       -pixman_fixed_e, 0, src_width_fixed, TRUE); \
		continue; \
	    } \
	    src = src_first_line + src_stride * y; \
	    if (left_pad > 0) \
	    { \
		scanline_func (mask, dst, zero + 1, left_pad, \
			       -pixman_fixed_e, 0, src_width_fixed, TRUE); \
	    } \
	    if (width > 0) \
	    { \
		scanline_func (mask + (mask_is_solid ? 0 : left_pad), \
			       dst + left_pad, src + src_image->bits.width, width, \
			       vx - src_width_fixed, unit_x, src_width_fixed, FALSE); \
	    } \
	    if (right_pad > 0) \
	    { \
		scanline_func (mask + (mask_is_solid ? 0 : left_pad + width), \
			       dst + left_pad + width, zero + 1, right_pad, \
			       -pixman_fixed_e, 0, src_width_fixed, TRUE); \
	    } \
	} \
	else \
	{ \
	    src = src_first_line + src_stride * y; \
	    scanline_func (mask, dst, src + src_image->bits.width, width, vx - src_width_fixed, \
			   unit_x, src_width_fixed, FALSE); \
	} \
    } \
}
591
/* A workaround for old sun studio, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764
 * Expands to FAST_NEAREST_MAINLOOP_INT with a '_' pasted in front of the
 * function-name suffix.
 */
#define FAST_NEAREST_MAINLOOP_COMMON(scale_func_name, scanline_func, src_type_t, mask_type_t, \
				     dst_type_t, repeat_mode, have_mask, mask_is_solid) \
	FAST_NEAREST_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, mask_type_t, \
				  dst_type_t, repeat_mode, have_mask, mask_is_solid)
597
/* Main loop variant for use without a mask: wraps 'scanline_func' so it
 * accepts (and ignores) the mask pointer that FAST_NEAREST_MAINLOOP_INT
 * passes, then instantiates the loop with have_mask = FALSE.
 */
#define FAST_NEAREST_MAINLOOP_NOMASK(scale_func_name, scanline_func, src_type_t, dst_type_t, \
				     repeat_mode) \
    static force_inline void \
    scanline_func##scale_func_name##_wrapper ( \
		const uint8_t *mask, \
		dst_type_t *dst, \
		const src_type_t *src, \
		int32_t w, \
		pixman_fixed_t vx, \
		pixman_fixed_t unit_x, \
		pixman_fixed_t max_vx, \
		pixman_bool_t fully_transparent_src) \
    { \
	scanline_func (dst, src, w, vx, unit_x, max_vx, fully_transparent_src); \
    } \
    FAST_NEAREST_MAINLOOP_INT (scale_func_name, scanline_func##scale_func_name##_wrapper, \
			       src_type_t, uint8_t, dst_type_t, repeat_mode, FALSE, FALSE)
615
/* Like FAST_NEAREST_MAINLOOP_COMMON, pastes a leading '_' onto the
 * function-name suffix before instantiating the maskless main loop.
 */
#define FAST_NEAREST_MAINLOOP(scale_func_name, scanline_func, src_type_t, dst_type_t, \
			      repeat_mode) \
    FAST_NEAREST_MAINLOOP_NOMASK(_ ## scale_func_name, scanline_func, src_type_t, \
				 dst_type_t, repeat_mode)

/* Instantiates both the scanline function (via FAST_NEAREST_SCANLINE) and
 * the maskless main loop for one (format, operator, repeat) combination.
 */
#define FAST_NEAREST(scale_func_name, SRC_FORMAT, DST_FORMAT, \
		     src_type_t, dst_type_t, OP, repeat_mode) \
    FAST_NEAREST_SCANLINE(scaled_nearest_scanline_ ## scale_func_name ## _ ## OP, \
			  SRC_FORMAT, DST_FORMAT, src_type_t, dst_type_t, \
			  OP, repeat_mode) \
    FAST_NEAREST_MAINLOOP_NOMASK(_ ## scale_func_name ## _ ## OP, \
				 scaled_nearest_scanline_ ## scale_func_name ## _ ## OP, \
				 src_type_t, dst_type_t, repeat_mode)
629
630
/* Source-image flags shared by all of the scaled-nearest fast path
 * table entries below.
 */
#define SCALED_NEAREST_FLAGS \
    (FAST_PATH_SCALE_TRANSFORM | \
     FAST_PATH_NO_ALPHA_MAP | \
     FAST_PATH_NEAREST_FILTER | \
     FAST_PATH_NO_ACCESSORS | \
     FAST_PATH_NARROW_FORMAT)
637
/* Each of the following macros expands to one fast path table entry
 * (operator, source format + flags, mask format + flags, dest format +
 * flags, composite function) for the maskless nearest scalers, one per
 * repeat mode.
 */
#define SIMPLE_NEAREST_FAST_PATH_NORMAL(op,s,d,func) \
    { PIXMAN_OP_ ## op, \
      PIXMAN_ ## s, \
      (SCALED_NEAREST_FLAGS | \
       FAST_PATH_NORMAL_REPEAT | \
       FAST_PATH_X_UNIT_POSITIVE), \
      PIXMAN_null, 0, \
      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
      fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \
    }

#define SIMPLE_NEAREST_FAST_PATH_PAD(op,s,d,func) \
    { PIXMAN_OP_ ## op, \
      PIXMAN_ ## s, \
      (SCALED_NEAREST_FLAGS | \
       FAST_PATH_PAD_REPEAT | \
       FAST_PATH_X_UNIT_POSITIVE), \
      PIXMAN_null, 0, \
      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
      fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op, \
    }

#define SIMPLE_NEAREST_FAST_PATH_NONE(op,s,d,func) \
    { PIXMAN_OP_ ## op, \
      PIXMAN_ ## s, \
      (SCALED_NEAREST_FLAGS | \
       FAST_PATH_NONE_REPEAT | \
       FAST_PATH_X_UNIT_POSITIVE), \
      PIXMAN_null, 0, \
      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
      fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \
    }

/* COVER: the source samples fully cover the clip, so no repeat handling
 * (and no FAST_PATH_X_UNIT_POSITIVE requirement) is needed */
#define SIMPLE_NEAREST_FAST_PATH_COVER(op,s,d,func) \
    { PIXMAN_OP_ ## op, \
      PIXMAN_ ## s, \
      SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST, \
      PIXMAN_null, 0, \
      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
      fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \
    }
679
/* Fast path table entries for nearest scalers taking an a8 (bits) mask,
 * one per repeat mode.
 */
#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_NORMAL(op,s,d,func) \
    { PIXMAN_OP_ ## op, \
      PIXMAN_ ## s, \
      (SCALED_NEAREST_FLAGS | \
       FAST_PATH_NORMAL_REPEAT | \
       FAST_PATH_X_UNIT_POSITIVE), \
      PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
      fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \
    }

#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD(op,s,d,func) \
    { PIXMAN_OP_ ## op, \
      PIXMAN_ ## s, \
      (SCALED_NEAREST_FLAGS | \
       FAST_PATH_PAD_REPEAT | \
       FAST_PATH_X_UNIT_POSITIVE), \
      PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
      fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op, \
    }

#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE(op,s,d,func) \
    { PIXMAN_OP_ ## op, \
      PIXMAN_ ## s, \
      (SCALED_NEAREST_FLAGS | \
       FAST_PATH_NONE_REPEAT | \
       FAST_PATH_X_UNIT_POSITIVE), \
      PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
      fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \
    }

#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER(op,s,d,func) \
    { PIXMAN_OP_ ## op, \
      PIXMAN_ ## s, \
      SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST, \
      PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
      fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \
    }
721
/* Fast path table entries for nearest scalers taking a solid mask,
 * one per repeat mode.
 */
#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL(op,s,d,func) \
    { PIXMAN_OP_ ## op, \
      PIXMAN_ ## s, \
      (SCALED_NEAREST_FLAGS | \
       FAST_PATH_NORMAL_REPEAT | \
       FAST_PATH_X_UNIT_POSITIVE), \
      PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
      fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \
    }

#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD(op,s,d,func) \
    { PIXMAN_OP_ ## op, \
      PIXMAN_ ## s, \
      (SCALED_NEAREST_FLAGS | \
       FAST_PATH_PAD_REPEAT | \
       FAST_PATH_X_UNIT_POSITIVE), \
      PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
      fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op, \
    }

#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE(op,s,d,func) \
    { PIXMAN_OP_ ## op, \
      PIXMAN_ ## s, \
      (SCALED_NEAREST_FLAGS | \
       FAST_PATH_NONE_REPEAT | \
       FAST_PATH_X_UNIT_POSITIVE), \
      PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
      fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \
    }

#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER(op,s,d,func) \
    { PIXMAN_OP_ ## op, \
      PIXMAN_ ## s, \
      SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST, \
      PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
      fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \
    }
763
/* Prefer the use of 'cover' variant, because it is faster */
#define SIMPLE_NEAREST_FAST_PATH(op,s,d,func) \
    SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func), \
    SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func), \
    SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func), \
    SIMPLE_NEAREST_FAST_PATH_NORMAL (op,s,d,func)

/* Note: unlike the maskless list above, the masked lists below do not
 * include a NORMAL repeat entry.
 */
#define SIMPLE_NEAREST_A8_MASK_FAST_PATH(op,s,d,func) \
    SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER (op,s,d,func), \
    SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE (op,s,d,func), \
    SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD (op,s,d,func)

#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH(op,s,d,func) \
    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER (op,s,d,func), \
    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE (op,s,d,func), \
    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD (op,s,d,func)
780
781 /*****************************************************************************/
782
783 /*
784 * Identify 5 zones in each scanline for bilinear scaling. Depending on
785 * whether 2 pixels to be interpolated are fetched from the image itself,
786 * from the padding area around it or from both image and padding area.
787 */
788 static force_inline void
bilinear_pad_repeat_get_scanline_bounds(int32_t source_image_width,pixman_fixed_t vx,pixman_fixed_t unit_x,int32_t * left_pad,int32_t * left_tz,int32_t * width,int32_t * right_tz,int32_t * right_pad)789 bilinear_pad_repeat_get_scanline_bounds (int32_t source_image_width,
790 pixman_fixed_t vx,
791 pixman_fixed_t unit_x,
792 int32_t * left_pad,
793 int32_t * left_tz,
794 int32_t * width,
795 int32_t * right_tz,
796 int32_t * right_pad)
797 {
798 int width1 = *width, left_pad1, right_pad1;
799 int width2 = *width, left_pad2, right_pad2;
800
801 pad_repeat_get_scanline_bounds (source_image_width, vx, unit_x,
802 &width1, &left_pad1, &right_pad1);
803 pad_repeat_get_scanline_bounds (source_image_width, vx + pixman_fixed_1,
804 unit_x, &width2, &left_pad2, &right_pad2);
805
806 *left_pad = left_pad2;
807 *left_tz = left_pad1 - left_pad2;
808 *right_tz = right_pad2 - right_pad1;
809 *right_pad = right_pad1;
810 *width -= *left_pad + *left_tz + *right_tz + *right_pad;
811 }
812
813 /*
814 * Main loop template for single pass bilinear scaling. It needs to be
815 * provided with 'scanline_func' which should do the compositing operation.
816 * The needed function has the following prototype:
817 *
818 * scanline_func (dst_type_t * dst,
819 * const mask_type_ * mask,
820 * const src_type_t * src_top,
821 * const src_type_t * src_bottom,
822 * int32_t width,
823 * int weight_top,
824 * int weight_bottom,
825 * pixman_fixed_t vx,
826 * pixman_fixed_t unit_x,
827 * pixman_fixed_t max_vx,
828 * pixman_bool_t zero_src)
829 *
830 * Where:
831 * dst - destination scanline buffer for storing results
832 * mask - mask buffer (or single value for solid mask)
833 * src_top, src_bottom - two source scanlines
834 * width - number of pixels to process
835 * weight_top - weight of the top row for interpolation
836 * weight_bottom - weight of the bottom row for interpolation
837 * vx - initial position for fetching the first pair of
838 * pixels from the source buffer
839 * unit_x - position increment needed to move to the next pair
840 * of pixels
841 * max_vx - image size as a fixed point value, can be used for
842 * implementing NORMAL repeat (when it is supported)
843 * zero_src - boolean hint variable, which is set to TRUE when
844 * all source pixels are fetched from zero padding
845 * zone for NONE repeat
846 *
847 * Note: normally the sum of 'weight_top' and 'weight_bottom' is equal to
848 * BILINEAR_INTERPOLATION_RANGE, but sometimes it may be less than that
849 * for NONE repeat when handling fuzzy antialiased top or bottom image
 * edges. Also both the top and bottom weight variables are guaranteed
 * to have values less than BILINEAR_INTERPOLATION_RANGE.
852 * For example, the weights can fit into unsigned byte or be used
853 * with 8-bit SIMD multiplication instructions for 8-bit interpolation
854 * precision.
855 */
856
857 /* Replace a single "scanline_func" with "fetch_func" & "op_func" to allow optional
858 * two stage processing (bilinear fetch to a temp buffer, followed by unscaled
859 * combine), "op_func" may be NULL, in this case we keep old behavior.
860 * This is ugly and gcc issues some warnings, but works.
861 *
862 * An advice: clang has much better error reporting than gcc for deeply nested macros.
863 */
864
/* Dispatch one scanline: with an 'op_func', bilinearly fetch into the
 * temporary buffer and then combine into 'dst'; without one, fetch
 * straight into 'dst'.
 */
#define scanline_func(dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst, \
		      scanline_buf, mask, src_top, src_bottom, width, \
		      weight_top, weight_bottom, vx, unit_x, max_vx, zero_src) \
    do { \
	if (op_func != NULL) \
	{ \
	    /* two stage: bilinear fetch to the temp buffer, then combine */ \
	    fetch_func ((void *)scanline_buf, (mask), (src_top), (src_bottom), (width), \
			(weight_top), (weight_bottom), (vx), (unit_x), (max_vx), (zero_src)); \
	    ((void (*)(dst_type_t *, const mask_type_t *, const src_type_t *, int)) op_func)\
		((dst), (mask), (src_type_t *)scanline_buf, (width)); \
	} \
	else \
	{ \
	    /* single stage: fetch directly into the destination */ \
	    fetch_func ((void*)(dst), (mask), (src_top), (src_bottom), (width), (weight_top), \
			(weight_bottom), (vx), (unit_x), (max_vx), (zero_src)); \
	} \
    } while (0)

/* Length, in uint64_t units, of the on-stack temporary scanline buffer
 * (stack_scanline_buffer) used by the bilinear main loop.
 */
#define SCANLINE_BUFFER_LENGTH 3072
885
/*
 * Main-loop template for scaled bilinear compositing fast paths.
 *
 * Expands to a function fast_composite_scaled_bilinear<scale_func_name>()
 * that walks the destination one scanline at a time.  For each scanline it
 * computes the two source rows (y1/y2) and vertical bilinear weights, then
 * splits the scanline horizontally according to 'repeat_mode':
 *
 *   PAD    - left/right padding zones replicate the edge pixel
 *   NONE   - padding zones are transparent; 'transition zones' blend
 *            between a source edge pixel and zero
 *   NORMAL - the scanline is tiled; a two-pixel wrap-around buffer handles
 *            the seam between the last and first source pixel
 *   (else) - COVER: the whole scanline is fetched directly from the source
 *
 * and invokes the scanline_func macro (defined above) on each zone.
 *
 * Template arguments:
 *   scale_func_name - suffix pasted onto the generated function name
 *   fetch_func      - fetch/scale primitive forwarded to scanline_func
 *   op_func         - combine step, or NULL; when non-NULL the fetched
 *                     pixels are staged through 'scanline_buffer' first
 *   src_type_t, mask_type_t, dst_type_t - pixel storage types
 *   repeat_mode     - PIXMAN_REPEAT_ suffix: PAD, NONE, NORMAL or COVER
 *   flags           - FLAG_HAVE_SOLID_MASK / FLAG_HAVE_NON_SOLID_MASK / none
 */
#define FAST_BILINEAR_MAINLOOP_INT(scale_func_name, fetch_func, op_func, src_type_t, \
                                   mask_type_t, dst_type_t, repeat_mode, flags) \
static void \
fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp, \
                                                   pixman_composite_info_t *info) \
{ \
    PIXMAN_COMPOSITE_ARGS (info); \
    dst_type_t *dst_line; \
    mask_type_t *mask_line; \
    src_type_t *src_first_line; \
    int y1, y2; \
    pixman_fixed_t max_vx = INT32_MAX; /* suppress uninitialized variable warning */ \
    pixman_vector_t v; \
    pixman_fixed_t vx, vy; \
    pixman_fixed_t unit_x, unit_y; \
    int32_t left_pad, left_tz, right_tz, right_pad; \
 \
    dst_type_t *dst; \
    mask_type_t solid_mask; \
    const mask_type_t *mask = &solid_mask; \
    int src_stride, mask_stride, dst_stride; \
 \
    int src_width; \
    pixman_fixed_t src_width_fixed; \
    int max_x; \
    pixman_bool_t need_src_extension; \
 \
    /* Staging buffer for the op_func case; replaced by a heap buffer \
     * below when the scanline does not fit. */ \
    uint64_t stack_scanline_buffer[SCANLINE_BUFFER_LENGTH]; \
    uint8_t *scanline_buffer = (uint8_t *) stack_scanline_buffer; \
 \
    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type_t, dst_stride, dst_line, 1); \
    if (flags & FLAG_HAVE_SOLID_MASK) \
    { \
        /* single mask value, 'mask' keeps pointing at solid_mask */ \
        solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); \
        mask_stride = 0; \
    } \
    else if (flags & FLAG_HAVE_NON_SOLID_MASK) \
    { \
        PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type_t, \
                               mask_stride, mask_line, 1); \
    } \
 \
    /* pass in 0 instead of src_x and src_y because src_x and src_y need to be \
     * transformed from destination space to source space */ \
    PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1); \
 \
    /* reference point is the center of the pixel */ \
    v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2; \
    v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2; \
    v.vector[2] = pixman_fixed_1; \
 \
    if (!pixman_transform_point_3d (src_image->common.transform, &v)) \
        return; \
 \
    /* Only the matrix diagonal is read: these paths are registered with \
     * FAST_PATH_SCALE_TRANSFORM (see SCALED_BILINEAR_FLAGS below). */ \
    unit_x = src_image->common.transform->matrix[0][0]; \
    unit_y = src_image->common.transform->matrix[1][1]; \
 \
    v.vector[0] -= pixman_fixed_1 / 2; \
    v.vector[1] -= pixman_fixed_1 / 2; \
 \
    vy = v.vector[1]; \
 \
    if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD || \
        PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
    { \
        bilinear_pad_repeat_get_scanline_bounds (src_image->bits.width, v.vector[0], unit_x, \
                                        &left_pad, &left_tz, &width, &right_tz, &right_pad); \
        if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \
        { \
            /* PAD repeat does not need special handling for 'transition zones' and */ \
            /* they can be combined with 'padding zones' safely */ \
            left_pad += left_tz; \
            right_pad += right_tz; \
            left_tz = right_tz = 0; \
        } \
        v.vector[0] += left_pad * unit_x; \
    } \
 \
    if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
    { \
        vx = v.vector[0]; \
        repeat (PIXMAN_REPEAT_NORMAL, &vx, pixman_int_to_fixed(src_image->bits.width)); \
        /* int64_t product avoids signed overflow for large width * unit_x */ \
        max_x = pixman_fixed_to_int (vx + (width - 1) * (int64_t)unit_x) + 1; \
 \
        if (src_image->bits.width < REPEAT_NORMAL_MIN_WIDTH) \
        { \
            /* Very narrow sources are replicated into a wider 'extended' \
             * line (see need_src_extension below) so that the inner loop \
             * wraps less often. */ \
            src_width = 0; \
 \
            while (src_width < REPEAT_NORMAL_MIN_WIDTH && src_width <= max_x) \
                src_width += src_image->bits.width; \
 \
            need_src_extension = TRUE; \
        } \
        else \
        { \
            src_width = src_image->bits.width; \
            need_src_extension = FALSE; \
        } \
 \
        src_width_fixed = pixman_int_to_fixed (src_width); \
    } \
 \
    /* The staging buffer is only needed when there is a combine step; \
     * fall back to the heap when the scanline does not fit on the stack. */ \
    if (op_func != NULL && width * sizeof(src_type_t) > sizeof(stack_scanline_buffer)) \
    { \
        scanline_buffer = pixman_malloc_ab (width, sizeof(src_type_t)); \
 \
        if (!scanline_buffer) \
            return; \
    } \
 \
    while (--height >= 0) \
    { \
        int weight1, weight2; \
        dst = dst_line; \
        dst_line += dst_stride; \
        vx = v.vector[0]; \
        if (flags & FLAG_HAVE_NON_SOLID_MASK) \
        { \
            mask = mask_line; \
            mask_line += mask_stride; \
        } \
 \
        y1 = pixman_fixed_to_int (vy); \
        weight2 = pixman_fixed_to_bilinear_weight (vy); \
        if (weight2) \
        { \
            /* both weight1 and weight2 are smaller than BILINEAR_INTERPOLATION_RANGE */ \
            y2 = y1 + 1; \
            weight1 = BILINEAR_INTERPOLATION_RANGE - weight2; \
        } \
        else \
        { \
            /* set both top and bottom row to the same scanline and tweak weights */ \
            y2 = y1; \
            weight1 = weight2 = BILINEAR_INTERPOLATION_RANGE / 2; \
        } \
        vy += unit_y; \
        if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \
        { \
            src_type_t *src1, *src2; \
            src_type_t buf1[2]; \
            src_type_t buf2[2]; \
            repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height); \
            repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height); \
            src1 = src_first_line + src_stride * y1; \
            src2 = src_first_line + src_stride * y2; \
 \
            if (left_pad > 0) \
            { \
                /* padding zones replicate the leftmost source pixel */ \
                buf1[0] = buf1[1] = src1[0]; \
                buf2[0] = buf2[1] = src2[0]; \
                scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst, \
                               scanline_buffer, mask, buf1, buf2, left_pad, weight1, weight2, \
                               0, 0, 0, FALSE); \
                dst += left_pad; \
                if (flags & FLAG_HAVE_NON_SOLID_MASK) \
                    mask += left_pad; \
            } \
            if (width > 0) \
            { \
                scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst, \
                               scanline_buffer, mask, src1, src2, width, weight1, weight2, \
                               vx, unit_x, 0, FALSE); \
                dst += width; \
                if (flags & FLAG_HAVE_NON_SOLID_MASK) \
                    mask += width; \
            } \
            if (right_pad > 0) \
            { \
                /* padding zones replicate the rightmost source pixel */ \
                buf1[0] = buf1[1] = src1[src_image->bits.width - 1]; \
                buf2[0] = buf2[1] = src2[src_image->bits.width - 1]; \
                scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst, \
                               scanline_buffer, mask, buf1, buf2, right_pad, weight1, weight2, \
                               0, 0, 0, FALSE); \
            } \
        } \
        else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
        { \
            src_type_t *src1, *src2; \
            src_type_t buf1[2]; \
            src_type_t buf2[2]; \
            /* handle top/bottom zero padding by just setting weights to 0 if needed */ \
            if (y1 < 0) \
            { \
                weight1 = 0; \
                y1 = 0; \
            } \
            if (y1 >= src_image->bits.height) \
            { \
                weight1 = 0; \
                y1 = src_image->bits.height - 1; \
            } \
            if (y2 < 0) \
            { \
                weight2 = 0; \
                y2 = 0; \
            } \
            if (y2 >= src_image->bits.height) \
            { \
                weight2 = 0; \
                y2 = src_image->bits.height - 1; \
            } \
            src1 = src_first_line + src_stride * y1; \
            src2 = src_first_line + src_stride * y2; \
 \
            if (left_pad > 0) \
            { \
                /* fully outside the source: composite zeros (zero_src=TRUE) */ \
                buf1[0] = buf1[1] = 0; \
                buf2[0] = buf2[1] = 0; \
                scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst, \
                               scanline_buffer, mask, buf1, buf2, left_pad, weight1, weight2, \
                               0, 0, 0, TRUE); \
                dst += left_pad; \
                if (flags & FLAG_HAVE_NON_SOLID_MASK) \
                    mask += left_pad; \
            } \
            if (left_tz > 0) \
            { \
                /* transition zone: interpolate between zero and the left edge pixel */ \
                buf1[0] = 0; \
                buf1[1] = src1[0]; \
                buf2[0] = 0; \
                buf2[1] = src2[0]; \
                scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst, \
                               scanline_buffer, mask, buf1, buf2, left_tz, weight1, weight2, \
                               pixman_fixed_frac (vx), unit_x, 0, FALSE); \
                dst += left_tz; \
                if (flags & FLAG_HAVE_NON_SOLID_MASK) \
                    mask += left_tz; \
                vx += left_tz * unit_x; \
            } \
            if (width > 0) \
            { \
                scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst, \
                               scanline_buffer, mask, src1, src2, width, weight1, weight2, \
                               vx, unit_x, 0, FALSE); \
                dst += width; \
                if (flags & FLAG_HAVE_NON_SOLID_MASK) \
                    mask += width; \
                vx += width * unit_x; \
            } \
            if (right_tz > 0) \
            { \
                /* transition zone: interpolate between the right edge pixel and zero */ \
                buf1[0] = src1[src_image->bits.width - 1]; \
                buf1[1] = 0; \
                buf2[0] = src2[src_image->bits.width - 1]; \
                buf2[1] = 0; \
                scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst, \
                               scanline_buffer, mask, buf1, buf2, right_tz, weight1, weight2, \
                               pixman_fixed_frac (vx), unit_x, 0, FALSE); \
                dst += right_tz; \
                if (flags & FLAG_HAVE_NON_SOLID_MASK) \
                    mask += right_tz; \
            } \
            if (right_pad > 0) \
            { \
                buf1[0] = buf1[1] = 0; \
                buf2[0] = buf2[1] = 0; \
                scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst, \
                               scanline_buffer, mask, buf1, buf2, right_pad, weight1, weight2, \
                               0, 0, 0, TRUE); \
            } \
        } \
        else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
        { \
            int32_t num_pixels; \
            int32_t width_remain; \
            src_type_t * src_line_top; \
            src_type_t * src_line_bottom; \
            src_type_t buf1[2]; \
            src_type_t buf2[2]; \
            src_type_t extended_src_line0[REPEAT_NORMAL_MIN_WIDTH*2]; \
            src_type_t extended_src_line1[REPEAT_NORMAL_MIN_WIDTH*2]; \
            int i, j; \
 \
            repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image->bits.height); \
            repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image->bits.height); \
            src_line_top = src_first_line + src_stride * y1; \
            src_line_bottom = src_first_line + src_stride * y2; \
 \
            if (need_src_extension) \
            { \
                /* tile the narrow source into the extended lines computed above */ \
                for (i=0; i<src_width;) \
                { \
                    for (j=0; j<src_image->bits.width; j++, i++) \
                    { \
                        extended_src_line0[i] = src_line_top[j]; \
                        extended_src_line1[i] = src_line_bottom[j]; \
                    } \
                } \
 \
                src_line_top = &extended_src_line0[0]; \
                src_line_bottom = &extended_src_line1[0]; \
            } \
 \
            /* Top & Bottom wrap around buffer */ \
            buf1[0] = src_line_top[src_width - 1]; \
            buf1[1] = src_line_top[0]; \
            buf2[0] = src_line_bottom[src_width - 1]; \
            buf2[1] = src_line_bottom[0]; \
 \
            width_remain = width; \
 \
            while (width_remain > 0) \
            { \
                /* We use src_width_fixed because it can make vx in original source range */ \
                repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); \
 \
                /* Wrap around part */ \
                if (pixman_fixed_to_int (vx) == src_width - 1) \
                { \
                    /* for positive unit_x \
                     * num_pixels = max(n) + 1, where vx + n*unit_x < src_width_fixed \
                     * \
                     * vx is in range [0, src_width_fixed - pixman_fixed_e] \
                     * So we are safe from overflow. \
                     */ \
                    num_pixels = ((src_width_fixed - vx - pixman_fixed_e) / unit_x) + 1; \
 \
                    if (num_pixels > width_remain) \
                        num_pixels = width_remain; \
 \
                    scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, \
                                   dst, scanline_buffer, mask, buf1, buf2, num_pixels, \
                                   weight1, weight2, pixman_fixed_frac(vx), \
                                   unit_x, src_width_fixed, FALSE); \
 \
                    width_remain -= num_pixels; \
                    vx += num_pixels * unit_x; \
                    dst += num_pixels; \
 \
                    if (flags & FLAG_HAVE_NON_SOLID_MASK) \
                        mask += num_pixels; \
 \
                    repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); \
                } \
 \
                /* Normal scanline composite */ \
                if (pixman_fixed_to_int (vx) != src_width - 1 && width_remain > 0) \
                { \
                    /* for positive unit_x \
                     * num_pixels = max(n) + 1, where vx + n*unit_x < (src_width_fixed - 1) \
                     * \
                     * vx is in range [0, src_width_fixed - pixman_fixed_e] \
                     * So we are safe from overflow here. \
                     */ \
                    num_pixels = ((src_width_fixed - pixman_fixed_1 - vx - pixman_fixed_e) \
                                  / unit_x) + 1; \
 \
                    if (num_pixels > width_remain) \
                        num_pixels = width_remain; \
 \
                    scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, \
                                   dst, scanline_buffer, mask, src_line_top, src_line_bottom, \
                                   num_pixels, weight1, weight2, vx, unit_x, src_width_fixed, \
                                   FALSE); \
 \
                    width_remain -= num_pixels; \
                    vx += num_pixels * unit_x; \
                    dst += num_pixels; \
 \
                    if (flags & FLAG_HAVE_NON_SOLID_MASK) \
                        mask += num_pixels; \
                } \
            } \
        } \
        else \
        { \
            /* remaining case: COVER - whole scanline comes straight from the source */ \
            scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst, \
                           scanline_buffer, mask, \
                           src_first_line + src_stride * y1, \
                           src_first_line + src_stride * y2, width, \
                           weight1, weight2, vx, unit_x, max_vx, FALSE); \
        } \
    } \
    if (scanline_buffer != (uint8_t *) stack_scanline_buffer) \
        free (scanline_buffer); \
}
1263
/* A workaround for old sun studio, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764 */
/* Extra expansion level so 'scale_func_name' is macro-expanded before it is
 * pasted (with a leading underscore) into the generated function name. */
#define FAST_BILINEAR_MAINLOOP_COMMON(scale_func_name, fetch_func, op_func, src_type_t, mask_type_t,\
                                      dst_type_t, repeat_mode, flags) \
    FAST_BILINEAR_MAINLOOP_INT(_ ## scale_func_name, fetch_func, op_func, src_type_t, mask_type_t,\
                               dst_type_t, repeat_mode, flags)
1269
/* Source-image flag set shared by all scaled bilinear fast path entries:
 * pure scale transform, bilinear filter, no alpha map / accessors, and
 * a narrow (8 bpc) format. */
#define SCALED_BILINEAR_FLAGS \
    (FAST_PATH_SCALE_TRANSFORM | \
     FAST_PATH_NO_ALPHA_MAP    | \
     FAST_PATH_BILINEAR_FILTER | \
     FAST_PATH_NO_ACCESSORS    | \
     FAST_PATH_NARROW_FORMAT)
1276
/* Fast path table entry: scaled bilinear, no mask, PAD repeat.
 * 'op' is a PIXMAN_OP_ suffix, 's'/'d' are PIXMAN_ format suffixes and
 * 'func' selects the generated fast_composite_scaled_bilinear_* function. */
#define SIMPLE_BILINEAR_FAST_PATH_PAD(op,s,d,func) \
    {   PIXMAN_OP_ ## op, \
        PIXMAN_ ## s, \
        (SCALED_BILINEAR_FLAGS | \
         FAST_PATH_PAD_REPEAT  | \
         FAST_PATH_X_UNIT_POSITIVE), \
        PIXMAN_null, 0, \
        PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
        fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op, \
    }
1287
/* Fast path table entry: scaled bilinear, no mask, NONE repeat. */
#define SIMPLE_BILINEAR_FAST_PATH_NONE(op,s,d,func) \
    {   PIXMAN_OP_ ## op, \
        PIXMAN_ ## s, \
        (SCALED_BILINEAR_FLAGS  | \
         FAST_PATH_NONE_REPEAT  | \
         FAST_PATH_X_UNIT_POSITIVE), \
        PIXMAN_null, 0, \
        PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
        fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op, \
    }
1298
/* Fast path table entry: scaled bilinear, no mask, COVER clip (all samples
 * fall inside the source, so no repeat handling is required). */
#define SIMPLE_BILINEAR_FAST_PATH_COVER(op,s,d,func) \
    {   PIXMAN_OP_ ## op, \
        PIXMAN_ ## s, \
        SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR, \
        PIXMAN_null, 0, \
        PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
        fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op, \
    }
1307
/* Fast path table entry: scaled bilinear, no mask, NORMAL (tiled) repeat. */
#define SIMPLE_BILINEAR_FAST_PATH_NORMAL(op,s,d,func) \
    {   PIXMAN_OP_ ## op, \
        PIXMAN_ ## s, \
        (SCALED_BILINEAR_FLAGS    | \
         FAST_PATH_NORMAL_REPEAT  | \
         FAST_PATH_X_UNIT_POSITIVE), \
        PIXMAN_null, 0, \
        PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
        fast_composite_scaled_bilinear_ ## func ## _normal ## _ ## op, \
    }
1318
/* Fast path table entry: scaled bilinear with an a8 bits mask, PAD repeat. */
#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD(op,s,d,func) \
    {   PIXMAN_OP_ ## op, \
        PIXMAN_ ## s, \
        (SCALED_BILINEAR_FLAGS | \
         FAST_PATH_PAD_REPEAT  | \
         FAST_PATH_X_UNIT_POSITIVE), \
        PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
        PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
        fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op, \
    }
1329
/* Fast path table entry: scaled bilinear with an a8 bits mask, NONE repeat. */
#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NONE(op,s,d,func) \
    {   PIXMAN_OP_ ## op, \
        PIXMAN_ ## s, \
        (SCALED_BILINEAR_FLAGS  | \
         FAST_PATH_NONE_REPEAT  | \
         FAST_PATH_X_UNIT_POSITIVE), \
        PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
        PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
        fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op, \
    }
1340
/* Fast path table entry: scaled bilinear with an a8 bits mask, COVER clip. */
#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER(op,s,d,func) \
    {   PIXMAN_OP_ ## op, \
        PIXMAN_ ## s, \
        SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR, \
        PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
        PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
        fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op, \
    }
1349
/* Fast path table entry: scaled bilinear with an a8 bits mask, NORMAL repeat. */
#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NORMAL(op,s,d,func) \
    {   PIXMAN_OP_ ## op, \
        PIXMAN_ ## s, \
        (SCALED_BILINEAR_FLAGS    | \
         FAST_PATH_NORMAL_REPEAT  | \
         FAST_PATH_X_UNIT_POSITIVE), \
        PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
        PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
        fast_composite_scaled_bilinear_ ## func ## _normal ## _ ## op, \
    }
1360
/* Fast path table entry: scaled bilinear with a solid mask, PAD repeat. */
#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD(op,s,d,func) \
    {   PIXMAN_OP_ ## op, \
        PIXMAN_ ## s, \
        (SCALED_BILINEAR_FLAGS | \
         FAST_PATH_PAD_REPEAT  | \
         FAST_PATH_X_UNIT_POSITIVE), \
        PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
        PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
        fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op, \
    }
1371
/* Fast path table entry: scaled bilinear with a solid mask, NONE repeat. */
#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NONE(op,s,d,func) \
    {   PIXMAN_OP_ ## op, \
        PIXMAN_ ## s, \
        (SCALED_BILINEAR_FLAGS  | \
         FAST_PATH_NONE_REPEAT  | \
         FAST_PATH_X_UNIT_POSITIVE), \
        PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
        PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
        fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op, \
    }
1382
/* Fast path table entry: scaled bilinear with a solid mask, COVER clip. */
#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER(op,s,d,func) \
    {   PIXMAN_OP_ ## op, \
        PIXMAN_ ## s, \
        SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR, \
        PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
        PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
        fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op, \
    }
1391
/* Fast path table entry: scaled bilinear with a solid mask, NORMAL repeat. */
#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NORMAL(op,s,d,func) \
    {   PIXMAN_OP_ ## op, \
        PIXMAN_ ## s, \
        (SCALED_BILINEAR_FLAGS    | \
         FAST_PATH_NORMAL_REPEAT  | \
         FAST_PATH_X_UNIT_POSITIVE), \
        PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
        PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
        fast_composite_scaled_bilinear_ ## func ## _normal ## _ ## op, \
    }
1402
/* Prefer the use of 'cover' variant, because it is faster */
/* Expands to all four repeat-mode entries (cover first) for one
 * op/src/dst combination without a mask. */
#define SIMPLE_BILINEAR_FAST_PATH(op,s,d,func) \
    SIMPLE_BILINEAR_FAST_PATH_COVER (op,s,d,func), \
    SIMPLE_BILINEAR_FAST_PATH_NONE (op,s,d,func), \
    SIMPLE_BILINEAR_FAST_PATH_PAD (op,s,d,func), \
    SIMPLE_BILINEAR_FAST_PATH_NORMAL (op,s,d,func)
1409
/* All four repeat-mode entries (cover first) for the a8-mask variants. */
#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH(op,s,d,func) \
    SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER (op,s,d,func), \
    SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NONE (op,s,d,func), \
    SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD (op,s,d,func), \
    SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NORMAL (op,s,d,func)
1415
/* All four repeat-mode entries (cover first) for the solid-mask variants. */
#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH(op,s,d,func) \
    SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER (op,s,d,func), \
    SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NONE (op,s,d,func), \
    SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD (op,s,d,func), \
    SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NORMAL (op,s,d,func)
1421
1422 #endif
1423