1 /* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
2 /*
3  * Copyright © 2000 SuSE, Inc.
4  * Copyright © 2007 Red Hat, Inc.
5  *
6  * Permission to use, copy, modify, distribute, and sell this software and its
7  * documentation for any purpose is hereby granted without fee, provided that
8  * the above copyright notice appear in all copies and that both that
9  * copyright notice and this permission notice appear in supporting
10  * documentation, and that the name of SuSE not be used in advertising or
11  * publicity pertaining to distribution of the software without specific,
12  * written prior permission.  SuSE makes no representations about the
13  * suitability of this software for any purpose.  It is provided "as is"
14  * without express or implied warranty.
15  *
16  * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
18  * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
20  * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
21  * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
22  *
23  * Author:  Keith Packard, SuSE, Inc.
24  */
25 
26 #ifndef PIXMAN_FAST_PATH_H__
27 #define PIXMAN_FAST_PATH_H__
28 
29 #include <stdlib.h>
30 #include "pixman-private.h"
31 
/* Pseudo repeat mode used by the 'cover' template variants. The templates
 * paste their repeat_mode argument onto PIXMAN_REPEAT_, so COVER needs a
 * definition of its own; -1 is presumably chosen because it matches none
 * of the real pixman_repeat_t values (TODO confirm against pixman.h).
 *
 * The value is parenthesized so that the negative literal stays a single
 * operand wherever the macro gets expanded.
 */
#define PIXMAN_REPEAT_COVER			(-1)

/* Flags describing the input of a fast path macro template. Setting a
 * flag means either "property X is available, the template may rely on
 * it" or "property X has to be handled by the template".
 *
 * FLAG_HAVE_SOLID_MASK
 *  The mask is a solid color and the template has to apply it.
 *
 * FLAG_HAVE_NON_SOLID_MASK
 *  The mask is a bits image and the template has to apply it.
 *
 * FLAG_HAVE_SOLID_MASK and FLAG_HAVE_NON_SOLID_MASK are mutually
 * exclusive: setting both at the same time is not allowed.
 */
#define FLAG_NONE				(0)
#define FLAG_HAVE_SOLID_MASK			(1 << 1)
#define FLAG_HAVE_NON_SOLID_MASK		(1 << 2)

/* Source scanlines narrower than this many pixels get extended before
 * being repeated, so that the scanline function is not called over and
 * over again for very short runs.
 */
#define REPEAT_NORMAL_MIN_WIDTH			64
56 
57 static force_inline pixman_bool_t
repeat(pixman_repeat_t repeat,int * c,int size)58 repeat (pixman_repeat_t repeat, int *c, int size)
59 {
60     if (repeat == PIXMAN_REPEAT_NONE)
61     {
62 	if (*c < 0 || *c >= size)
63 	    return FALSE;
64     }
65     else if (repeat == PIXMAN_REPEAT_NORMAL)
66     {
67 	while (*c >= size)
68 	    *c -= size;
69 	while (*c < 0)
70 	    *c += size;
71     }
72     else if (repeat == PIXMAN_REPEAT_PAD)
73     {
74 	*c = CLIP (*c, 0, size - 1);
75     }
76     else /* REFLECT */
77     {
78 	*c = MOD (*c, size * 2);
79 	if (*c >= size)
80 	    *c = size * 2 - *c - 1;
81     }
82     return TRUE;
83 }
84 
85 static force_inline int
pixman_fixed_to_bilinear_weight(pixman_fixed_t x)86 pixman_fixed_to_bilinear_weight (pixman_fixed_t x)
87 {
88     return (x >> (16 - BILINEAR_INTERPOLATION_BITS)) &
89 	   ((1 << BILINEAR_INTERPOLATION_BITS) - 1);
90 }
91 
92 #if BILINEAR_INTERPOLATION_BITS <= 4
93 /* Inspired by Filter_32_opaque from Skia */
94 static force_inline uint32_t
bilinear_interpolation(uint32_t tl,uint32_t tr,uint32_t bl,uint32_t br,int distx,int disty)95 bilinear_interpolation (uint32_t tl, uint32_t tr,
96 			uint32_t bl, uint32_t br,
97 			int distx, int disty)
98 {
99     int distxy, distxiy, distixy, distixiy;
100     uint32_t lo, hi;
101 
102     distx <<= (4 - BILINEAR_INTERPOLATION_BITS);
103     disty <<= (4 - BILINEAR_INTERPOLATION_BITS);
104 
105     distxy = distx * disty;
106     distxiy = (distx << 4) - distxy;	/* distx * (16 - disty) */
107     distixy = (disty << 4) - distxy;	/* disty * (16 - distx) */
108     distixiy =
109 	16 * 16 - (disty << 4) -
110 	(distx << 4) + distxy; /* (16 - distx) * (16 - disty) */
111 
112     lo = (tl & 0xff00ff) * distixiy;
113     hi = ((tl >> 8) & 0xff00ff) * distixiy;
114 
115     lo += (tr & 0xff00ff) * distxiy;
116     hi += ((tr >> 8) & 0xff00ff) * distxiy;
117 
118     lo += (bl & 0xff00ff) * distixy;
119     hi += ((bl >> 8) & 0xff00ff) * distixy;
120 
121     lo += (br & 0xff00ff) * distxy;
122     hi += ((br >> 8) & 0xff00ff) * distxy;
123 
124     return ((lo >> 8) & 0xff00ff) | (hi & ~0xff00ff);
125 }
126 
127 #else
128 #if SIZEOF_LONG > 4
129 
130 static force_inline uint32_t
bilinear_interpolation(uint32_t tl,uint32_t tr,uint32_t bl,uint32_t br,int distx,int disty)131 bilinear_interpolation (uint32_t tl, uint32_t tr,
132 			uint32_t bl, uint32_t br,
133 			int distx, int disty)
134 {
135     uint64_t distxy, distxiy, distixy, distixiy;
136     uint64_t tl64, tr64, bl64, br64;
137     uint64_t f, r;
138 
139     distx <<= (8 - BILINEAR_INTERPOLATION_BITS);
140     disty <<= (8 - BILINEAR_INTERPOLATION_BITS);
141 
142     distxy = distx * disty;
143     distxiy = distx * (256 - disty);
144     distixy = (256 - distx) * disty;
145     distixiy = (256 - distx) * (256 - disty);
146 
147     /* Alpha and Blue */
148     tl64 = tl & 0xff0000ff;
149     tr64 = tr & 0xff0000ff;
150     bl64 = bl & 0xff0000ff;
151     br64 = br & 0xff0000ff;
152 
153     f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy;
154     r = f & 0x0000ff0000ff0000ull;
155 
156     /* Red and Green */
157     tl64 = tl;
158     tl64 = ((tl64 << 16) & 0x000000ff00000000ull) | (tl64 & 0x0000ff00ull);
159 
160     tr64 = tr;
161     tr64 = ((tr64 << 16) & 0x000000ff00000000ull) | (tr64 & 0x0000ff00ull);
162 
163     bl64 = bl;
164     bl64 = ((bl64 << 16) & 0x000000ff00000000ull) | (bl64 & 0x0000ff00ull);
165 
166     br64 = br;
167     br64 = ((br64 << 16) & 0x000000ff00000000ull) | (br64 & 0x0000ff00ull);
168 
169     f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy;
170     r |= ((f >> 16) & 0x000000ff00000000ull) | (f & 0xff000000ull);
171 
172     return (uint32_t)(r >> 16);
173 }
174 
175 #else
176 
177 #ifdef LOW_QUALITY_INTERPOLATION
178 /* Based on Filter_32_opaque_portable from Skia */
179 static force_inline uint32_t
bilinear_interpolation(uint32_t a00,uint32_t a01,uint32_t a10,uint32_t a11,int x,int y)180 bilinear_interpolation(uint32_t a00, uint32_t a01,
181 		       uint32_t a10, uint32_t a11,
182 		       int x, int y)
183 {
184     int xy = x * y;
185     static const uint32_t mask = 0xff00ff;
186 
187     int scale = 256 - 16*y - 16*x + xy;
188     uint32_t lo = (a00 & mask) * scale;
189     uint32_t hi = ((a00 >> 8) & mask) * scale;
190 
191     scale = 16*x - xy;
192     lo += (a01 & mask) * scale;
193     hi += ((a01 >> 8) & mask) * scale;
194 
195     scale = 16*y - xy;
196     lo += (a10 & mask) * scale;
197     hi += ((a10 >> 8) & mask) * scale;
198 
199     lo += (a11 & mask) * xy;
200     hi += ((a11 >> 8) & mask) * xy;
201 
202     return ((lo >> 8) & mask) | (hi & ~mask);
203 }
204 #else
205 static force_inline uint32_t
bilinear_interpolation(uint32_t tl,uint32_t tr,uint32_t bl,uint32_t br,int distx,int disty)206 bilinear_interpolation (uint32_t tl, uint32_t tr,
207 			uint32_t bl, uint32_t br,
208 			int distx, int disty)
209 {
210     int distxy, distxiy, distixy, distixiy;
211     uint32_t f, r;
212 
213     distx <<= (8 - BILINEAR_INTERPOLATION_BITS);
214     disty <<= (8 - BILINEAR_INTERPOLATION_BITS);
215 
216     distxy = distx * disty;
217     distxiy = (distx << 8) - distxy;	/* distx * (256 - disty) */
218     distixy = (disty << 8) - distxy;	/* disty * (256 - distx) */
219     distixiy =
220 	256 * 256 - (disty << 8) -
221 	(distx << 8) + distxy;		/* (256 - distx) * (256 - disty) */
222 
223     /* Blue */
224     r = (tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy
225       + (bl & 0x000000ff) * distixy  + (br & 0x000000ff) * distxy;
226 
227     /* Green */
228     f = (tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy
229       + (bl & 0x0000ff00) * distixy  + (br & 0x0000ff00) * distxy;
230     r |= f & 0xff000000;
231 
232     tl >>= 16;
233     tr >>= 16;
234     bl >>= 16;
235     br >>= 16;
236     r >>= 16;
237 
238     /* Red */
239     f = (tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy
240       + (bl & 0x000000ff) * distixy  + (br & 0x000000ff) * distxy;
241     r |= f & 0x00ff0000;
242 
243     /* Alpha */
244     f = (tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy
245       + (bl & 0x0000ff00) * distixy  + (br & 0x0000ff00) * distxy;
246     r |= f & 0xff000000;
247 
248     return r;
249 }
250 #endif
251 #endif
252 #endif // BILINEAR_INTERPOLATION_BITS <= 4
253 
254 /*
255  * For each scanline fetched from source image with PAD repeat:
256  * - calculate how many pixels need to be padded on the left side
257  * - calculate how many pixels need to be padded on the right side
258  * - update width to only count pixels which are fetched from the image
259  * All this information is returned via 'width', 'left_pad', 'right_pad'
260  * arguments. The code is assuming that 'unit_x' is positive.
261  *
262  * Note: 64-bit math is used in order to avoid potential overflows, which
263  *       is probably excessive in many cases. This particular function
264  *       may need its own correctness test and performance tuning.
265  */
266 static force_inline void
pad_repeat_get_scanline_bounds(int32_t source_image_width,pixman_fixed_t vx,pixman_fixed_t unit_x,int32_t * width,int32_t * left_pad,int32_t * right_pad)267 pad_repeat_get_scanline_bounds (int32_t         source_image_width,
268 				pixman_fixed_t  vx,
269 				pixman_fixed_t  unit_x,
270 				int32_t *       width,
271 				int32_t *       left_pad,
272 				int32_t *       right_pad)
273 {
274     int64_t max_vx = (int64_t) source_image_width << 16;
275     int64_t tmp;
276     if (vx < 0)
277     {
278 	tmp = ((int64_t) unit_x - 1 - vx) / unit_x;
279 	if (tmp > *width)
280 	{
281 	    *left_pad = *width;
282 	    *width = 0;
283 	}
284 	else
285 	{
286 	    *left_pad = (int32_t) tmp;
287 	    *width -= (int32_t) tmp;
288 	}
289     }
290     else
291     {
292 	*left_pad = 0;
293     }
294     tmp = ((int64_t) unit_x - 1 - vx + max_vx) / unit_x - *left_pad;
295     if (tmp < 0)
296     {
297 	*right_pad = *width;
298 	*width = 0;
299     }
300     else if (tmp >= *width)
301     {
302 	*right_pad = 0;
303     }
304     else
305     {
306 	*right_pad = *width - (int32_t) tmp;
307 	*width = (int32_t) tmp;
308     }
309 }
310 
311 /* A macroified version of specialized nearest scalers for some
312  * common 8888 and 565 formats. It supports SRC and OVER ops.
313  *
314  * There are two repeat versions, one that handles repeat normal,
315  * and one without repeat handling that only works if the src region
316  * used is completely covered by the pre-repeated source samples.
317  *
318  * The loops are unrolled to process two pixels per iteration for better
319  * performance on most CPU architectures (superscalar processors
320  * can issue several operations simultaneously, other processors can hide
 * instruction latencies by pipelining operations). Unrolling more
322  * does not make much sense because the compiler will start running out
323  * of spare registers soon.
324  */
325 
/* Alpha extraction helpers, one per source format tag. FAST_NEAREST_SCANLINE
 * selects one of them by pasting its SRC_FORMAT argument between GET_ and
 * _ALPHA, so every format tag used with that template needs a definition.
 */
/* 8888: the alpha value is the topmost byte of the 32-bit pixel */
#define GET_8888_ALPHA(s) ((s) >> 24)
 /* This is not actually used since we don't have an OVER with
    565 source, but it is needed to build. */
#define GET_0565_ALPHA(s) 0xff
/* x888: always reports 0xff, the pixel value itself is not looked at */
#define GET_x888_ALPHA(s) 0xff
331 
/*
 * Template that defines a nearest-neighbour scanline function called
 * 'scanline_func_name'.
 *
 *  SRC_FORMAT, DST_FORMAT - format tags pasted into the names of the
 *                           convert_*_to_* and GET_*_ALPHA helpers
 *  src_type_t, dst_type_t - pixel storage types of source and destination
 *  OP                     - SRC or OVER; any other operator reaches abort ()
 *  repeat_mode            - pasted onto PIXMAN_REPEAT_; only NORMAL changes
 *                           the generated code (vx is wrapped after each
 *                           step)
 *
 * The generated function writes 'w' destination pixels. For each of them
 * it reads the source pixel src[pixman_fixed_to_int (vx)] and then advances
 * vx by unit_x. For NORMAL repeat vx is kept negative by subtracting
 * src_width_fixed, so 'src' is expected to point past the end of the
 * source row.
 *
 * For OVER a source pixel with alpha 0xff is stored directly, a source
 * pixel equal to zero leaves the destination untouched, and everything
 * else is blended with UN8x4_MUL_UN8_ADD_UN8x4 using the inverted source
 * alpha. When 'fully_transparent_src' is set, OVER returns without
 * touching 'dst' at all.
 */
#define FAST_NEAREST_SCANLINE(scanline_func_name, SRC_FORMAT, DST_FORMAT,			\
			      src_type_t, dst_type_t, OP, repeat_mode)				\
static force_inline void									\
scanline_func_name (dst_type_t       *dst,							\
		    const src_type_t *src,							\
		    int32_t           w,							\
		    pixman_fixed_t    vx,							\
		    pixman_fixed_t    unit_x,							\
		    pixman_fixed_t    src_width_fixed,						\
		    pixman_bool_t     fully_transparent_src)					\
{												\
	uint32_t   d;										\
	src_type_t s1, s2;									\
	uint8_t    a1, a2;									\
	int        x1, x2;									\
												\
	if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER && fully_transparent_src)			\
	    return;										\
												\
	if (PIXMAN_OP_ ## OP != PIXMAN_OP_SRC && PIXMAN_OP_ ## OP != PIXMAN_OP_OVER)		\
	    abort();										\
												\
	/* Two pixels per iteration; w ends up as -1 when one pixel is left over */		\
	while ((w -= 2) >= 0)									\
	{											\
	    x1 = pixman_fixed_to_int (vx);							\
	    vx += unit_x;									\
	    if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)				\
	    {											\
		/* This works because we know that unit_x is positive */			\
		while (vx >= 0)									\
		    vx -= src_width_fixed;							\
	    }											\
	    s1 = *(src + x1);									\
												\
	    x2 = pixman_fixed_to_int (vx);							\
	    vx += unit_x;									\
	    if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)				\
	    {											\
		/* This works because we know that unit_x is positive */			\
		while (vx >= 0)									\
		    vx -= src_width_fixed;							\
	    }											\
	    s2 = *(src + x2);									\
												\
	    if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER)						\
	    {											\
		a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1);						\
		a2 = GET_ ## SRC_FORMAT ## _ALPHA(s2);						\
												\
		if (a1 == 0xff)									\
		{										\
		    *dst = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s1);			\
		}										\
		else if (s1)									\
		{										\
		    d = convert_ ## DST_FORMAT ## _to_8888 (*dst);				\
		    s1 = convert_ ## SRC_FORMAT ## _to_8888 (s1);				\
		    a1 ^= 0xff;									\
		    UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1);					\
		    *dst = convert_8888_to_ ## DST_FORMAT (d);					\
		}										\
		dst++;										\
												\
		if (a2 == 0xff)									\
		{										\
		    *dst = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s2);			\
		}										\
		else if (s2)									\
		{										\
		    d = convert_## DST_FORMAT ## _to_8888 (*dst);				\
		    s2 = convert_## SRC_FORMAT ## _to_8888 (s2);				\
		    a2 ^= 0xff;									\
		    UN8x4_MUL_UN8_ADD_UN8x4 (d, a2, s2);					\
		    *dst = convert_8888_to_ ## DST_FORMAT (d);					\
		}										\
		dst++;										\
	    }											\
	    else /* PIXMAN_OP_SRC */								\
	    {											\
		*dst++ = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s1);			\
		*dst++ = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s2);			\
	    }											\
	}											\
												\
	/* Odd pixel count: w is -1 here when a last pixel remains, -2 otherwise */		\
	if (w & 1)										\
	{											\
	    x1 = pixman_fixed_to_int (vx);							\
	    s1 = *(src + x1);									\
												\
	    if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER)						\
	    {											\
		a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1);						\
												\
		if (a1 == 0xff)									\
		{										\
		    *dst = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s1);			\
		}										\
		else if (s1)									\
		{										\
		    d = convert_## DST_FORMAT ## _to_8888 (*dst);				\
		    s1 = convert_ ## SRC_FORMAT ## _to_8888 (s1);				\
		    a1 ^= 0xff;									\
		    UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1);					\
		    *dst = convert_8888_to_ ## DST_FORMAT (d);					\
		}										\
		dst++;										\
	    }											\
	    else /* PIXMAN_OP_SRC */								\
	    {											\
		*dst++ = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s1);			\
	    }											\
	}											\
}
445 
/*
 * Template that defines the composite entry point
 * fast_composite_scaled_nearest<scale_func_name> (imp, info). The generated
 * function walks the destination rows and hands each of them to
 * 'scanline_func'.
 *
 *  scale_func_name - suffix pasted onto the generated function name
 *  scanline_func   - called as (mask, dst, src, w, vx, unit_x,
 *                    src_width_fixed, fully_transparent_src)
 *  src_type_t, mask_type_t, dst_type_t
 *                  - pixel storage types of the three images
 *  repeat_mode     - pasted onto PIXMAN_REPEAT_; NORMAL, PAD and NONE get
 *                    dedicated handling, anything else takes the plain path
 *  have_mask       - non-zero when a mask is used
 *  mask_is_solid   - non-zero when the mask is one solid value rather than
 *                    a scanline of mask pixels
 *
 * Scanline functions are given 'src' pointing one image width past the
 * start of the source row together with 'vx - src_width_fixed', i.e. they
 * index the row with negative offsets counted from its end.
 *
 * unit_x and unit_y are read from the diagonal of the source transform
 * only, so the transform is assumed to be a pure scale plus translation.
 * NOTE(review): presumably guaranteed by FAST_PATH_SCALE_TRANSFORM - confirm.
 * The locals used without declaration (src_image, dest_x, width, height,
 * ...) presumably come from PIXMAN_COMPOSITE_ARGS - confirm.
 */
#define FAST_NEAREST_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t,	\
				  dst_type_t, repeat_mode, have_mask, mask_is_solid)		\
static void											\
fast_composite_scaled_nearest  ## scale_func_name (pixman_implementation_t *imp,		\
						   pixman_composite_info_t *info)               \
{												\
    PIXMAN_COMPOSITE_ARGS (info);					                        \
    dst_type_t *dst_line;						                        \
    mask_type_t *mask_line;									\
    src_type_t *src_first_line;									\
    int       y;										\
    pixman_fixed_t src_width_fixed = pixman_int_to_fixed (src_image->bits.width);		\
    pixman_fixed_t max_vy;									\
    pixman_vector_t v;										\
    pixman_fixed_t vx, vy;									\
    pixman_fixed_t unit_x, unit_y;								\
    int32_t left_pad, right_pad;								\
												\
    src_type_t *src;										\
    dst_type_t *dst;										\
    mask_type_t solid_mask;									\
    const mask_type_t *mask = &solid_mask;							\
    int src_stride, mask_stride, dst_stride;							\
												\
    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type_t, dst_stride, dst_line, 1);	\
    if (have_mask)										\
    {												\
	if (mask_is_solid)									\
	    solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format);	\
	else											\
	    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type_t,			\
				   mask_stride, mask_line, 1);					\
    }												\
    /* pass in 0 instead of src_x and src_y because src_x and src_y need to be			\
     * transformed from destination space to source space */					\
    PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1);		\
												\
    /* reference point is the center of the pixel */						\
    v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2;				\
    v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2;				\
    v.vector[2] = pixman_fixed_1;								\
												\
    if (!pixman_transform_point_3d (src_image->common.transform, &v))				\
	return;											\
												\
    unit_x = src_image->common.transform->matrix[0][0];						\
    unit_y = src_image->common.transform->matrix[1][1];						\
												\
    /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */			\
    v.vector[0] -= pixman_fixed_e;								\
    v.vector[1] -= pixman_fixed_e;								\
												\
    vx = v.vector[0];										\
    vy = v.vector[1];										\
												\
    if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)					\
    {												\
	max_vy = pixman_int_to_fixed (src_image->bits.height);					\
												\
	/* Clamp repeating positions inside the actual samples */				\
	repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed);					\
	repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy);						\
    }												\
												\
    /* PAD and NONE: split every row into left_pad / width / right_pad pixels			\
     * and let vx start at the first pixel that is fetched from the image */			\
    if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD ||					\
	PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE)					\
    {												\
	pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x,			\
					&width, &left_pad, &right_pad);				\
	vx += left_pad * unit_x;								\
    }												\
												\
    while (--height >= 0)									\
    {												\
	dst = dst_line;										\
	dst_line += dst_stride;									\
	if (have_mask && !mask_is_solid)							\
	{											\
	    mask = mask_line;									\
	    mask_line += mask_stride;								\
	}											\
												\
	y = pixman_fixed_to_int (vy);								\
	vy += unit_y;										\
	if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)				\
	    repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy);						\
	if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD)					\
	{											\
	    repeat (PIXMAN_REPEAT_PAD, &y, src_image->bits.height);				\
	    src = src_first_line + src_stride * y;						\
	    /* Left padding: the pointer expression equals src + 1; with unit_x 0		\
	     * and vx -pixman_fixed_e every fetch presumably reads src[0] (assumes		\
	     * pixman_fixed_to_int (-pixman_fixed_e) is -1 - TODO confirm) */			\
	    if (left_pad > 0)									\
	    {											\
		scanline_func (mask, dst,							\
			       src + src_image->bits.width - src_image->bits.width + 1,		\
			       left_pad, -pixman_fixed_e, 0, src_width_fixed, FALSE);		\
	    }											\
	    if (width > 0)									\
	    {											\
		scanline_func (mask + (mask_is_solid ? 0 : left_pad),				\
			       dst + left_pad, src + src_image->bits.width, width,		\
			       vx - src_width_fixed, unit_x, src_width_fixed, FALSE);		\
	    }											\
	    /* Right padding: same trick relative to the end of the source row */		\
	    if (right_pad > 0)									\
	    {											\
		scanline_func (mask + (mask_is_solid ? 0 : left_pad + width),			\
			       dst + left_pad + width, src + src_image->bits.width,		\
			       right_pad, -pixman_fixed_e, 0, src_width_fixed, FALSE);		\
	    }											\
	}											\
	else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE)				\
	{											\
	    /* A single zero pixel stands in for everything outside the image;			\
	     * those calls pass TRUE for fully_transparent_src */				\
	    static const src_type_t zero[1] = { 0 };						\
	    if (y < 0 || y >= src_image->bits.height)						\
	    {											\
		scanline_func (mask, dst, zero + 1, left_pad + width + right_pad,		\
			       -pixman_fixed_e, 0, src_width_fixed, TRUE);			\
		continue;									\
	    }											\
	    src = src_first_line + src_stride * y;						\
	    if (left_pad > 0)									\
	    {											\
		scanline_func (mask, dst, zero + 1, left_pad,					\
			       -pixman_fixed_e, 0, src_width_fixed, TRUE);			\
	    }											\
	    if (width > 0)									\
	    {											\
		scanline_func (mask + (mask_is_solid ? 0 : left_pad),				\
			       dst + left_pad, src + src_image->bits.width, width,		\
			       vx - src_width_fixed, unit_x, src_width_fixed, FALSE);		\
	    }											\
	    if (right_pad > 0)									\
	    {											\
		scanline_func (mask + (mask_is_solid ? 0 : left_pad + width),			\
			       dst + left_pad + width, zero + 1, right_pad,			\
			       -pixman_fixed_e, 0, src_width_fixed, TRUE);			\
	    }											\
	}											\
	else											\
	{											\
	    src = src_first_line + src_stride * y;						\
	    scanline_func (mask, dst, src + src_image->bits.width, width, vx - src_width_fixed,	\
			   unit_x, src_width_fixed, FALSE);					\
	}											\
    }												\
}
591 
/*
 * Main loop template for scanline functions that take a mask argument.
 * Forwards everything to FAST_NEAREST_MAINLOOP_INT after prefixing
 * scale_func_name with '_', so the generated function is named
 * fast_composite_scaled_nearest_<scale_func_name>.
 *
 * The extra level of macro is a workaround for old sun studio, see:
 * https://bugs.freedesktop.org/show_bug.cgi?id=32764
 */
#define FAST_NEAREST_MAINLOOP_COMMON(scale_func_name, scanline_func,		\
				     src_type_t, mask_type_t, dst_type_t,	\
				     repeat_mode, have_mask, mask_is_solid)	\
    FAST_NEAREST_MAINLOOP_INT (_ ## scale_func_name, scanline_func,		\
			       src_type_t, mask_type_t, dst_type_t,		\
			       repeat_mode, have_mask, mask_is_solid)
597 
/*
 * Main loop template for scanline functions that do not take a mask.
 *
 * FAST_NEAREST_MAINLOOP_INT always passes a mask pointer first, so a small
 * wrapper named <scanline_func><scale_func_name>_wrapper is generated that
 * drops this argument and forwards the rest to 'scanline_func'. The main
 * loop is then instantiated with that wrapper, a dummy uint8_t mask type
 * and both have_mask and mask_is_solid set to FALSE.
 */
#define FAST_NEAREST_MAINLOOP_NOMASK(scale_func_name, scanline_func,		\
				     src_type_t, dst_type_t, repeat_mode)	\
    static force_inline void							\
    scanline_func ## scale_func_name ## _wrapper (				\
		    const uint8_t    *unused_mask,				\
		    dst_type_t       *dst_row,					\
		    const src_type_t *src_row,					\
		    int32_t          count,					\
		    pixman_fixed_t   start_vx,					\
		    pixman_fixed_t   step_x,					\
		    pixman_fixed_t   width_fixed,				\
		    pixman_bool_t    transparent_src)				\
    {										\
	scanline_func (dst_row, src_row, count, start_vx, step_x,		\
		       width_fixed, transparent_src);				\
    }										\
    FAST_NEAREST_MAINLOOP_INT (scale_func_name,					\
			       scanline_func ## scale_func_name ## _wrapper,	\
			       src_type_t, uint8_t, dst_type_t, repeat_mode,	\
			       FALSE, FALSE)
615 
/*
 * Maskless main loop template. Same as FAST_NEAREST_MAINLOOP_NOMASK, except
 * that scale_func_name gets prefixed with '_' first, so the generated
 * function is named fast_composite_scaled_nearest_<scale_func_name>.
 */
#define FAST_NEAREST_MAINLOOP(scale_func_name, scanline_func,			\
			      src_type_t, dst_type_t, repeat_mode)		\
    FAST_NEAREST_MAINLOOP_NOMASK (_ ## scale_func_name, scanline_func,		\
				  src_type_t, dst_type_t, repeat_mode)
620 
/*
 * Generate a complete maskless nearest scaler in one go:
 * - the scanline function scaled_nearest_scanline_<scale_func_name>_<OP>
 * - the composite function
 *   fast_composite_scaled_nearest_<scale_func_name>_<OP> built around it
 */
#define FAST_NEAREST(scale_func_name, SRC_FORMAT, DST_FORMAT,			\
		     src_type_t, dst_type_t, OP, repeat_mode)			\
    FAST_NEAREST_SCANLINE (							\
	scaled_nearest_scanline_ ## scale_func_name ## _ ## OP,			\
	SRC_FORMAT, DST_FORMAT, src_type_t, dst_type_t, OP, repeat_mode)	\
    FAST_NEAREST_MAINLOOP_NOMASK (						\
	_ ## scale_func_name ## _ ## OP,					\
	scaled_nearest_scanline_ ## scale_func_name ## _ ## OP,			\
	src_type_t, dst_type_t, repeat_mode)
629 
630 
/* Source image flags shared by every nearest scaling fast path entry below */
#define SCALED_NEAREST_FLAGS						\
    (FAST_PATH_SCALE_TRANSFORM | FAST_PATH_NO_ALPHA_MAP |		\
     FAST_PATH_NEAREST_FILTER | FAST_PATH_NO_ACCESSORS |		\
     FAST_PATH_NARROW_FORMAT)
637 
/*
 * Brace initializer (presumably one fast path table entry) for operator
 * 'op', source format 's' and destination format 'd' without a mask:
 * NORMAL repeat with a positive X unit, handled by
 * fast_composite_scaled_nearest_<func>_normal_<op>.
 */
#define SIMPLE_NEAREST_FAST_PATH_NORMAL(op,s,d,func)			\
    {									\
	PIXMAN_OP_ ## op,						\
	PIXMAN_ ## s,							\
	(SCALED_NEAREST_FLAGS | FAST_PATH_NORMAL_REPEAT |		\
	 FAST_PATH_X_UNIT_POSITIVE),					\
	PIXMAN_null,							\
	0,								\
	PIXMAN_ ## d,							\
	FAST_PATH_STD_DEST_FLAGS,					\
	fast_composite_scaled_nearest_ ## func ## _normal_ ## op,	\
    }
648 
/*
 * Brace initializer (presumably one fast path table entry) for operator
 * 'op', source format 's' and destination format 'd' without a mask:
 * PAD repeat with a positive X unit, handled by
 * fast_composite_scaled_nearest_<func>_pad_<op>.
 */
#define SIMPLE_NEAREST_FAST_PATH_PAD(op,s,d,func)			\
    {									\
	PIXMAN_OP_ ## op,						\
	PIXMAN_ ## s,							\
	(SCALED_NEAREST_FLAGS | FAST_PATH_PAD_REPEAT |			\
	 FAST_PATH_X_UNIT_POSITIVE),					\
	PIXMAN_null,							\
	0,								\
	PIXMAN_ ## d,							\
	FAST_PATH_STD_DEST_FLAGS,					\
	fast_composite_scaled_nearest_ ## func ## _pad_ ## op,		\
    }
659 
/*
 * Brace initializer (presumably one fast path table entry) for operator
 * 'op', source format 's' and destination format 'd' without a mask:
 * NONE repeat with a positive X unit, handled by
 * fast_composite_scaled_nearest_<func>_none_<op>.
 */
#define SIMPLE_NEAREST_FAST_PATH_NONE(op,s,d,func)			\
    {									\
	PIXMAN_OP_ ## op,						\
	PIXMAN_ ## s,							\
	(SCALED_NEAREST_FLAGS | FAST_PATH_NONE_REPEAT |			\
	 FAST_PATH_X_UNIT_POSITIVE),					\
	PIXMAN_null,							\
	0,								\
	PIXMAN_ ## d,							\
	FAST_PATH_STD_DEST_FLAGS,					\
	fast_composite_scaled_nearest_ ## func ## _none_ ## op,		\
    }
670 
/*
 * Brace initializer (presumably one fast path table entry) for operator
 * 'op', source format 's' and destination format 'd' without a mask:
 * requires FAST_PATH_SAMPLES_COVER_CLIP_NEAREST instead of a repeat mode
 * and is handled by fast_composite_scaled_nearest_<func>_cover_<op>.
 */
#define SIMPLE_NEAREST_FAST_PATH_COVER(op,s,d,func)			\
    {									\
	PIXMAN_OP_ ## op,						\
	PIXMAN_ ## s,							\
	(SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST),	\
	PIXMAN_null,							\
	0,								\
	PIXMAN_ ## d,							\
	FAST_PATH_STD_DEST_FLAGS,					\
	fast_composite_scaled_nearest_ ## func ## _cover_ ## op,	\
    }
679 
/*
 * Brace initializer (presumably one fast path table entry) for operator
 * 'op', source format 's' and destination format 'd' with an a8 mask
 * (unified alpha): NORMAL repeat with a positive X unit, handled by
 * fast_composite_scaled_nearest_<func>_normal_<op>.
 */
#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_NORMAL(op,s,d,func)		\
    {									\
	PIXMAN_OP_ ## op,						\
	PIXMAN_ ## s,							\
	(SCALED_NEAREST_FLAGS | FAST_PATH_NORMAL_REPEAT |		\
	 FAST_PATH_X_UNIT_POSITIVE),					\
	PIXMAN_a8,							\
	MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),			\
	PIXMAN_ ## d,							\
	FAST_PATH_STD_DEST_FLAGS,					\
	fast_composite_scaled_nearest_ ## func ## _normal_ ## op,	\
    }
690 
/*
 * Brace initializer (presumably one fast path table entry) for operator
 * 'op', source format 's' and destination format 'd' with an a8 mask
 * (unified alpha): PAD repeat with a positive X unit, handled by
 * fast_composite_scaled_nearest_<func>_pad_<op>.
 */
#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD(op,s,d,func)		\
    {									\
	PIXMAN_OP_ ## op,						\
	PIXMAN_ ## s,							\
	(SCALED_NEAREST_FLAGS | FAST_PATH_PAD_REPEAT |			\
	 FAST_PATH_X_UNIT_POSITIVE),					\
	PIXMAN_a8,							\
	MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),			\
	PIXMAN_ ## d,							\
	FAST_PATH_STD_DEST_FLAGS,					\
	fast_composite_scaled_nearest_ ## func ## _pad_ ## op,		\
    }
701 
/*
 * Brace initializer (presumably one fast path table entry) for operator
 * 'op', source format 's' and destination format 'd' with an a8 mask
 * (unified alpha): NONE repeat with a positive X unit, handled by
 * fast_composite_scaled_nearest_<func>_none_<op>.
 */
#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE(op,s,d,func)		\
    {									\
	PIXMAN_OP_ ## op,						\
	PIXMAN_ ## s,							\
	(SCALED_NEAREST_FLAGS | FAST_PATH_NONE_REPEAT |			\
	 FAST_PATH_X_UNIT_POSITIVE),					\
	PIXMAN_a8,							\
	MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),			\
	PIXMAN_ ## d,							\
	FAST_PATH_STD_DEST_FLAGS,					\
	fast_composite_scaled_nearest_ ## func ## _none_ ## op,		\
    }
712 
/*
 * Brace initializer (presumably one fast path table entry) for operator
 * 'op', source format 's' and destination format 'd' with an a8 mask
 * (unified alpha): requires FAST_PATH_SAMPLES_COVER_CLIP_NEAREST instead
 * of a repeat mode and is handled by
 * fast_composite_scaled_nearest_<func>_cover_<op>.
 */
#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER(op,s,d,func)		\
    {									\
	PIXMAN_OP_ ## op,						\
	PIXMAN_ ## s,							\
	(SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST),	\
	PIXMAN_a8,							\
	MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),			\
	PIXMAN_ ## d,							\
	FAST_PATH_STD_DEST_FLAGS,					\
	fast_composite_scaled_nearest_ ## func ## _cover_ ## op,	\
    }
721 
/*
 * Brace initializer (presumably one fast path table entry) for operator
 * 'op', source format 's' and destination format 'd' with a solid mask
 * (unified alpha): NORMAL repeat with a positive X unit, handled by
 * fast_composite_scaled_nearest_<func>_normal_<op>.
 */
#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL(op,s,d,func)		\
    {									\
	PIXMAN_OP_ ## op,						\
	PIXMAN_ ## s,							\
	(SCALED_NEAREST_FLAGS | FAST_PATH_NORMAL_REPEAT |		\
	 FAST_PATH_X_UNIT_POSITIVE),					\
	PIXMAN_solid,							\
	MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),			\
	PIXMAN_ ## d,							\
	FAST_PATH_STD_DEST_FLAGS,					\
	fast_composite_scaled_nearest_ ## func ## _normal_ ## op,	\
    }
732 
/*
 * Brace initializer (presumably one fast path table entry) for operator
 * 'op', source format 's' and destination format 'd' with a solid mask
 * (unified alpha): PAD repeat with a positive X unit, handled by
 * fast_composite_scaled_nearest_<func>_pad_<op>.
 */
#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD(op,s,d,func)		\
    {									\
	PIXMAN_OP_ ## op,						\
	PIXMAN_ ## s,							\
	(SCALED_NEAREST_FLAGS | FAST_PATH_PAD_REPEAT |			\
	 FAST_PATH_X_UNIT_POSITIVE),					\
	PIXMAN_solid,							\
	MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),			\
	PIXMAN_ ## d,							\
	FAST_PATH_STD_DEST_FLAGS,					\
	fast_composite_scaled_nearest_ ## func ## _pad_ ## op,		\
    }
743 
/*
 * Brace initializer (presumably one fast path table entry) for operator
 * 'op', source format 's' and destination format 'd' with a solid mask
 * (unified alpha): NONE repeat with a positive X unit, handled by
 * fast_composite_scaled_nearest_<func>_none_<op>.
 */
#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE(op,s,d,func)		\
    {									\
	PIXMAN_OP_ ## op,						\
	PIXMAN_ ## s,							\
	(SCALED_NEAREST_FLAGS | FAST_PATH_NONE_REPEAT |			\
	 FAST_PATH_X_UNIT_POSITIVE),					\
	PIXMAN_solid,							\
	MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),			\
	PIXMAN_ ## d,							\
	FAST_PATH_STD_DEST_FLAGS,					\
	fast_composite_scaled_nearest_ ## func ## _none_ ## op,		\
    }
754 
/*
 * Brace initializer (presumably one fast path table entry) for operator
 * 'op', source format 's' and destination format 'd' with a solid mask
 * (unified alpha): requires FAST_PATH_SAMPLES_COVER_CLIP_NEAREST instead
 * of a repeat mode and is handled by
 * fast_composite_scaled_nearest_<func>_cover_<op>.
 */
#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER(op,s,d,func)		\
    {									\
	PIXMAN_OP_ ## op,						\
	PIXMAN_ ## s,							\
	(SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST),	\
	PIXMAN_solid,							\
	MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),			\
	PIXMAN_ ## d,							\
	FAST_PATH_STD_DEST_FLAGS,					\
	fast_composite_scaled_nearest_ ## func ## _cover_ ## op,	\
    }
763 
/* Expands to the four maskless entries (COVER, NONE, PAD, NORMAL) for one
 * op/source/destination/function combination, separated by commas.
 */
/* Prefer the use of 'cover' variant, because it is faster */
#define SIMPLE_NEAREST_FAST_PATH(op,s,d,func)				\
    SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func),			\
    SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func),			\
    SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func),				\
    SIMPLE_NEAREST_FAST_PATH_NORMAL (op,s,d,func)
770 
/* Expands to the a8 mask entries COVER, NONE and PAD (in this order) for
 * one op/source/destination/function combination. The NORMAL repeat entry
 * is not part of this list.
 * NOTE(review): presumably left out on purpose - confirm.
 */
#define SIMPLE_NEAREST_A8_MASK_FAST_PATH(op,s,d,func)			\
    SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER (op,s,d,func),		\
    SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE (op,s,d,func),		\
    SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD (op,s,d,func)
775 
/* Expands to the solid mask entries COVER, NONE and PAD (in this order)
 * for one op/source/destination/function combination. The NORMAL repeat
 * entry is not part of this list.
 * NOTE(review): presumably left out on purpose - confirm.
 */
#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH(op,s,d,func)		\
    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER (op,s,d,func),		\
    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE (op,s,d,func),		\
    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD (op,s,d,func)
780 
781 /*****************************************************************************/
782 
783 /*
784  * Identify 5 zones in each scanline for bilinear scaling. Depending on
785  * whether 2 pixels to be interpolated are fetched from the image itself,
786  * from the padding area around it or from both image and padding area.
787  */
788 static force_inline void
bilinear_pad_repeat_get_scanline_bounds(int32_t source_image_width,pixman_fixed_t vx,pixman_fixed_t unit_x,int32_t * left_pad,int32_t * left_tz,int32_t * width,int32_t * right_tz,int32_t * right_pad)789 bilinear_pad_repeat_get_scanline_bounds (int32_t         source_image_width,
790 					 pixman_fixed_t  vx,
791 					 pixman_fixed_t  unit_x,
792 					 int32_t *       left_pad,
793 					 int32_t *       left_tz,
794 					 int32_t *       width,
795 					 int32_t *       right_tz,
796 					 int32_t *       right_pad)
797 {
798 	int width1 = *width, left_pad1, right_pad1;
799 	int width2 = *width, left_pad2, right_pad2;
800 
801 	pad_repeat_get_scanline_bounds (source_image_width, vx, unit_x,
802 					&width1, &left_pad1, &right_pad1);
803 	pad_repeat_get_scanline_bounds (source_image_width, vx + pixman_fixed_1,
804 					unit_x, &width2, &left_pad2, &right_pad2);
805 
806 	*left_pad = left_pad2;
807 	*left_tz = left_pad1 - left_pad2;
808 	*right_tz = right_pad2 - right_pad1;
809 	*right_pad = right_pad1;
810 	*width -= *left_pad + *left_tz + *right_tz + *right_pad;
811 }
812 
813 /*
814  * Main loop template for single pass bilinear scaling. It needs to be
815  * provided with 'scanline_func' which should do the compositing operation.
816  * The needed function has the following prototype:
817  *
818  *	scanline_func (dst_type_t *       dst,
 *		       const mask_type_t *mask,
820  *		       const src_type_t * src_top,
821  *		       const src_type_t * src_bottom,
822  *		       int32_t            width,
823  *		       int                weight_top,
824  *		       int                weight_bottom,
825  *		       pixman_fixed_t     vx,
826  *		       pixman_fixed_t     unit_x,
827  *		       pixman_fixed_t     max_vx,
828  *		       pixman_bool_t      zero_src)
829  *
830  * Where:
831  *  dst                 - destination scanline buffer for storing results
832  *  mask                - mask buffer (or single value for solid mask)
833  *  src_top, src_bottom - two source scanlines
834  *  width               - number of pixels to process
835  *  weight_top          - weight of the top row for interpolation
836  *  weight_bottom       - weight of the bottom row for interpolation
837  *  vx                  - initial position for fetching the first pair of
838  *                        pixels from the source buffer
839  *  unit_x              - position increment needed to move to the next pair
840  *                        of pixels
841  *  max_vx              - image size as a fixed point value, can be used for
842  *                        implementing NORMAL repeat (when it is supported)
843  *  zero_src            - boolean hint variable, which is set to TRUE when
844  *                        all source pixels are fetched from zero padding
845  *                        zone for NONE repeat
846  *
847  * Note: normally the sum of 'weight_top' and 'weight_bottom' is equal to
848  *       BILINEAR_INTERPOLATION_RANGE, but sometimes it may be less than that
849  *       for NONE repeat when handling fuzzy antialiased top or bottom image
 *       edges. Also, both the top and bottom weights are guaranteed to be
 *       less than BILINEAR_INTERPOLATION_RANGE.
852  *       For example, the weights can fit into unsigned byte or be used
853  *       with 8-bit SIMD multiplication instructions for 8-bit interpolation
854  *       precision.
855  */
856 
/*
 * The single "scanline_func" is replaced by a "fetch_func" / "op_func" pair
 * to allow optional two stage processing: a bilinear fetch into a temporary
 * buffer followed by an unscaled combine.
 *
 *  - op_func != NULL: fetch_func writes 'width' pixels into 'scanline_buf',
 *    then op_func (cast to
 *    void (*)(dst_type_t *, const mask_type_t *, const src_type_t *, int))
 *    is called with (dst, mask, scanline_buf, width).
 *  - op_func == NULL: the old behavior is kept, fetch_func writes straight
 *    into 'dst'.
 *
 * 'scanline_buf' must therefore have room for 'width' elements of
 * src_type_t whenever op_func is not NULL.
 *
 * This is ugly and gcc issues some warnings, but it works.
 *
 * Tip: clang has much better error reporting than gcc for deeply nested
 * macros.
 */

#define	scanline_func(dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst,		\
		      scanline_buf, mask, src_top, src_bottom, width,				\
		      weight_top, weight_bottom, vx, unit_x, max_vx, zero_src)			\
    do {											\
	if ((op_func) != NULL)									\
	{											\
	    /* stage 1: bilinear fetch into the scratch buffer */				\
	    fetch_func ((void *) (scanline_buf), (mask), (src_top), (src_bottom), (width),	\
			(weight_top), (weight_bottom), (vx), (unit_x), (max_vx), (zero_src));	\
	    /* stage 2: combine the scratch buffer into the destination */			\
	    ((void (*)(dst_type_t *, const mask_type_t *, const src_type_t *, int)) (op_func))	\
		((dst), (mask), (src_type_t *) (scanline_buf), (width));			\
	}											\
	else											\
	{											\
	    fetch_func ((void *) (dst), (mask), (src_top), (src_bottom), (width),		\
			(weight_top), (weight_bottom), (vx), (unit_x), (max_vx), (zero_src));	\
	}											\
    } while (0)
882 
883 
/*
 * Length of the on-stack scratch buffer used by the two stage
 * (fetch_func + op_func) path, counted in uint64_t elements.
 */
#define SCANLINE_BUFFER_LENGTH 3072

/*
 * Template that defines
 *
 *   static void
 *   fast_composite_scaled_bilinear<scale_func_name> (imp, info)
 *
 * i.e. the main loop of a bilinear scaling fast path.
 *
 *  scale_func_name        - suffix pasted onto the function name
 *  fetch_func, op_func    - forwarded to scanline_func() for every run of
 *                           pixels; op_func may be NULL
 *  src_type_t, mask_type_t,
 *  dst_type_t             - element types of the source, mask and
 *                           destination scanlines
 *  repeat_mode            - pasted onto PIXMAN_REPEAT_; PAD, NONE and NORMAL
 *                           have dedicated code, any other value takes the
 *                           plain single-call path at the end of the loop
 *  flags                  - tested for FLAG_HAVE_SOLID_MASK and
 *                           FLAG_HAVE_NON_SOLID_MASK
 *
 * When op_func is not NULL every run is first fetched into a scratch buffer.
 * That buffer lives on the stack unless the longest run does not fit, in
 * which case it is heap allocated and freed before returning. If the
 * allocation fails the function returns without compositing anything.
 */
#define FAST_BILINEAR_MAINLOOP_INT(scale_func_name, fetch_func, op_func, src_type_t,		\
				  mask_type_t, dst_type_t, repeat_mode, flags)			\
static void											\
fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp,		\
						   pixman_composite_info_t *info)		\
{												\
    PIXMAN_COMPOSITE_ARGS (info);								\
    dst_type_t *dst_line;									\
    mask_type_t *mask_line;									\
    src_type_t *src_first_line;									\
    int       y1, y2;										\
    pixman_fixed_t max_vx = INT32_MAX; /* suppress uninitialized variable warning */		\
    pixman_vector_t v;										\
    pixman_fixed_t vx, vy;									\
    pixman_fixed_t unit_x, unit_y;								\
    int32_t left_pad, left_tz, right_tz, right_pad;						\
    int32_t max_run;										\
												\
    dst_type_t *dst;										\
    mask_type_t solid_mask;									\
    const mask_type_t *mask = &solid_mask;							\
    int src_stride, mask_stride, dst_stride;							\
												\
    int src_width;										\
    pixman_fixed_t src_width_fixed;								\
    int max_x;											\
    pixman_bool_t need_src_extension;								\
												\
    uint64_t stack_scanline_buffer[SCANLINE_BUFFER_LENGTH];					\
    uint8_t *scanline_buffer = (uint8_t *) stack_scanline_buffer;				\
												\
    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type_t, dst_stride, dst_line, 1);	\
    if (flags & FLAG_HAVE_SOLID_MASK)								\
    {												\
	solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format);	\
	mask_stride = 0;									\
    }												\
    else if (flags & FLAG_HAVE_NON_SOLID_MASK)							\
    {												\
	PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type_t,				\
			       mask_stride, mask_line, 1);					\
    }												\
												\
    /* pass in 0 instead of src_x and src_y because src_x and src_y need to be			\
     * transformed from destination space to source space */					\
    PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1);		\
												\
    /* reference point is the center of the pixel */						\
    v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2;				\
    v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2;				\
    v.vector[2] = pixman_fixed_1;								\
												\
    if (!pixman_transform_point_3d (src_image->common.transform, &v))				\
	return;											\
												\
    unit_x = src_image->common.transform->matrix[0][0];						\
    unit_y = src_image->common.transform->matrix[1][1];						\
												\
    v.vector[0] -= pixman_fixed_1 / 2;								\
    v.vector[1] -= pixman_fixed_1 / 2;								\
												\
    vy = v.vector[1];										\
												\
    if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD ||					\
	PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE)					\
    {												\
	/* split the scanline into zones; 'width' becomes the inner zone only */		\
	bilinear_pad_repeat_get_scanline_bounds (src_image->bits.width, v.vector[0], unit_x,	\
					&left_pad, &left_tz, &width, &right_tz, &right_pad);	\
	if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD)					\
	{											\
	    /* PAD repeat does not need special handling for 'transition zones' and */		\
	    /* they can be combined with 'padding zones' safely */				\
	    left_pad += left_tz;								\
	    right_pad += right_tz;								\
	    left_tz = right_tz = 0;								\
	}											\
	v.vector[0] += left_pad * unit_x;							\
    }												\
												\
    if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)					\
    {												\
	vx = v.vector[0];									\
	repeat (PIXMAN_REPEAT_NORMAL, &vx, pixman_int_to_fixed(src_image->bits.width));		\
	max_x = pixman_fixed_to_int (vx + (width - 1) * (int64_t)unit_x) + 1;			\
												\
	if (src_image->bits.width < REPEAT_NORMAL_MIN_WIDTH)					\
	{											\
	    src_width = 0;									\
												\
	    while (src_width < REPEAT_NORMAL_MIN_WIDTH && src_width <= max_x)			\
		src_width += src_image->bits.width;						\
												\
	    need_src_extension = TRUE;								\
	}											\
	else											\
	{											\
	    src_width = src_image->bits.width;							\
	    need_src_extension = FALSE;								\
	}											\
												\
	src_width_fixed = pixman_int_to_fixed (src_width);					\
    }												\
												\
    /* The scratch buffer receives one run of pixels per scanline_func call, so */		\
    /* it has to hold the longest run. For PAD and NONE repeat 'width' is only */		\
    /* the inner zone by now and a padding or transition zone may be longer. */			\
    max_run = width;										\
    if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD ||					\
	PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE)					\
    {												\
	if (left_pad > max_run)									\
	    max_run = left_pad;									\
	if (left_tz > max_run)									\
	    max_run = left_tz;									\
	if (right_tz > max_run)									\
	    max_run = right_tz;									\
	if (right_pad > max_run)								\
	    max_run = right_pad;								\
    }												\
												\
    if (op_func != NULL && max_run * sizeof(src_type_t) > sizeof(stack_scanline_buffer))	\
    {												\
	scanline_buffer = pixman_malloc_ab (max_run, sizeof(src_type_t));			\
												\
	if (!scanline_buffer)									\
	    return;										\
    }												\
												\
    while (--height >= 0)									\
    {												\
	int weight1, weight2;									\
	dst = dst_line;										\
	dst_line += dst_stride;									\
	vx = v.vector[0];									\
	if (flags & FLAG_HAVE_NON_SOLID_MASK)							\
	{											\
	    mask = mask_line;									\
	    mask_line += mask_stride;								\
	}											\
												\
	y1 = pixman_fixed_to_int (vy);								\
	weight2 = pixman_fixed_to_bilinear_weight (vy);						\
	if (weight2)										\
	{											\
	    /* both weight1 and weight2 are smaller than BILINEAR_INTERPOLATION_RANGE */	\
	    y2 = y1 + 1;									\
	    weight1 = BILINEAR_INTERPOLATION_RANGE - weight2;					\
	}											\
	else											\
	{											\
	    /* set both top and bottom row to the same scanline and tweak weights */		\
	    y2 = y1;										\
	    weight1 = weight2 = BILINEAR_INTERPOLATION_RANGE / 2;				\
	}											\
	vy += unit_y;										\
	if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD)					\
	{											\
	    src_type_t *src1, *src2;								\
	    src_type_t buf1[2];									\
	    src_type_t buf2[2];									\
	    repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height);				\
	    repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height);				\
	    src1 = src_first_line + src_stride * y1;						\
	    src2 = src_first_line + src_stride * y2;						\
												\
	    if (left_pad > 0)									\
	    {											\
		buf1[0] = buf1[1] = src1[0];							\
		buf2[0] = buf2[1] = src2[0];							\
		scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst,	\
			       scanline_buffer, mask, buf1, buf2, left_pad, weight1, weight2,	\
			       0, 0, 0, FALSE);							\
		dst += left_pad;								\
		if (flags & FLAG_HAVE_NON_SOLID_MASK)						\
		    mask += left_pad;								\
	    }											\
	    if (width > 0)									\
	    {											\
		scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst,	\
			       scanline_buffer, mask, src1, src2, width, weight1, weight2,	\
			       vx, unit_x, 0, FALSE);						\
		dst += width;									\
		if (flags & FLAG_HAVE_NON_SOLID_MASK)						\
		    mask += width;								\
	    }											\
	    if (right_pad > 0)									\
	    {											\
		buf1[0] = buf1[1] = src1[src_image->bits.width - 1];				\
		buf2[0] = buf2[1] = src2[src_image->bits.width - 1];				\
		scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst,	\
			       scanline_buffer, mask, buf1, buf2, right_pad, weight1, weight2,	\
			       0, 0, 0, FALSE);							\
	    }											\
	}											\
	else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE)				\
	{											\
	    src_type_t *src1, *src2;								\
	    src_type_t buf1[2];									\
	    src_type_t buf2[2];									\
	    /* handle top/bottom zero padding by just setting weights to 0 if needed */		\
	    if (y1 < 0)										\
	    {											\
		weight1 = 0;									\
		y1 = 0;										\
	    }											\
	    if (y1 >= src_image->bits.height)							\
	    {											\
		weight1 = 0;									\
		y1 = src_image->bits.height - 1;						\
	    }											\
	    if (y2 < 0)										\
	    {											\
		weight2 = 0;									\
		y2 = 0;										\
	    }											\
	    if (y2 >= src_image->bits.height)							\
	    {											\
		weight2 = 0;									\
		y2 = src_image->bits.height - 1;						\
	    }											\
	    src1 = src_first_line + src_stride * y1;						\
	    src2 = src_first_line + src_stride * y2;						\
												\
	    if (left_pad > 0)									\
	    {											\
		buf1[0] = buf1[1] = 0;								\
		buf2[0] = buf2[1] = 0;								\
		scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst,	\
			       scanline_buffer, mask, buf1, buf2, left_pad, weight1, weight2,	\
			       0, 0, 0, TRUE);							\
		dst += left_pad;								\
		if (flags & FLAG_HAVE_NON_SOLID_MASK)						\
		    mask += left_pad;								\
	    }											\
	    if (left_tz > 0)									\
	    {											\
		buf1[0] = 0;									\
		buf1[1] = src1[0];								\
		buf2[0] = 0;									\
		buf2[1] = src2[0];								\
		scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst,	\
			       scanline_buffer, mask, buf1, buf2, left_tz, weight1, weight2,	\
			       pixman_fixed_frac (vx), unit_x, 0, FALSE);			\
		dst += left_tz;									\
		if (flags & FLAG_HAVE_NON_SOLID_MASK)						\
		    mask += left_tz;								\
		vx += left_tz * unit_x;								\
	    }											\
	    if (width > 0)									\
	    {											\
		scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst,	\
			       scanline_buffer, mask, src1, src2, width, weight1, weight2,	\
			       vx, unit_x, 0, FALSE);						\
		dst += width;									\
		if (flags & FLAG_HAVE_NON_SOLID_MASK)						\
		    mask += width;								\
		vx += width * unit_x;								\
	    }											\
	    if (right_tz > 0)									\
	    {											\
		buf1[0] = src1[src_image->bits.width - 1];					\
		buf1[1] = 0;									\
		buf2[0] = src2[src_image->bits.width - 1];					\
		buf2[1] = 0;									\
		scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst,	\
			       scanline_buffer, mask, buf1, buf2, right_tz, weight1, weight2,	\
			       pixman_fixed_frac (vx), unit_x, 0, FALSE);			\
		dst += right_tz;								\
		if (flags & FLAG_HAVE_NON_SOLID_MASK)						\
		    mask += right_tz;								\
	    }											\
	    if (right_pad > 0)									\
	    {											\
		buf1[0] = buf1[1] = 0;								\
		buf2[0] = buf2[1] = 0;								\
		scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst,	\
			       scanline_buffer, mask, buf1, buf2, right_pad, weight1, weight2,	\
			       0, 0, 0, TRUE);							\
	    }											\
	}											\
	else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)				\
	{											\
	    int32_t	    num_pixels;								\
	    int32_t	    width_remain;							\
	    src_type_t *    src_line_top;							\
	    src_type_t *    src_line_bottom;							\
	    src_type_t	    buf1[2];								\
	    src_type_t	    buf2[2];								\
	    src_type_t	    extended_src_line0[REPEAT_NORMAL_MIN_WIDTH*2];			\
	    src_type_t	    extended_src_line1[REPEAT_NORMAL_MIN_WIDTH*2];			\
	    int		    i, j;								\
												\
	    repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image->bits.height);				\
	    repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image->bits.height);				\
	    src_line_top = src_first_line + src_stride * y1;					\
	    src_line_bottom = src_first_line + src_stride * y2;					\
												\
	    if (need_src_extension)								\
	    {											\
		for (i=0; i<src_width;)								\
		{										\
		    for (j=0; j<src_image->bits.width; j++, i++)				\
		    {										\
			extended_src_line0[i] = src_line_top[j];				\
			extended_src_line1[i] = src_line_bottom[j];				\
		    }										\
		}										\
												\
		src_line_top = &extended_src_line0[0];						\
		src_line_bottom = &extended_src_line1[0];					\
	    }											\
												\
	    /* Top & Bottom wrap around buffer */						\
	    buf1[0] = src_line_top[src_width - 1];						\
	    buf1[1] = src_line_top[0];								\
	    buf2[0] = src_line_bottom[src_width - 1];						\
	    buf2[1] = src_line_bottom[0];							\
												\
	    width_remain = width;								\
												\
	    while (width_remain > 0)								\
	    {											\
		/* We use src_width_fixed because it can make vx in original source range */	\
		repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed);				\
												\
		/* Wrap around part */								\
		if (pixman_fixed_to_int (vx) == src_width - 1)					\
		{										\
		    /* for positive unit_x							\
		     * num_pixels = max(n) + 1, where vx + n*unit_x < src_width_fixed		\
		     *										\
		     * vx is in range [0, src_width_fixed - pixman_fixed_e]			\
		     * So we are safe from overflow.						\
		     */										\
		    num_pixels = ((src_width_fixed - vx - pixman_fixed_e) / unit_x) + 1;	\
												\
		    if (num_pixels > width_remain)						\
			num_pixels = width_remain;						\
												\
		    scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func,	\
				   dst, scanline_buffer, mask, buf1, buf2, num_pixels,		\
				   weight1, weight2, pixman_fixed_frac(vx),			\
				   unit_x, src_width_fixed, FALSE);				\
												\
		    width_remain -= num_pixels;							\
		    vx += num_pixels * unit_x;							\
		    dst += num_pixels;								\
												\
		    if (flags & FLAG_HAVE_NON_SOLID_MASK)					\
			mask += num_pixels;							\
												\
		    repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed);			\
		}										\
												\
		/* Normal scanline composite */							\
		if (pixman_fixed_to_int (vx) != src_width - 1 && width_remain > 0)		\
		{										\
		    /* for positive unit_x							\
		     * num_pixels = max(n) + 1, where vx + n*unit_x < (src_width_fixed - 1)	\
		     *										\
		     * vx is in range [0, src_width_fixed - pixman_fixed_e]			\
		     * So we are safe from overflow here.					\
		     */										\
		    num_pixels = ((src_width_fixed - pixman_fixed_1 - vx - pixman_fixed_e)	\
				  / unit_x) + 1;						\
												\
		    if (num_pixels > width_remain)						\
			num_pixels = width_remain;						\
												\
		    scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func,	\
				   dst, scanline_buffer, mask, src_line_top, src_line_bottom,	\
				   num_pixels, weight1, weight2, vx, unit_x, src_width_fixed,	\
				   FALSE);							\
												\
		    width_remain -= num_pixels;							\
		    vx += num_pixels * unit_x;							\
		    dst += num_pixels;								\
												\
		    if (flags & FLAG_HAVE_NON_SOLID_MASK)					\
		        mask += num_pixels;							\
		}										\
	    }											\
	}											\
	else											\
	{											\
	    scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst,	\
			   scanline_buffer, mask,						\
			   src_first_line + src_stride * y1,					\
			   src_first_line + src_stride * y2, width,				\
			   weight1, weight2, vx, unit_x, max_vx, FALSE);			\
	}											\
    }												\
    /* release the scratch buffer if it had to be heap allocated */				\
    if (scanline_buffer != (uint8_t *) stack_scanline_buffer)					\
	free (scanline_buffer);									\
}
1263 
/*
 * Thin wrapper around FAST_BILINEAR_MAINLOOP_INT that prefixes the function
 * name suffix with an underscore and forwards every other argument as is.
 * It exists as a workaround for old sun studio, see:
 * https://bugs.freedesktop.org/show_bug.cgi?id=32764
 */
#define FAST_BILINEAR_MAINLOOP_COMMON(scale_func_name, fetch_func, op_func,	\
				      src_type_t, mask_type_t, dst_type_t,	\
				      repeat_mode, flags)			\
    FAST_BILINEAR_MAINLOOP_INT (_ ## scale_func_name, fetch_func, op_func,	\
				src_type_t, mask_type_t, dst_type_t,		\
				repeat_mode, flags)
1269 
/*
 * Source image flags shared by every bilinear scaling fast path entry
 * defined below; the individual entries OR their repeat specific flags
 * onto this set.
 */
#define SCALED_BILINEAR_FLAGS			\
    (  FAST_PATH_SCALE_TRANSFORM		\
     | FAST_PATH_NO_ALPHA_MAP			\
     | FAST_PATH_BILINEAR_FILTER		\
     | FAST_PATH_NO_ACCESSORS			\
     | FAST_PATH_NARROW_FORMAT)
1276 
/*
 * Brace enclosed fast path table entry (presumably a pixman_fast_path_t
 * initializer - confirm against pixman-private.h) for bilinear scaling with
 * PAD repeat and PIXMAN_null / 0 in the mask positions.
 * 'op' is pasted onto PIXMAN_OP_, 's' and 'd' onto PIXMAN_, and the entry
 * names fast_composite_scaled_bilinear_<func>_pad_<op>.
 */
#define SIMPLE_BILINEAR_FAST_PATH_PAD(op,s,d,func)			\
    {									\
	PIXMAN_OP_ ## op,						\
	PIXMAN_ ## s,							\
	(SCALED_BILINEAR_FLAGS | FAST_PATH_PAD_REPEAT |			\
	 FAST_PATH_X_UNIT_POSITIVE),					\
	PIXMAN_null,							\
	0,								\
	PIXMAN_ ## d,							\
	FAST_PATH_STD_DEST_FLAGS,					\
	fast_composite_scaled_bilinear_ ## func ## _pad_ ## op,	\
    }
1287 
/*
 * Brace enclosed fast path table entry (presumably a pixman_fast_path_t
 * initializer - confirm against pixman-private.h) for bilinear scaling with
 * NONE repeat and PIXMAN_null / 0 in the mask positions.
 * 'op' is pasted onto PIXMAN_OP_, 's' and 'd' onto PIXMAN_, and the entry
 * names fast_composite_scaled_bilinear_<func>_none_<op>.
 */
#define SIMPLE_BILINEAR_FAST_PATH_NONE(op,s,d,func)			\
    {									\
	PIXMAN_OP_ ## op,						\
	PIXMAN_ ## s,							\
	(SCALED_BILINEAR_FLAGS | FAST_PATH_NONE_REPEAT |		\
	 FAST_PATH_X_UNIT_POSITIVE),					\
	PIXMAN_null,							\
	0,								\
	PIXMAN_ ## d,							\
	FAST_PATH_STD_DEST_FLAGS,					\
	fast_composite_scaled_bilinear_ ## func ## _none_ ## op,	\
    }
1298 
/*
 * Brace enclosed fast path table entry (presumably a pixman_fast_path_t
 * initializer - confirm against pixman-private.h) for bilinear scaling where
 * FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR is required instead of a repeat
 * flag, with PIXMAN_null / 0 in the mask positions.
 * 'op' is pasted onto PIXMAN_OP_, 's' and 'd' onto PIXMAN_, and the entry
 * names fast_composite_scaled_bilinear_<func>_cover_<op>.
 */
#define SIMPLE_BILINEAR_FAST_PATH_COVER(op,s,d,func)			\
    {									\
	PIXMAN_OP_ ## op,						\
	PIXMAN_ ## s,							\
	(SCALED_BILINEAR_FLAGS |					\
	 FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR),			\
	PIXMAN_null,							\
	0,								\
	PIXMAN_ ## d,							\
	FAST_PATH_STD_DEST_FLAGS,					\
	fast_composite_scaled_bilinear_ ## func ## _cover_ ## op,	\
    }
1307 
/*
 * Brace enclosed fast path table entry (presumably a pixman_fast_path_t
 * initializer - confirm against pixman-private.h) for bilinear scaling with
 * NORMAL repeat and PIXMAN_null / 0 in the mask positions.
 * 'op' is pasted onto PIXMAN_OP_, 's' and 'd' onto PIXMAN_, and the entry
 * names fast_composite_scaled_bilinear_<func>_normal_<op>.
 */
#define SIMPLE_BILINEAR_FAST_PATH_NORMAL(op,s,d,func)			\
    {									\
	PIXMAN_OP_ ## op,						\
	PIXMAN_ ## s,							\
	(SCALED_BILINEAR_FLAGS | FAST_PATH_NORMAL_REPEAT |		\
	 FAST_PATH_X_UNIT_POSITIVE),					\
	PIXMAN_null,							\
	0,								\
	PIXMAN_ ## d,							\
	FAST_PATH_STD_DEST_FLAGS,					\
	fast_composite_scaled_bilinear_ ## func ## _normal_ ## op,	\
    }
1318 
/*
 * Brace enclosed fast path table entry (presumably a pixman_fast_path_t
 * initializer - confirm against pixman-private.h) for bilinear scaling with
 * PAD repeat, with PIXMAN_a8 and MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA)
 * in the mask positions.
 * 'op' is pasted onto PIXMAN_OP_, 's' and 'd' onto PIXMAN_, and the entry
 * names fast_composite_scaled_bilinear_<func>_pad_<op>.
 */
#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD(op,s,d,func)		\
    {									\
	PIXMAN_OP_ ## op,						\
	PIXMAN_ ## s,							\
	(SCALED_BILINEAR_FLAGS | FAST_PATH_PAD_REPEAT |			\
	 FAST_PATH_X_UNIT_POSITIVE),					\
	PIXMAN_a8,							\
	MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),			\
	PIXMAN_ ## d,							\
	FAST_PATH_STD_DEST_FLAGS,					\
	fast_composite_scaled_bilinear_ ## func ## _pad_ ## op,	\
    }
1329 
/*
 * Brace enclosed fast path table entry (presumably a pixman_fast_path_t
 * initializer - confirm against pixman-private.h) for bilinear scaling with
 * NONE repeat, with PIXMAN_a8 and MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA)
 * in the mask positions.
 * 'op' is pasted onto PIXMAN_OP_, 's' and 'd' onto PIXMAN_, and the entry
 * names fast_composite_scaled_bilinear_<func>_none_<op>.
 */
#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NONE(op,s,d,func)		\
    {									\
	PIXMAN_OP_ ## op,						\
	PIXMAN_ ## s,							\
	(SCALED_BILINEAR_FLAGS | FAST_PATH_NONE_REPEAT |		\
	 FAST_PATH_X_UNIT_POSITIVE),					\
	PIXMAN_a8,							\
	MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),			\
	PIXMAN_ ## d,							\
	FAST_PATH_STD_DEST_FLAGS,					\
	fast_composite_scaled_bilinear_ ## func ## _none_ ## op,	\
    }
1340 
/*
 * Brace enclosed fast path table entry (presumably a pixman_fast_path_t
 * initializer - confirm against pixman-private.h) for bilinear scaling where
 * FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR is required instead of a repeat
 * flag, with PIXMAN_a8 and MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA) in the
 * mask positions.
 * 'op' is pasted onto PIXMAN_OP_, 's' and 'd' onto PIXMAN_, and the entry
 * names fast_composite_scaled_bilinear_<func>_cover_<op>.
 */
#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER(op,s,d,func)		\
    {									\
	PIXMAN_OP_ ## op,						\
	PIXMAN_ ## s,							\
	(SCALED_BILINEAR_FLAGS |					\
	 FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR),			\
	PIXMAN_a8,							\
	MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),			\
	PIXMAN_ ## d,							\
	FAST_PATH_STD_DEST_FLAGS,					\
	fast_composite_scaled_bilinear_ ## func ## _cover_ ## op,	\
    }
1349 
/*
 * Brace enclosed fast path table entry (presumably a pixman_fast_path_t
 * initializer - confirm against pixman-private.h) for bilinear scaling with
 * NORMAL repeat, with PIXMAN_a8 and MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA)
 * in the mask positions.
 * 'op' is pasted onto PIXMAN_OP_, 's' and 'd' onto PIXMAN_, and the entry
 * names fast_composite_scaled_bilinear_<func>_normal_<op>.
 */
#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NORMAL(op,s,d,func)		\
    {									\
	PIXMAN_OP_ ## op,						\
	PIXMAN_ ## s,							\
	(SCALED_BILINEAR_FLAGS | FAST_PATH_NORMAL_REPEAT |		\
	 FAST_PATH_X_UNIT_POSITIVE),					\
	PIXMAN_a8,							\
	MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),			\
	PIXMAN_ ## d,							\
	FAST_PATH_STD_DEST_FLAGS,					\
	fast_composite_scaled_bilinear_ ## func ## _normal_ ## op,	\
    }
1360 
/*
 * Brace enclosed fast path table entry (presumably a pixman_fast_path_t
 * initializer - confirm against pixman-private.h) for bilinear scaling with
 * PAD repeat, with PIXMAN_solid and
 * MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA) in the mask positions.
 * 'op' is pasted onto PIXMAN_OP_, 's' and 'd' onto PIXMAN_, and the entry
 * names fast_composite_scaled_bilinear_<func>_pad_<op>.
 */
#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD(op,s,d,func)		\
    {									\
	PIXMAN_OP_ ## op,						\
	PIXMAN_ ## s,							\
	(SCALED_BILINEAR_FLAGS | FAST_PATH_PAD_REPEAT |			\
	 FAST_PATH_X_UNIT_POSITIVE),					\
	PIXMAN_solid,							\
	MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),			\
	PIXMAN_ ## d,							\
	FAST_PATH_STD_DEST_FLAGS,					\
	fast_composite_scaled_bilinear_ ## func ## _pad_ ## op,	\
    }
1371 
/*
 * Brace enclosed fast path table entry (presumably a pixman_fast_path_t
 * initializer - confirm against pixman-private.h) for bilinear scaling with
 * NONE repeat, with PIXMAN_solid and
 * MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA) in the mask positions.
 * 'op' is pasted onto PIXMAN_OP_, 's' and 'd' onto PIXMAN_, and the entry
 * names fast_composite_scaled_bilinear_<func>_none_<op>.
 */
#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NONE(op,s,d,func)		\
    {									\
	PIXMAN_OP_ ## op,						\
	PIXMAN_ ## s,							\
	(SCALED_BILINEAR_FLAGS | FAST_PATH_NONE_REPEAT |		\
	 FAST_PATH_X_UNIT_POSITIVE),					\
	PIXMAN_solid,							\
	MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),			\
	PIXMAN_ ## d,							\
	FAST_PATH_STD_DEST_FLAGS,					\
	fast_composite_scaled_bilinear_ ## func ## _none_ ## op,	\
    }
1382 
/*
 * Brace enclosed fast path table entry (presumably a pixman_fast_path_t
 * initializer - confirm against pixman-private.h) for bilinear scaling where
 * FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR is required instead of a repeat
 * flag, with PIXMAN_solid and MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA)
 * in the mask positions.
 * 'op' is pasted onto PIXMAN_OP_, 's' and 'd' onto PIXMAN_, and the entry
 * names fast_composite_scaled_bilinear_<func>_cover_<op>.
 */
#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER(op,s,d,func)		\
    {									\
	PIXMAN_OP_ ## op,						\
	PIXMAN_ ## s,							\
	(SCALED_BILINEAR_FLAGS |					\
	 FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR),			\
	PIXMAN_solid,							\
	MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),			\
	PIXMAN_ ## d,							\
	FAST_PATH_STD_DEST_FLAGS,					\
	fast_composite_scaled_bilinear_ ## func ## _cover_ ## op,	\
    }
1391 
/*
 * Brace enclosed fast path table entry (presumably a pixman_fast_path_t
 * initializer - confirm against pixman-private.h) for bilinear scaling with
 * NORMAL repeat, with PIXMAN_solid and
 * MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA) in the mask positions.
 * 'op' is pasted onto PIXMAN_OP_, 's' and 'd' onto PIXMAN_, and the entry
 * names fast_composite_scaled_bilinear_<func>_normal_<op>.
 */
#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NORMAL(op,s,d,func)	\
    {									\
	PIXMAN_OP_ ## op,						\
	PIXMAN_ ## s,							\
	(SCALED_BILINEAR_FLAGS | FAST_PATH_NORMAL_REPEAT |		\
	 FAST_PATH_X_UNIT_POSITIVE),					\
	PIXMAN_solid,							\
	MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),			\
	PIXMAN_ ## d,							\
	FAST_PATH_STD_DEST_FLAGS,					\
	fast_composite_scaled_bilinear_ ## func ## _normal_ ## op,	\
    }
1402 
/*
 * Expands to the COVER, NONE, PAD and NORMAL variants of
 * SIMPLE_BILINEAR_FAST_PATH_*, comma separated. The 'cover' variant is
 * listed first so that it is preferred, because it is faster.
 */
#define SIMPLE_BILINEAR_FAST_PATH(op, s, d, func)			\
    SIMPLE_BILINEAR_FAST_PATH_COVER (op, s, d, func),			\
    SIMPLE_BILINEAR_FAST_PATH_NONE (op, s, d, func),			\
    SIMPLE_BILINEAR_FAST_PATH_PAD (op, s, d, func),			\
    SIMPLE_BILINEAR_FAST_PATH_NORMAL (op, s, d, func)
1409 
/*
 * Expands to the COVER, NONE, PAD and NORMAL variants of
 * SIMPLE_BILINEAR_A8_MASK_FAST_PATH_*, comma separated and in that order,
 * each invoked with the same (op, s, d, func) arguments.
 */
#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH(op, s, d, func)		\
    SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER (op, s, d, func),		\
    SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NONE (op, s, d, func),		\
    SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD (op, s, d, func),		\
    SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NORMAL (op, s, d, func)
1415 
/*
 * Expands to the COVER, NONE, PAD and NORMAL variants of
 * SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_*, comma separated and in that
 * order, each invoked with the same (op, s, d, func) arguments.
 */
#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH(op, s, d, func)		\
    SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER (op, s, d, func),	\
    SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NONE (op, s, d, func),		\
    SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD (op, s, d, func),		\
    SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NORMAL (op, s, d, func)
1421 
1422 #endif
1423