1 /* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
2 /*
3  * Copyright © 2000 SuSE, Inc.
4  * Copyright © 2007 Red Hat, Inc.
5  *
6  * Permission to use, copy, modify, distribute, and sell this software and its
7  * documentation for any purpose is hereby granted without fee, provided that
8  * the above copyright notice appear in all copies and that both that
9  * copyright notice and this permission notice appear in supporting
10  * documentation, and that the name of SuSE not be used in advertising or
11  * publicity pertaining to distribution of the software without specific,
12  * written prior permission.  SuSE makes no representations about the
13  * suitability of this software for any purpose.  It is provided "as is"
14  * without express or implied warranty.
15  *
16  * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
18  * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
20  * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
21  * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
22  *
23  * Author:  Keith Packard, SuSE, Inc.
24  */
25 
26 #ifdef HAVE_CONFIG_H
27 #include <config.h>
28 #endif
29 #include <string.h>
30 #include <stdlib.h>
31 #include "pixman-private.h"
32 #include "pixman-combine32.h"
33 #include "pixman-inlines.h"
34 
static force_inline uint32_t
fetch_24 (uint8_t *a)
{
    /* Read a 3-byte (24-bit) pixel at 'a' into the low 24 bits of a
     * uint32_t, honoring the platform byte order.  The load is split
     * into an 8-bit part plus a 16-bit part, chosen so the uint16_t
     * access always lands on an even address when 'a' is odd.
     * NOTE(review): this type-puns through uint16_t*, relying on the
     * compiler tolerating such aliasing -- long-standing practice in
     * this file, not strictly-conforming C.
     */
    if (((uintptr_t)a) & 1)
    {
#ifdef WORDS_BIGENDIAN
	return (*a << 16) | (*(uint16_t *)(a + 1));
#else
	return *a | (*(uint16_t *)(a + 1) << 8);
#endif
    }
    else
    {
#ifdef WORDS_BIGENDIAN
	return (*(uint16_t *)a << 8) | *(a + 2);
#else
	return *(uint16_t *)a | (*(a + 2) << 16);
#endif
    }
}
55 
static force_inline void
store_24 (uint8_t *a,
          uint32_t v)
{
    /* Store the low 24 bits of 'v' as a 3-byte pixel at 'a', honoring
     * the platform byte order.  The write is split into an 8-bit part
     * plus a 16-bit part so the uint16_t access always lands on an even
     * address when 'a' is odd.  NOTE(review): type-puns through
     * uint16_t*, mirroring fetch_24.
     */
    if (((uintptr_t)a) & 1)
    {
#ifdef WORDS_BIGENDIAN
	*a = (uint8_t) (v >> 16);
	*(uint16_t *)(a + 1) = (uint16_t) (v);
#else
	*a = (uint8_t) (v);
	*(uint16_t *)(a + 1) = (uint16_t) (v >> 8);
#endif
    }
    else
    {
#ifdef WORDS_BIGENDIAN
	*(uint16_t *)a = (uint16_t)(v >> 8);
	*(a + 2) = (uint8_t)v;
#else
	*(uint16_t *)a = (uint16_t)v;
	*(a + 2) = (uint8_t)(v >> 16);
#endif
    }
}
81 
82 static force_inline uint32_t
over(uint32_t src,uint32_t dest)83 over (uint32_t src,
84       uint32_t dest)
85 {
86     uint32_t a = ~src >> 24;
87 
88     UN8x4_MUL_UN8_ADD_UN8x4 (dest, a, src);
89 
90     return dest;
91 }
92 
93 static force_inline uint32_t
in(uint32_t x,uint8_t y)94 in (uint32_t x,
95     uint8_t  y)
96 {
97     uint16_t a = y;
98 
99     UN8x4_MUL_UN8 (x, a);
100 
101     return x;
102 }
103 
104 /*
105  * Naming convention:
106  *
107  *  op_src_mask_dest
108  */
109 static void
fast_composite_over_x888_8_8888(pixman_implementation_t * imp,pixman_composite_info_t * info)110 fast_composite_over_x888_8_8888 (pixman_implementation_t *imp,
111                                  pixman_composite_info_t *info)
112 {
113     PIXMAN_COMPOSITE_ARGS (info);
114     uint32_t    *src, *src_line;
115     uint32_t    *dst, *dst_line;
116     uint8_t     *mask, *mask_line;
117     int src_stride, mask_stride, dst_stride;
118     uint8_t m;
119     uint32_t s, d;
120     int32_t w;
121 
122     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
123     PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
124     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
125 
126     while (height--)
127     {
128 	src = src_line;
129 	src_line += src_stride;
130 	dst = dst_line;
131 	dst_line += dst_stride;
132 	mask = mask_line;
133 	mask_line += mask_stride;
134 
135 	w = width;
136 	while (w--)
137 	{
138 	    m = *mask++;
139 	    if (m)
140 	    {
141 		s = *src | 0xff000000;
142 
143 		if (m == 0xff)
144 		{
145 		    *dst = s;
146 		}
147 		else
148 		{
149 		    d = in (s, m);
150 		    *dst = over (d, *dst);
151 		}
152 	    }
153 	    src++;
154 	    dst++;
155 	}
156     }
157 }
158 
static void
fast_composite_in_n_8_8 (pixman_implementation_t *imp,
                         pixman_composite_info_t *info)
{
    /* IN with a solid source, an a8 mask and an a8 destination:
     * dest = dest * mask * alpha(src).  For an a8 destination only the
     * source's alpha byte is relevant. */
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t src, srca;
    uint8_t     *dst_line, *dst;
    uint8_t     *mask_line, *mask, m;
    int dst_stride, mask_stride;
    int32_t w;
    uint16_t t;		/* scratch for MUL_UN8 */

    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);

    srca = src >> 24;

    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);

    if (srca == 0xff)
    {
	/* Opaque source: dest = dest * mask. */
	while (height--)
	{
	    dst = dst_line;
	    dst_line += dst_stride;
	    mask = mask_line;
	    mask_line += mask_stride;
	    w = width;

	    while (w--)
	    {
		m = *mask++;

		if (m == 0)
		    *dst = 0;
		else if (m != 0xff)	/* m == 0xff leaves dest unchanged */
		    *dst = MUL_UN8 (m, *dst, t);

		dst++;
	    }
	}
    }
    else
    {
	/* General case: fold the source alpha into the mask first. */
	while (height--)
	{
	    dst = dst_line;
	    dst_line += dst_stride;
	    mask = mask_line;
	    mask_line += mask_stride;
	    w = width;

	    while (w--)
	    {
		m = *mask++;
		m = MUL_UN8 (m, srca, t);

		if (m == 0)
		    *dst = 0;
		else if (m != 0xff)
		    *dst = MUL_UN8 (m, *dst, t);

		dst++;
	    }
	}
    }
}
226 
227 static void
fast_composite_in_8_8(pixman_implementation_t * imp,pixman_composite_info_t * info)228 fast_composite_in_8_8 (pixman_implementation_t *imp,
229                        pixman_composite_info_t *info)
230 {
231     PIXMAN_COMPOSITE_ARGS (info);
232     uint8_t     *dst_line, *dst;
233     uint8_t     *src_line, *src;
234     int dst_stride, src_stride;
235     int32_t w;
236     uint8_t s;
237     uint16_t t;
238 
239     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
240     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
241 
242     while (height--)
243     {
244 	dst = dst_line;
245 	dst_line += dst_stride;
246 	src = src_line;
247 	src_line += src_stride;
248 	w = width;
249 
250 	while (w--)
251 	{
252 	    s = *src++;
253 
254 	    if (s == 0)
255 		*dst = 0;
256 	    else if (s != 0xff)
257 		*dst = MUL_UN8 (s, *dst, t);
258 
259 	    dst++;
260 	}
261     }
262 }
263 
264 static void
fast_composite_over_n_8_8888(pixman_implementation_t * imp,pixman_composite_info_t * info)265 fast_composite_over_n_8_8888 (pixman_implementation_t *imp,
266                               pixman_composite_info_t *info)
267 {
268     PIXMAN_COMPOSITE_ARGS (info);
269     uint32_t src, srca;
270     uint32_t    *dst_line, *dst, d;
271     uint8_t     *mask_line, *mask, m;
272     int dst_stride, mask_stride;
273     int32_t w;
274 
275     src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
276 
277     srca = src >> 24;
278     if (src == 0)
279 	return;
280 
281     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
282     PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
283 
284     while (height--)
285     {
286 	dst = dst_line;
287 	dst_line += dst_stride;
288 	mask = mask_line;
289 	mask_line += mask_stride;
290 	w = width;
291 
292 	while (w--)
293 	{
294 	    m = *mask++;
295 	    if (m == 0xff)
296 	    {
297 		if (srca == 0xff)
298 		    *dst = src;
299 		else
300 		    *dst = over (src, *dst);
301 	    }
302 	    else if (m)
303 	    {
304 		d = in (src, m);
305 		*dst = over (d, *dst);
306 	    }
307 	    dst++;
308 	}
309     }
310 }
311 
312 static void
fast_composite_add_n_8888_8888_ca(pixman_implementation_t * imp,pixman_composite_info_t * info)313 fast_composite_add_n_8888_8888_ca (pixman_implementation_t *imp,
314 				   pixman_composite_info_t *info)
315 {
316     PIXMAN_COMPOSITE_ARGS (info);
317     uint32_t src, s;
318     uint32_t    *dst_line, *dst, d;
319     uint32_t    *mask_line, *mask, ma;
320     int dst_stride, mask_stride;
321     int32_t w;
322 
323     src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
324 
325     if (src == 0)
326 	return;
327 
328     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
329     PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
330 
331     while (height--)
332     {
333 	dst = dst_line;
334 	dst_line += dst_stride;
335 	mask = mask_line;
336 	mask_line += mask_stride;
337 	w = width;
338 
339 	while (w--)
340 	{
341 	    ma = *mask++;
342 
343 	    if (ma)
344 	    {
345 		d = *dst;
346 		s = src;
347 
348 		UN8x4_MUL_UN8x4_ADD_UN8x4 (s, ma, d);
349 
350 		*dst = s;
351 	    }
352 
353 	    dst++;
354 	}
355     }
356 }
357 
static void
fast_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
                                    pixman_composite_info_t *info)
{
    /* OVER with a solid source and a per-component (component-alpha)
     * a8r8g8b8 mask onto an a8r8g8b8 destination. */
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t src, srca, s;
    uint32_t    *dst_line, *dst, d;
    uint32_t    *mask_line, *mask, ma;
    int dst_stride, mask_stride;
    int32_t w;

    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);

    srca = src >> 24;
    if (src == 0)
	return;	/* transparent source: OVER is a no-op */

    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);

    while (height--)
    {
	dst = dst_line;
	dst_line += dst_stride;
	mask = mask_line;
	mask_line += mask_stride;
	w = width;

	while (w--)
	{
	    ma = *mask++;
	    if (ma == 0xffffffff)
	    {
		/* Full coverage in every component: plain OVER. */
		if (srca == 0xff)
		    *dst = src;
		else
		    *dst = over (src, *dst);
	    }
	    else if (ma)
	    {
		d = *dst;
		s = src;

		UN8x4_MUL_UN8x4 (s, ma);		/* s = src * mask, per channel */
		UN8x4_MUL_UN8 (ma, srca);		/* ma = mask * alpha(src) */
		ma = ~ma;
		UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ma, s);	/* d = d * (1 - ma) + s */

		*dst = d;
	    }

	    dst++;
	}
    }
}
413 
static void
fast_composite_over_n_8_0888 (pixman_implementation_t *imp,
                              pixman_composite_info_t *info)
{
    /* OVER with a solid source through an a8 mask onto a 24-bit packed
     * (0888) destination.  Pixels are 3 bytes apart and accessed with
     * fetch_24/store_24. */
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t src, srca;
    uint8_t     *dst_line, *dst;
    uint32_t d;
    uint8_t     *mask_line, *mask, m;
    int dst_stride, mask_stride;
    int32_t w;

    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);

    srca = src >> 24;
    if (src == 0)
	return;	/* transparent source: OVER is a no-op */

    /* Destination line is addressed in bytes: 3 bytes per pixel. */
    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 3);
    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);

    while (height--)
    {
	dst = dst_line;
	dst_line += dst_stride;
	mask = mask_line;
	mask_line += mask_stride;
	w = width;

	while (w--)
	{
	    m = *mask++;
	    if (m == 0xff)
	    {
		if (srca == 0xff)
		{
		    /* Opaque source, full coverage: no dest read needed. */
		    d = src;
		}
		else
		{
		    d = fetch_24 (dst);
		    d = over (src, d);
		}
		store_24 (dst, d);
	    }
	    else if (m)
	    {
		d = over (in (src, m), fetch_24 (dst));
		store_24 (dst, d);
	    }
	    dst += 3;	/* advance one 24-bit pixel */
	}
    }
}
468 
static void
fast_composite_over_n_8_0565 (pixman_implementation_t *imp,
                              pixman_composite_info_t *info)
{
    /* OVER with a solid source through an a8 mask onto an r5g6b5
     * destination.  Blending happens in 8888 space; the result is
     * packed back to 565. */
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t src, srca;
    uint16_t    *dst_line, *dst;
    uint32_t d;
    uint8_t     *mask_line, *mask, m;
    int dst_stride, mask_stride;
    int32_t w;

    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);

    srca = src >> 24;
    if (src == 0)
	return;	/* transparent source: OVER is a no-op */

    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);

    while (height--)
    {
	dst = dst_line;
	dst_line += dst_stride;
	mask = mask_line;
	mask_line += mask_stride;
	w = width;

	while (w--)
	{
	    m = *mask++;
	    if (m == 0xff)
	    {
		if (srca == 0xff)
		{
		    /* Opaque source, full coverage: no dest read needed. */
		    d = src;
		}
		else
		{
		    d = *dst;
		    d = over (src, convert_0565_to_0888 (d));
		}
		*dst = convert_8888_to_0565 (d);
	    }
	    else if (m)
	    {
		d = *dst;
		d = over (in (src, m), convert_0565_to_0888 (d));
		*dst = convert_8888_to_0565 (d);
	    }
	    dst++;
	}
    }
}
524 
static void
fast_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
                                    pixman_composite_info_t *info)
{
    /* OVER with a solid source and a per-component (a8r8g8b8) mask onto
     * an r5g6b5 destination.  Blending happens in 8888 space; results
     * are packed back to 565. */
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t  src, srca, s;
    uint16_t  src16;
    uint16_t *dst_line, *dst;
    uint32_t  d;
    uint32_t *mask_line, *mask, ma;
    int dst_stride, mask_stride;
    int32_t w;

    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);

    srca = src >> 24;
    if (src == 0)
	return;	/* transparent source: OVER is a no-op */

    /* Pre-packed 565 copy of the source for the opaque fast path. */
    src16 = convert_8888_to_0565 (src);

    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);

    while (height--)
    {
	dst = dst_line;
	dst_line += dst_stride;
	mask = mask_line;
	mask_line += mask_stride;
	w = width;

	while (w--)
	{
	    ma = *mask++;
	    if (ma == 0xffffffff)
	    {
		/* Full coverage in every component: plain OVER. */
		if (srca == 0xff)
		{
		    *dst = src16;
		}
		else
		{
		    d = *dst;
		    d = over (src, convert_0565_to_0888 (d));
		    *dst = convert_8888_to_0565 (d);
		}
	    }
	    else if (ma)
	    {
		d = *dst;
		d = convert_0565_to_0888 (d);

		s = src;

		UN8x4_MUL_UN8x4 (s, ma);		/* s = src * mask, per channel */
		UN8x4_MUL_UN8 (ma, srca);		/* ma = mask * alpha(src) */
		ma = ~ma;
		UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ma, s);	/* d = d * (1 - ma) + s */

		*dst = convert_8888_to_0565 (d);
	    }
	    dst++;
	}
    }
}
591 
592 static void
fast_composite_over_8888_8888(pixman_implementation_t * imp,pixman_composite_info_t * info)593 fast_composite_over_8888_8888 (pixman_implementation_t *imp,
594                                pixman_composite_info_t *info)
595 {
596     PIXMAN_COMPOSITE_ARGS (info);
597     uint32_t    *dst_line, *dst;
598     uint32_t    *src_line, *src, s;
599     int dst_stride, src_stride;
600     uint8_t a;
601     int32_t w;
602 
603     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
604     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
605 
606     while (height--)
607     {
608 	dst = dst_line;
609 	dst_line += dst_stride;
610 	src = src_line;
611 	src_line += src_stride;
612 	w = width;
613 
614 	while (w--)
615 	{
616 	    s = *src++;
617 	    a = s >> 24;
618 	    if (a == 0xff)
619 		*dst = s;
620 	    else if (s)
621 		*dst = over (s, *dst);
622 	    dst++;
623 	}
624     }
625 }
626 
627 static void
fast_composite_src_x888_8888(pixman_implementation_t * imp,pixman_composite_info_t * info)628 fast_composite_src_x888_8888 (pixman_implementation_t *imp,
629 			      pixman_composite_info_t *info)
630 {
631     PIXMAN_COMPOSITE_ARGS (info);
632     uint32_t    *dst_line, *dst;
633     uint32_t    *src_line, *src;
634     int dst_stride, src_stride;
635     int32_t w;
636 
637     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
638     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
639 
640     while (height--)
641     {
642 	dst = dst_line;
643 	dst_line += dst_stride;
644 	src = src_line;
645 	src_line += src_stride;
646 	w = width;
647 
648 	while (w--)
649 	    *dst++ = (*src++) | 0xff000000;
650     }
651 }
652 
#if 0
/* NOTE(review): disabled (never compiled) OVER fast path from a8r8g8b8
 * to a 24-bit packed (0888) destination via fetch_24/store_24.  Kept
 * under "#if 0" in the original source. */
static void
fast_composite_over_8888_0888 (pixman_implementation_t *imp,
			       pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint8_t     *dst_line, *dst;
    uint32_t d;
    uint32_t    *src_line, *src, s;
    uint8_t a;
    int dst_stride, src_stride;
    int32_t w;

    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 3);
    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);

    while (height--)
    {
	dst = dst_line;
	dst_line += dst_stride;
	src = src_line;
	src_line += src_stride;
	w = width;

	while (w--)
	{
	    s = *src++;
	    a = s >> 24;
	    if (a)
	    {
		if (a == 0xff)
		    d = s;
		else
		    d = over (s, fetch_24 (dst));

		store_24 (dst, d);
	    }
	    dst += 3;
	}
    }
}
#endif
695 
696 static void
fast_composite_over_8888_0565(pixman_implementation_t * imp,pixman_composite_info_t * info)697 fast_composite_over_8888_0565 (pixman_implementation_t *imp,
698                                pixman_composite_info_t *info)
699 {
700     PIXMAN_COMPOSITE_ARGS (info);
701     uint16_t    *dst_line, *dst;
702     uint32_t d;
703     uint32_t    *src_line, *src, s;
704     uint8_t a;
705     int dst_stride, src_stride;
706     int32_t w;
707 
708     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
709     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
710 
711     while (height--)
712     {
713 	dst = dst_line;
714 	dst_line += dst_stride;
715 	src = src_line;
716 	src_line += src_stride;
717 	w = width;
718 
719 	while (w--)
720 	{
721 	    s = *src++;
722 	    a = s >> 24;
723 	    if (s)
724 	    {
725 		if (a == 0xff)
726 		{
727 		    d = s;
728 		}
729 		else
730 		{
731 		    d = *dst;
732 		    d = over (s, convert_0565_to_0888 (d));
733 		}
734 		*dst = convert_8888_to_0565 (d);
735 	    }
736 	    dst++;
737 	}
738     }
739 }
740 
static void
fast_composite_add_8_8 (pixman_implementation_t *imp,
			pixman_composite_info_t *info)
{
    /* Saturating ADD between two a8 surfaces: dest = min(dest + src, 255). */
    PIXMAN_COMPOSITE_ARGS (info);
    uint8_t     *dst_line, *dst;
    uint8_t     *src_line, *src;
    int dst_stride, src_stride;
    int32_t w;
    uint8_t s, d;
    uint16_t t;

    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);

    while (height--)
    {
	dst = dst_line;
	dst_line += dst_stride;
	src = src_line;
	src_line += src_stride;
	w = width;

	while (w--)
	{
	    s = *src++;
	    if (s)
	    {
		if (s != 0xff)	/* s == 0xff already saturates */
		{
		    d = *dst;
		    t = d + s;
		    /* Branch-free clamp: (t >> 8) is 1 iff the 9-bit sum
		     * overflowed, so (0 - (t >> 8)) is all-ones in that
		     * case and the narrowing store below yields 0xff. */
		    s = t | (0 - (t >> 8));
		}
		*dst = s;
	    }
	    dst++;
	}
    }
}
781 
782 static void
fast_composite_add_0565_0565(pixman_implementation_t * imp,pixman_composite_info_t * info)783 fast_composite_add_0565_0565 (pixman_implementation_t *imp,
784                               pixman_composite_info_t *info)
785 {
786     PIXMAN_COMPOSITE_ARGS (info);
787     uint16_t    *dst_line, *dst;
788     uint32_t	d;
789     uint16_t    *src_line, *src;
790     uint32_t	s;
791     int dst_stride, src_stride;
792     int32_t w;
793 
794     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint16_t, src_stride, src_line, 1);
795     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
796 
797     while (height--)
798     {
799 	dst = dst_line;
800 	dst_line += dst_stride;
801 	src = src_line;
802 	src_line += src_stride;
803 	w = width;
804 
805 	while (w--)
806 	{
807 	    s = *src++;
808 	    if (s)
809 	    {
810 		d = *dst;
811 		s = convert_0565_to_8888 (s);
812 		if (d)
813 		{
814 		    d = convert_0565_to_8888 (d);
815 		    UN8x4_ADD_UN8x4 (s, d);
816 		}
817 		*dst = convert_8888_to_0565 (s);
818 	    }
819 	    dst++;
820 	}
821     }
822 }
823 
824 static void
fast_composite_add_8888_8888(pixman_implementation_t * imp,pixman_composite_info_t * info)825 fast_composite_add_8888_8888 (pixman_implementation_t *imp,
826                               pixman_composite_info_t *info)
827 {
828     PIXMAN_COMPOSITE_ARGS (info);
829     uint32_t    *dst_line, *dst;
830     uint32_t    *src_line, *src;
831     int dst_stride, src_stride;
832     int32_t w;
833     uint32_t s, d;
834 
835     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
836     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
837 
838     while (height--)
839     {
840 	dst = dst_line;
841 	dst_line += dst_stride;
842 	src = src_line;
843 	src_line += src_stride;
844 	w = width;
845 
846 	while (w--)
847 	{
848 	    s = *src++;
849 	    if (s)
850 	    {
851 		if (s != 0xffffffff)
852 		{
853 		    d = *dst;
854 		    if (d)
855 			UN8x4_ADD_UN8x4 (s, d);
856 		}
857 		*dst = s;
858 	    }
859 	    dst++;
860 	}
861     }
862 }
863 
static void
fast_composite_add_n_8_8 (pixman_implementation_t *imp,
			  pixman_composite_info_t *info)
{
    /* ADD with a solid source through an a8 mask onto an a8
     * destination: dest = sat(dest + mask * alpha(src)). */
    PIXMAN_COMPOSITE_ARGS (info);
    uint8_t     *dst_line, *dst;
    uint8_t     *mask_line, *mask;
    int dst_stride, mask_stride;
    int32_t w;
    uint32_t src;
    uint8_t sa;

    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
    sa = (src >> 24);	/* only the source alpha matters for a8 dest */

    while (height--)
    {
	dst = dst_line;
	dst_line += dst_stride;
	mask = mask_line;
	mask_line += mask_stride;
	w = width;

	while (w--)
	{
	    uint16_t tmp;
	    uint16_t a;
	    uint32_t m, d;
	    uint32_t r;

	    a = *mask++;
	    d = *dst;

	    m = MUL_UN8 (sa, a, tmp);	/* scale mask by source alpha */
	    r = ADD_UN8 (m, d, tmp);	/* saturating 8-bit add */

	    *dst++ = r;
	}
    }
}
906 
/* Bit addressing helpers for a1 images: bit 'n' of the bitmap starting
 * at uint32_t pointer 'p' lives in word (n >> 5); the position within
 * the word depends on the platform byte/bit order. */
#ifdef WORDS_BIGENDIAN
#define CREATE_BITMASK(n) (0x80000000 >> (n))
#define UPDATE_BITMASK(n) ((n) >> 1)
#else
#define CREATE_BITMASK(n) (1U << (n))
#define UPDATE_BITMASK(n) ((n) << 1)
#endif

#define TEST_BIT(p, n)					\
    (*((p) + ((n) >> 5)) & CREATE_BITMASK ((n) & 31))
/* Fixed: dropped the stray semicolon after "while (0)" -- it defeated
 * the do-while(0) idiom and would break "if (c) SET_BIT (p, n); else". */
#define SET_BIT(p, n)							\
    do { *((p) + ((n) >> 5)) |= CREATE_BITMASK ((n) & 31); } while (0)
919 
static void
fast_composite_add_1_1 (pixman_implementation_t *imp,
			pixman_composite_info_t *info)
{
    /* ADD between two a1 bitmaps: OR each set source bit into the
     * destination.  Lines are fetched at x = 0; the x origins (src_x,
     * dest_x) are applied as bit offsets instead. */
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t     *dst_line, *dst;
    uint32_t     *src_line, *src;
    int           dst_stride, src_stride;
    int32_t       w;

    PIXMAN_IMAGE_GET_LINE (src_image, 0, src_y, uint32_t,
                           src_stride, src_line, 1);
    PIXMAN_IMAGE_GET_LINE (dest_image, 0, dest_y, uint32_t,
                           dst_stride, dst_line, 1);

    while (height--)
    {
	dst = dst_line;
	dst_line += dst_stride;
	src = src_line;
	src_line += src_stride;
	w = width;

	/* Bits are visited from the high end down (w counts width-1..0). */
	while (w--)
	{
	    /*
	     * TODO: improve performance by processing uint32_t data instead
	     *       of individual bits
	     */
	    if (TEST_BIT (src, src_x + w))
		SET_BIT (dst, dest_x + w);
	}
    }
}
954 
static void
fast_composite_over_n_1_8888 (pixman_implementation_t *imp,
                              pixman_composite_info_t *info)
{
    /* OVER with a solid source through an a1 (bitmap) mask onto an
     * a8r8g8b8 destination: where the mask bit is set, the source is
     * composited; elsewhere the destination is untouched. */
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t     src, srca;
    uint32_t    *dst, *dst_line;
    uint32_t    *mask, *mask_line;
    int          mask_stride, dst_stride;
    uint32_t     bitcache, bitmask;
    int32_t      w;

    if (width <= 0)
	return;

    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
    srca = src >> 24;
    if (src == 0)
	return;	/* transparent source: OVER is a no-op */

    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t,
                           dst_stride, dst_line, 1);
    /* Fetch the mask at x = 0 and skip to the word containing mask_x. */
    PIXMAN_IMAGE_GET_LINE (mask_image, 0, mask_y, uint32_t,
                           mask_stride, mask_line, 1);
    mask_line += mask_x >> 5;

    if (srca == 0xff)
    {
	/* Opaque source: a set mask bit means a plain store. */
	while (height--)
	{
	    dst = dst_line;
	    dst_line += dst_stride;
	    mask = mask_line;
	    mask_line += mask_stride;
	    w = width;

	    /* bitcache holds the current 32 mask bits; bitmask selects
	     * the bit for the current pixel, starting at mask_x & 31. */
	    bitcache = *mask++;
	    bitmask = CREATE_BITMASK (mask_x & 31);

	    while (w--)
	    {
		if (bitmask == 0)
		{
		    /* Ran off the cached word: load the next one. */
		    bitcache = *mask++;
		    bitmask = CREATE_BITMASK (0);
		}
		if (bitcache & bitmask)
		    *dst = src;
		bitmask = UPDATE_BITMASK (bitmask);
		dst++;
	    }
	}
    }
    else
    {
	/* Translucent source: a set mask bit means OVER. */
	while (height--)
	{
	    dst = dst_line;
	    dst_line += dst_stride;
	    mask = mask_line;
	    mask_line += mask_stride;
	    w = width;

	    bitcache = *mask++;
	    bitmask = CREATE_BITMASK (mask_x & 31);

	    while (w--)
	    {
		if (bitmask == 0)
		{
		    bitcache = *mask++;
		    bitmask = CREATE_BITMASK (0);
		}
		if (bitcache & bitmask)
		    *dst = over (src, *dst);
		bitmask = UPDATE_BITMASK (bitmask);
		dst++;
	    }
	}
    }
}
1036 
static void
fast_composite_over_n_1_0565 (pixman_implementation_t *imp,
                              pixman_composite_info_t *info)
{
    /* OVER with a solid source through an a1 (bitmap) mask onto an
     * r5g6b5 destination.  Same bit-walk as
     * fast_composite_over_n_1_8888, with 565 <-> 8888 conversion. */
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t     src, srca;
    uint16_t    *dst, *dst_line;
    uint32_t    *mask, *mask_line;
    int          mask_stride, dst_stride;
    uint32_t     bitcache, bitmask;
    int32_t      w;
    uint32_t     d;
    uint16_t     src565;

    if (width <= 0)
	return;

    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
    srca = src >> 24;
    if (src == 0)
	return;	/* transparent source: OVER is a no-op */

    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t,
                           dst_stride, dst_line, 1);
    /* Fetch the mask at x = 0 and skip to the word containing mask_x. */
    PIXMAN_IMAGE_GET_LINE (mask_image, 0, mask_y, uint32_t,
                           mask_stride, mask_line, 1);
    mask_line += mask_x >> 5;

    if (srca == 0xff)
    {
	/* Opaque source: pre-pack it to 565 and store directly. */
	src565 = convert_8888_to_0565 (src);
	while (height--)
	{
	    dst = dst_line;
	    dst_line += dst_stride;
	    mask = mask_line;
	    mask_line += mask_stride;
	    w = width;

	    /* bitcache holds the current 32 mask bits; bitmask selects
	     * the bit for the current pixel, starting at mask_x & 31. */
	    bitcache = *mask++;
	    bitmask = CREATE_BITMASK (mask_x & 31);

	    while (w--)
	    {
		if (bitmask == 0)
		{
		    /* Ran off the cached word: load the next one. */
		    bitcache = *mask++;
		    bitmask = CREATE_BITMASK (0);
		}
		if (bitcache & bitmask)
		    *dst = src565;
		bitmask = UPDATE_BITMASK (bitmask);
		dst++;
	    }
	}
    }
    else
    {
	/* Translucent source: blend in 8888 space, pack back to 565. */
	while (height--)
	{
	    dst = dst_line;
	    dst_line += dst_stride;
	    mask = mask_line;
	    mask_line += mask_stride;
	    w = width;

	    bitcache = *mask++;
	    bitmask = CREATE_BITMASK (mask_x & 31);

	    while (w--)
	    {
		if (bitmask == 0)
		{
		    bitcache = *mask++;
		    bitmask = CREATE_BITMASK (0);
		}
		if (bitcache & bitmask)
		{
		    d = over (src, convert_0565_to_0888 (*dst));
		    *dst = convert_8888_to_0565 (d);
		}
		bitmask = UPDATE_BITMASK (bitmask);
		dst++;
	    }
	}
    }
}
1124 
1125 /*
1126  * Simple bitblt
1127  */
1128 
1129 static void
fast_composite_solid_fill(pixman_implementation_t * imp,pixman_composite_info_t * info)1130 fast_composite_solid_fill (pixman_implementation_t *imp,
1131                            pixman_composite_info_t *info)
1132 {
1133     PIXMAN_COMPOSITE_ARGS (info);
1134     uint32_t src;
1135 
1136     src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
1137 
1138     if (dest_image->bits.format == PIXMAN_a1)
1139     {
1140 	src = src >> 31;
1141     }
1142     else if (dest_image->bits.format == PIXMAN_a8)
1143     {
1144 	src = src >> 24;
1145     }
1146     else if (dest_image->bits.format == PIXMAN_r5g6b5 ||
1147              dest_image->bits.format == PIXMAN_b5g6r5)
1148     {
1149 	src = convert_8888_to_0565 (src);
1150     }
1151 
1152     pixman_fill (dest_image->bits.bits, dest_image->bits.rowstride,
1153                  PIXMAN_FORMAT_BPP (dest_image->bits.format),
1154                  dest_x, dest_y,
1155                  width, height,
1156                  src);
1157 }
1158 
1159 static void
fast_composite_src_memcpy(pixman_implementation_t * imp,pixman_composite_info_t * info)1160 fast_composite_src_memcpy (pixman_implementation_t *imp,
1161 			   pixman_composite_info_t *info)
1162 {
1163     PIXMAN_COMPOSITE_ARGS (info);
1164     int bpp = PIXMAN_FORMAT_BPP (dest_image->bits.format) / 8;
1165     uint32_t n_bytes = width * bpp;
1166     int dst_stride, src_stride;
1167     uint8_t    *dst;
1168     uint8_t    *src;
1169 
1170     src_stride = src_image->bits.rowstride * 4;
1171     dst_stride = dest_image->bits.rowstride * 4;
1172 
1173     src = (uint8_t *)src_image->bits.bits + src_y * src_stride + src_x * bpp;
1174     dst = (uint8_t *)dest_image->bits.bits + dest_y * dst_stride + dest_x * bpp;
1175 
1176     while (height--)
1177     {
1178 	memcpy (dst, src, n_bytes);
1179 
1180 	dst += dst_stride;
1181 	src += src_stride;
1182     }
1183 }
1184 
/* Instantiate nearest-neighbour scaling fast paths via the FAST_NEAREST
 * macro (presumably from pixman-inlines.h, included above -- confirm).
 * Each invocation expands to scanline functions for one combination of
 * (source format, destination format, operator, repeat mode). */
FAST_NEAREST (8888_8888_cover, 8888, 8888, uint32_t, uint32_t, SRC, COVER)
FAST_NEAREST (8888_8888_none, 8888, 8888, uint32_t, uint32_t, SRC, NONE)
FAST_NEAREST (8888_8888_pad, 8888, 8888, uint32_t, uint32_t, SRC, PAD)
FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, SRC, NORMAL)
FAST_NEAREST (x888_8888_cover, x888, 8888, uint32_t, uint32_t, SRC, COVER)
FAST_NEAREST (x888_8888_pad, x888, 8888, uint32_t, uint32_t, SRC, PAD)
FAST_NEAREST (x888_8888_normal, x888, 8888, uint32_t, uint32_t, SRC, NORMAL)
FAST_NEAREST (8888_8888_cover, 8888, 8888, uint32_t, uint32_t, OVER, COVER)
FAST_NEAREST (8888_8888_none, 8888, 8888, uint32_t, uint32_t, OVER, NONE)
FAST_NEAREST (8888_8888_pad, 8888, 8888, uint32_t, uint32_t, OVER, PAD)
FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, OVER, NORMAL)
FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, SRC, COVER)
FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, SRC, NONE)
FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, SRC, PAD)
FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, SRC, NORMAL)
FAST_NEAREST (565_565_normal, 0565, 0565, uint16_t, uint16_t, SRC, NORMAL)
FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, OVER, COVER)
FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, OVER, NONE)
FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, OVER, PAD)
FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, OVER, NORMAL)
1205 
#define REPEAT_MIN_WIDTH    32

/*
 * Composite with a NORMAL-repeat (tiled) source by repeatedly invoking a
 * non-repeating delegate fast path, one source-tile-wide span at a time.
 *
 * For very narrow sources (< REPEAT_MIN_WIDTH pixels) of simple 8/16/32 bpp
 * non-indexed formats, one source scanline is first replicated into a stack
 * buffer until it is at least REPEAT_MIN_WIDTH pixels wide, so the delegate
 * is not called for pathologically short spans.
 */
static void
fast_composite_tiled_repeat (pixman_implementation_t *imp,
			     pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    pixman_composite_func_t func;
    pixman_format_code_t mask_format;
    uint32_t src_flags, mask_flags;
    int32_t sx, sy;
    int32_t width_remain;
    int32_t num_pixels;
    int32_t src_width;
    int32_t i, j;
    pixman_image_t extended_src_image;
    uint32_t extended_src[REPEAT_MIN_WIDTH * 2];
    pixman_bool_t need_src_extension;
    uint32_t *src_line;
    int32_t src_stride;
    int32_t src_bpp;
    pixman_composite_info_t info2 = *info;

    /* Each delegate call samples entirely inside one tile, so drop the
     * repeat flag and claim nearest-sample clip coverage instead. */
    src_flags = (info->src_flags & ~FAST_PATH_NORMAL_REPEAT) |
		    FAST_PATH_SAMPLES_COVER_CLIP_NEAREST;

    if (mask_image)
    {
	mask_format = mask_image->common.extended_format_code;
	mask_flags = info->mask_flags;
    }
    else
    {
	mask_format = PIXMAN_null;
	mask_flags = FAST_PATH_IS_OPAQUE;
    }

    /* Look up the fast path that will composite a single tile span. */
    _pixman_implementation_lookup_composite (
	imp->toplevel, info->op,
	src_image->common.extended_format_code, src_flags,
	mask_format, mask_flags,
	dest_image->common.extended_format_code, info->dest_flags,
	&imp, &func);

    src_bpp = PIXMAN_FORMAT_BPP (src_image->bits.format);

    if (src_image->bits.width < REPEAT_MIN_WIDTH		&&
	(src_bpp == 32 || src_bpp == 16 || src_bpp == 8)	&&
	!src_image->bits.indexed)
    {
	sx = src_x;
	sx = MOD (sx, src_image->bits.width);
	sx += width;
	src_width = 0;

	/* Widen the replicated source until it reaches REPEAT_MIN_WIDTH
	 * and also covers the largest starting offset we will use. */
	while (src_width < REPEAT_MIN_WIDTH && src_width <= sx)
	    src_width += src_image->bits.width;

	/* Stride of the replicated scanline, in uint32_t units,
	 * rounded up to a whole word. */
	src_stride = (src_width * (src_bpp >> 3) + 3) / (int) sizeof (uint32_t);

	/* Initialize/validate stack-allocated temporary image */
	_pixman_bits_image_init (&extended_src_image, src_image->bits.format,
				 src_width, 1, &extended_src[0], src_stride,
				 FALSE);
	_pixman_image_validate (&extended_src_image);

	info2.src_image = &extended_src_image;
	need_src_extension = TRUE;
    }
    else
    {
	src_width = src_image->bits.width;
	need_src_extension = FALSE;
    }

    sx = src_x;
    sy = src_y;

    while (--height >= 0)
    {
	/* Wrap the sample position into the (possibly widened) tile. */
	sx = MOD (sx, src_width);
	sy = MOD (sy, src_image->bits.height);

	if (need_src_extension)
	{
	    /* Replicate the current source row into the one-scanline
	     * temporary image, per bpp. */
	    if (src_bpp == 32)
	    {
		PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint32_t, src_stride, src_line, 1);

		for (i = 0; i < src_width; )
		{
		    for (j = 0; j < src_image->bits.width; j++, i++)
			extended_src[i] = src_line[j];
		}
	    }
	    else if (src_bpp == 16)
	    {
		uint16_t *src_line_16;

		PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint16_t, src_stride,
				       src_line_16, 1);
		src_line = (uint32_t*)src_line_16;

		for (i = 0; i < src_width; )
		{
		    for (j = 0; j < src_image->bits.width; j++, i++)
			((uint16_t*)extended_src)[i] = ((uint16_t*)src_line)[j];
		}
	    }
	    else if (src_bpp == 8)
	    {
		uint8_t *src_line_8;

		PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint8_t, src_stride,
				       src_line_8, 1);
		src_line = (uint32_t*)src_line_8;

		for (i = 0; i < src_width; )
		{
		    for (j = 0; j < src_image->bits.width; j++, i++)
			((uint8_t*)extended_src)[i] = ((uint8_t*)src_line)[j];
		}
	    }

	    info2.src_y = 0;
	}
	else
	{
	    info2.src_y = sy;
	}

	width_remain = width;

	/* Walk across the destination row, compositing at most one tile
	 * width per delegate call; after the first span, subsequent spans
	 * start at source x = 0. */
	while (width_remain > 0)
	{
	    num_pixels = src_width - sx;

	    if (num_pixels > width_remain)
		num_pixels = width_remain;

	    info2.src_x = sx;
	    info2.width = num_pixels;
	    info2.height = 1;

	    func (imp, &info2);

	    width_remain -= num_pixels;
	    info2.mask_x += num_pixels;
	    info2.dest_x += num_pixels;
	    sx = 0;
	}

	/* Advance to the next row, resetting the horizontal positions. */
	sx = src_x;
	sy++;
	info2.mask_x = info->mask_x;
	info2.mask_y++;
	info2.dest_x = info->dest_x;
	info2.dest_y++;
    }

    /* Tear down the stack-allocated temporary image, if one was created. */
    if (need_src_extension)
	_pixman_image_fini (&extended_src_image);
}
1369 
1370 /* Use more unrolling for src_0565_0565 because it is typically CPU bound */
1371 static force_inline void
scaled_nearest_scanline_565_565_SRC(uint16_t * dst,const uint16_t * src,int32_t w,pixman_fixed_t vx,pixman_fixed_t unit_x,pixman_fixed_t max_vx,pixman_bool_t fully_transparent_src)1372 scaled_nearest_scanline_565_565_SRC (uint16_t *       dst,
1373 				     const uint16_t * src,
1374 				     int32_t          w,
1375 				     pixman_fixed_t   vx,
1376 				     pixman_fixed_t   unit_x,
1377 				     pixman_fixed_t   max_vx,
1378 				     pixman_bool_t    fully_transparent_src)
1379 {
1380     uint16_t tmp1, tmp2, tmp3, tmp4;
1381     while ((w -= 4) >= 0)
1382     {
1383 	tmp1 = *(src + pixman_fixed_to_int (vx));
1384 	vx += unit_x;
1385 	tmp2 = *(src + pixman_fixed_to_int (vx));
1386 	vx += unit_x;
1387 	tmp3 = *(src + pixman_fixed_to_int (vx));
1388 	vx += unit_x;
1389 	tmp4 = *(src + pixman_fixed_to_int (vx));
1390 	vx += unit_x;
1391 	*dst++ = tmp1;
1392 	*dst++ = tmp2;
1393 	*dst++ = tmp3;
1394 	*dst++ = tmp4;
1395     }
1396     if (w & 2)
1397     {
1398 	tmp1 = *(src + pixman_fixed_to_int (vx));
1399 	vx += unit_x;
1400 	tmp2 = *(src + pixman_fixed_to_int (vx));
1401 	vx += unit_x;
1402 	*dst++ = tmp1;
1403 	*dst++ = tmp2;
1404     }
1405     if (w & 1)
1406 	*dst = *(src + pixman_fixed_to_int (vx));
1407 }
1408 
/*
 * Hook the unrolled 565 -> 565 scanline function into the generic nearest
 * scaling main loop (FAST_NEAREST_MAINLOOP, pixman-inlines.h), one
 * instantiation per supported repeat mode.
 */
FAST_NEAREST_MAINLOOP (565_565_cover_SRC,
		       scaled_nearest_scanline_565_565_SRC,
		       uint16_t, uint16_t, COVER)
FAST_NEAREST_MAINLOOP (565_565_none_SRC,
		       scaled_nearest_scanline_565_565_SRC,
		       uint16_t, uint16_t, NONE)
FAST_NEAREST_MAINLOOP (565_565_pad_SRC,
		       scaled_nearest_scanline_565_565_SRC,
		       uint16_t, uint16_t, PAD)
1418 
1419 static force_inline uint32_t
fetch_nearest(pixman_repeat_t src_repeat,pixman_format_code_t format,uint32_t * src,int x,int src_width)1420 fetch_nearest (pixman_repeat_t src_repeat,
1421 	       pixman_format_code_t format,
1422 	       uint32_t *src, int x, int src_width)
1423 {
1424     if (repeat (src_repeat, &x, src_width))
1425     {
1426 	if (format == PIXMAN_x8r8g8b8 || format == PIXMAN_x8b8g8r8)
1427 	    return *(src + x) | 0xff000000;
1428 	else
1429 	    return *(src + x);
1430     }
1431     else
1432     {
1433 	return 0;
1434     }
1435 }
1436 
1437 static force_inline void
combine_over(uint32_t s,uint32_t * dst)1438 combine_over (uint32_t s, uint32_t *dst)
1439 {
1440     if (s)
1441     {
1442 	uint8_t ia = 0xff - (s >> 24);
1443 
1444 	if (ia)
1445 	    UN8x4_MUL_UN8_ADD_UN8x4 (*dst, ia, s);
1446 	else
1447 	    *dst = s;
1448     }
1449 }
1450 
1451 static force_inline void
combine_src(uint32_t s,uint32_t * dst)1452 combine_src (uint32_t s, uint32_t *dst)
1453 {
1454     *dst = s;
1455 }
1456 
/*
 * Generic nearest-neighbour scaled composite for 32 bpp source and
 * destination, handling the SRC and OVER operators.  Per-pixel stepping
 * uses only the diagonal of the transform matrix, so this assumes the
 * fast-path flags restrict it to scale + translate transforms
 * (NOTE(review): confirm against the table entries that select it).
 */
static void
fast_composite_scaled_nearest (pixman_implementation_t *imp,
			       pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t       *dst_line;
    uint32_t       *src_line;
    int             dst_stride, src_stride;
    int		    src_width, src_height;
    pixman_repeat_t src_repeat;
    pixman_fixed_t unit_x, unit_y;
    pixman_format_code_t src_format;
    pixman_vector_t v;
    pixman_fixed_t vy;

    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
    /* pass in 0 instead of src_x and src_y because src_x and src_y need to be
     * transformed from destination space to source space
     */
    PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, uint32_t, src_stride, src_line, 1);

    /* reference point is the center of the pixel */
    v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2;
    v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2;
    v.vector[2] = pixman_fixed_1;

    if (!pixman_transform_point_3d (src_image->common.transform, &v))
	return;

    /* fixed-point per-pixel (x) and per-row (y) increments */
    unit_x = src_image->common.transform->matrix[0][0];
    unit_y = src_image->common.transform->matrix[1][1];

    /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */
    v.vector[0] -= pixman_fixed_e;
    v.vector[1] -= pixman_fixed_e;

    src_height = src_image->bits.height;
    src_width = src_image->bits.width;
    src_repeat = src_image->common.repeat;
    src_format = src_image->bits.format;

    vy = v.vector[1];
    while (height--)
    {
        pixman_fixed_t vx = v.vector[0];
	int y = pixman_fixed_to_int (vy);
	uint32_t *dst = dst_line;

	dst_line += dst_stride;

        /* adjust the y location by a unit vector in the y direction
         * this is equivalent to transforming y+1 of the destination point to source space */
        vy += unit_y;

	if (!repeat (src_repeat, &y, src_height))
	{
	    /* Row samples entirely outside the source: SRC clears the
	     * destination row, OVER leaves it untouched. */
	    if (op == PIXMAN_OP_SRC)
		memset (dst, 0, sizeof (*dst) * width);
	}
	else
	{
	    int w = width;

	    uint32_t *src = src_line + y * src_stride;

	    /* Process pixels in pairs; both are fetched before combining. */
	    while (w >= 2)
	    {
		uint32_t s1, s2;
		int x1, x2;

		x1 = pixman_fixed_to_int (vx);
		vx += unit_x;

		x2 = pixman_fixed_to_int (vx);
		vx += unit_x;

		w -= 2;

		s1 = fetch_nearest (src_repeat, src_format, src, x1, src_width);
		s2 = fetch_nearest (src_repeat, src_format, src, x2, src_width);

		if (op == PIXMAN_OP_OVER)
		{
		    combine_over (s1, dst++);
		    combine_over (s2, dst++);
		}
		else
		{
		    combine_src (s1, dst++);
		    combine_src (s2, dst++);
		}
	    }

	    /* Remaining odd pixel, if any. */
	    while (w--)
	    {
		uint32_t s;
		int x;

		x = pixman_fixed_to_int (vx);
		vx += unit_x;

		s = fetch_nearest (src_repeat, src_format, src, x, src_width);

		if (op == PIXMAN_OP_OVER)
		    combine_over (s, dst++);
		else
		    combine_src (s, dst++);
	    }
	}
    }
}
1568 
/* Assumed CPU cache line size in bytes; used only as an alignment hint. */
#define CACHE_LINE_SIZE 64

/*
 * FAST_SIMPLE_ROTATE(suffix, pix_type) expands to a family of functions for
 * 90- and 270-degree rotated blits of pix_type pixels:
 *
 *   blt_rotated_{90,270}_trivial_<suffix>  - straightforward column-walking
 *                                            copy of a w x h region;
 *   blt_rotated_{90,270}_<suffix>          - tiled wrappers that split the
 *                                            destination into cache-line
 *                                            aligned vertical stripes and
 *                                            call the trivial blitter on
 *                                            each (unaligned leading and
 *                                            trailing parts handled
 *                                            separately);
 *   fast_composite_rotate_{90,270}_<suffix> - composite entry points that
 *                                            derive the source origin from
 *                                            the transform's translation
 *                                            column and invoke the tiled
 *                                            blitter.
 */
#define FAST_SIMPLE_ROTATE(suffix, pix_type)                                  \
                                                                              \
static void                                                                   \
blt_rotated_90_trivial_##suffix (pix_type       *dst,                         \
				 int             dst_stride,                  \
				 const pix_type *src,                         \
				 int             src_stride,                  \
				 int             w,                           \
				 int             h)                           \
{                                                                             \
    int x, y;                                                                 \
    for (y = 0; y < h; y++)                                                   \
    {                                                                         \
	const pix_type *s = src + (h - y - 1);                                \
	pix_type *d = dst + dst_stride * y;                                   \
	for (x = 0; x < w; x++)                                               \
	{                                                                     \
	    *d++ = *s;                                                        \
	    s += src_stride;                                                  \
	}                                                                     \
    }                                                                         \
}                                                                             \
                                                                              \
static void                                                                   \
blt_rotated_270_trivial_##suffix (pix_type       *dst,                        \
				  int             dst_stride,                 \
				  const pix_type *src,                        \
				  int             src_stride,                 \
				  int             w,                          \
				  int             h)                          \
{                                                                             \
    int x, y;                                                                 \
    for (y = 0; y < h; y++)                                                   \
    {                                                                         \
	const pix_type *s = src + src_stride * (w - 1) + y;                   \
	pix_type *d = dst + dst_stride * y;                                   \
	for (x = 0; x < w; x++)                                               \
	{                                                                     \
	    *d++ = *s;                                                        \
	    s -= src_stride;                                                  \
	}                                                                     \
    }                                                                         \
}                                                                             \
                                                                              \
static void                                                                   \
blt_rotated_90_##suffix (pix_type       *dst,                                 \
			 int             dst_stride,                          \
			 const pix_type *src,                                 \
			 int             src_stride,                          \
			 int             W,                                   \
			 int             H)                                   \
{                                                                             \
    int x;                                                                    \
    int leading_pixels = 0, trailing_pixels = 0;                              \
    const int TILE_SIZE = CACHE_LINE_SIZE / sizeof(pix_type);                 \
                                                                              \
    /*                                                                        \
     * split processing into handling destination as TILE_SIZExH cache line   \
     * aligned vertical stripes (optimistically assuming that destination     \
     * stride is a multiple of cache line, if not - it will be just a bit     \
     * slower)                                                                \
     */                                                                       \
                                                                              \
    if ((uintptr_t)dst & (CACHE_LINE_SIZE - 1))                               \
    {                                                                         \
	leading_pixels = TILE_SIZE - (((uintptr_t)dst &                       \
			    (CACHE_LINE_SIZE - 1)) / sizeof(pix_type));       \
	if (leading_pixels > W)                                               \
	    leading_pixels = W;                                               \
                                                                              \
	/* unaligned leading part NxH (where N < TILE_SIZE) */                \
	blt_rotated_90_trivial_##suffix (                                     \
	    dst,                                                              \
	    dst_stride,                                                       \
	    src,                                                              \
	    src_stride,                                                       \
	    leading_pixels,                                                   \
	    H);                                                               \
	                                                                      \
	dst += leading_pixels;                                                \
	src += leading_pixels * src_stride;                                   \
	W -= leading_pixels;                                                  \
    }                                                                         \
                                                                              \
    if ((uintptr_t)(dst + W) & (CACHE_LINE_SIZE - 1))                         \
    {                                                                         \
	trailing_pixels = (((uintptr_t)(dst + W) &                            \
			    (CACHE_LINE_SIZE - 1)) / sizeof(pix_type));       \
	if (trailing_pixels > W)                                              \
	    trailing_pixels = W;                                              \
	W -= trailing_pixels;                                                 \
    }                                                                         \
                                                                              \
    for (x = 0; x < W; x += TILE_SIZE)                                        \
    {                                                                         \
	/* aligned middle part TILE_SIZExH */                                 \
	blt_rotated_90_trivial_##suffix (                                     \
	    dst + x,                                                          \
	    dst_stride,                                                       \
	    src + src_stride * x,                                             \
	    src_stride,                                                       \
	    TILE_SIZE,                                                        \
	    H);                                                               \
    }                                                                         \
                                                                              \
    if (trailing_pixels)                                                      \
    {                                                                         \
	/* unaligned trailing part NxH (where N < TILE_SIZE) */               \
	blt_rotated_90_trivial_##suffix (                                     \
	    dst + W,                                                          \
	    dst_stride,                                                       \
	    src + W * src_stride,                                             \
	    src_stride,                                                       \
	    trailing_pixels,                                                  \
	    H);                                                               \
    }                                                                         \
}                                                                             \
                                                                              \
static void                                                                   \
blt_rotated_270_##suffix (pix_type       *dst,                                \
			  int             dst_stride,                         \
			  const pix_type *src,                                \
			  int             src_stride,                         \
			  int             W,                                  \
			  int             H)                                  \
{                                                                             \
    int x;                                                                    \
    int leading_pixels = 0, trailing_pixels = 0;                              \
    const int TILE_SIZE = CACHE_LINE_SIZE / sizeof(pix_type);                 \
                                                                              \
    /*                                                                        \
     * split processing into handling destination as TILE_SIZExH cache line   \
     * aligned vertical stripes (optimistically assuming that destination     \
     * stride is a multiple of cache line, if not - it will be just a bit     \
     * slower)                                                                \
     */                                                                       \
                                                                              \
    if ((uintptr_t)dst & (CACHE_LINE_SIZE - 1))                               \
    {                                                                         \
	leading_pixels = TILE_SIZE - (((uintptr_t)dst &                       \
			    (CACHE_LINE_SIZE - 1)) / sizeof(pix_type));       \
	if (leading_pixels > W)                                               \
	    leading_pixels = W;                                               \
                                                                              \
	/* unaligned leading part NxH (where N < TILE_SIZE) */                \
	blt_rotated_270_trivial_##suffix (                                    \
	    dst,                                                              \
	    dst_stride,                                                       \
	    src + src_stride * (W - leading_pixels),                          \
	    src_stride,                                                       \
	    leading_pixels,                                                   \
	    H);                                                               \
	                                                                      \
	dst += leading_pixels;                                                \
	W -= leading_pixels;                                                  \
    }                                                                         \
                                                                              \
    if ((uintptr_t)(dst + W) & (CACHE_LINE_SIZE - 1))                         \
    {                                                                         \
	trailing_pixels = (((uintptr_t)(dst + W) &                            \
			    (CACHE_LINE_SIZE - 1)) / sizeof(pix_type));       \
	if (trailing_pixels > W)                                              \
	    trailing_pixels = W;                                              \
	W -= trailing_pixels;                                                 \
	src += trailing_pixels * src_stride;                                  \
    }                                                                         \
                                                                              \
    for (x = 0; x < W; x += TILE_SIZE)                                        \
    {                                                                         \
	/* aligned middle part TILE_SIZExH */                                 \
	blt_rotated_270_trivial_##suffix (                                    \
	    dst + x,                                                          \
	    dst_stride,                                                       \
	    src + src_stride * (W - x - TILE_SIZE),                           \
	    src_stride,                                                       \
	    TILE_SIZE,                                                        \
	    H);                                                               \
    }                                                                         \
                                                                              \
    if (trailing_pixels)                                                      \
    {                                                                         \
	/* unaligned trailing part NxH (where N < TILE_SIZE) */               \
	blt_rotated_270_trivial_##suffix (                                    \
	    dst + W,                                                          \
	    dst_stride,                                                       \
	    src - trailing_pixels * src_stride,                               \
	    src_stride,                                                       \
	    trailing_pixels,                                                  \
	    H);                                                               \
    }                                                                         \
}                                                                             \
                                                                              \
static void                                                                   \
fast_composite_rotate_90_##suffix (pixman_implementation_t *imp,              \
				   pixman_composite_info_t *info)	      \
{									      \
    PIXMAN_COMPOSITE_ARGS (info);					      \
    pix_type       *dst_line;						      \
    pix_type       *src_line;                                                 \
    int             dst_stride, src_stride;                                   \
    int             src_x_t, src_y_t;                                         \
                                                                              \
    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, pix_type,              \
			   dst_stride, dst_line, 1);                          \
    src_x_t = -src_y + pixman_fixed_to_int (                                  \
				src_image->common.transform->matrix[0][2] +   \
				pixman_fixed_1 / 2 - pixman_fixed_e) - height;\
    src_y_t = src_x + pixman_fixed_to_int (                                   \
				src_image->common.transform->matrix[1][2] +   \
				pixman_fixed_1 / 2 - pixman_fixed_e);         \
    PIXMAN_IMAGE_GET_LINE (src_image, src_x_t, src_y_t, pix_type,             \
			   src_stride, src_line, 1);                          \
    blt_rotated_90_##suffix (dst_line, dst_stride, src_line, src_stride,      \
			     width, height);                                  \
}                                                                             \
                                                                              \
static void                                                                   \
fast_composite_rotate_270_##suffix (pixman_implementation_t *imp,             \
				    pixman_composite_info_t *info)            \
{                                                                             \
    PIXMAN_COMPOSITE_ARGS (info);					      \
    pix_type       *dst_line;						      \
    pix_type       *src_line;                                                 \
    int             dst_stride, src_stride;                                   \
    int             src_x_t, src_y_t;                                         \
                                                                              \
    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, pix_type,              \
			   dst_stride, dst_line, 1);                          \
    src_x_t = src_y + pixman_fixed_to_int (                                   \
				src_image->common.transform->matrix[0][2] +   \
				pixman_fixed_1 / 2 - pixman_fixed_e);         \
    src_y_t = -src_x + pixman_fixed_to_int (                                  \
				src_image->common.transform->matrix[1][2] +   \
				pixman_fixed_1 / 2 - pixman_fixed_e) - width; \
    PIXMAN_IMAGE_GET_LINE (src_image, src_x_t, src_y_t, pix_type,             \
			   src_stride, src_line, 1);                          \
    blt_rotated_270_##suffix (dst_line, dst_stride, src_line, src_stride,     \
			      width, height);                                 \
}
1810 
/* Instantiate the rotation blitters for 8, 16 and 32 bpp pixel types. */
FAST_SIMPLE_ROTATE (8, uint8_t)
FAST_SIMPLE_ROTATE (565, uint16_t)
FAST_SIMPLE_ROTATE (8888, uint32_t)
1814 
/* Table of C fast paths.  The framework scans this table in order and
 * uses the first entry whose operator, source/mask/destination formats
 * and flags match the composite request, so more specific entries must
 * come before more general ones.  The table is terminated by the
 * PIXMAN_OP_NONE sentinel.
 */
static const pixman_fast_path_t c_fast_paths[] =
{
    PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, fast_composite_over_n_8_0565),
    PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, fast_composite_over_n_8_0565),
    PIXMAN_STD_FAST_PATH (OVER, solid, a8, r8g8b8, fast_composite_over_n_8_0888),
    PIXMAN_STD_FAST_PATH (OVER, solid, a8, b8g8r8, fast_composite_over_n_8_0888),
    PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, fast_composite_over_n_8_8888),
    PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, fast_composite_over_n_8_8888),
    PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, fast_composite_over_n_8_8888),
    PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, fast_composite_over_n_8_8888),
    PIXMAN_STD_FAST_PATH (OVER, solid, a1, a8r8g8b8, fast_composite_over_n_1_8888),
    PIXMAN_STD_FAST_PATH (OVER, solid, a1, x8r8g8b8, fast_composite_over_n_1_8888),
    PIXMAN_STD_FAST_PATH (OVER, solid, a1, a8b8g8r8, fast_composite_over_n_1_8888),
    PIXMAN_STD_FAST_PATH (OVER, solid, a1, x8b8g8r8, fast_composite_over_n_1_8888),
    PIXMAN_STD_FAST_PATH (OVER, solid, a1, r5g6b5,   fast_composite_over_n_1_0565),
    PIXMAN_STD_FAST_PATH (OVER, solid, a1, b5g6r5,   fast_composite_over_n_1_0565),
    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, fast_composite_over_n_8888_8888_ca),
    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, fast_composite_over_n_8888_8888_ca),
    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5, fast_composite_over_n_8888_0565_ca),
    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, fast_composite_over_n_8888_8888_ca),
    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, fast_composite_over_n_8888_8888_ca),
    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5, fast_composite_over_n_8888_0565_ca),
    PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, x8r8g8b8, fast_composite_over_x888_8_8888),
    PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, a8r8g8b8, fast_composite_over_x888_8_8888),
    PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, x8b8g8r8, fast_composite_over_x888_8_8888),
    PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, a8b8g8r8, fast_composite_over_x888_8_8888),
    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, fast_composite_over_8888_8888),
    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, fast_composite_over_8888_8888),
    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, fast_composite_over_8888_0565),
    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, fast_composite_over_8888_8888),
    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, fast_composite_over_8888_8888),
    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, fast_composite_over_8888_0565),
    PIXMAN_STD_FAST_PATH (ADD, r5g6b5, null, r5g6b5, fast_composite_add_0565_0565),
    PIXMAN_STD_FAST_PATH (ADD, b5g6r5, null, b5g6r5, fast_composite_add_0565_0565),
    PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, fast_composite_add_8888_8888),
    PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, fast_composite_add_8888_8888),
    PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, fast_composite_add_8_8),
    PIXMAN_STD_FAST_PATH (ADD, a1, null, a1, fast_composite_add_1_1),
    PIXMAN_STD_FAST_PATH_CA (ADD, solid, a8r8g8b8, a8r8g8b8, fast_composite_add_n_8888_8888_ca),
    PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, fast_composite_add_n_8_8),
    PIXMAN_STD_FAST_PATH (SRC, solid, null, a8r8g8b8, fast_composite_solid_fill),
    PIXMAN_STD_FAST_PATH (SRC, solid, null, x8r8g8b8, fast_composite_solid_fill),
    PIXMAN_STD_FAST_PATH (SRC, solid, null, a8b8g8r8, fast_composite_solid_fill),
    PIXMAN_STD_FAST_PATH (SRC, solid, null, x8b8g8r8, fast_composite_solid_fill),
    PIXMAN_STD_FAST_PATH (SRC, solid, null, a1, fast_composite_solid_fill),
    PIXMAN_STD_FAST_PATH (SRC, solid, null, a8, fast_composite_solid_fill),
    PIXMAN_STD_FAST_PATH (SRC, solid, null, r5g6b5, fast_composite_solid_fill),
    PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, fast_composite_src_x888_8888),
    PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, fast_composite_src_x888_8888),
    PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, b8g8r8a8, null, b8g8r8x8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, b8g8r8a8, null, b8g8r8a8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, b8g8r8x8, null, b8g8r8x8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, r8g8b8, null, r8g8b8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, b8g8r8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, x1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, a1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, a8, null, a8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (IN, a8, null, a8, fast_composite_in_8_8),
    PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, fast_composite_in_n_8_8),

    /* Nearest-neighbour scaling paths with specialized inner loops. */
    SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, 8888_8888),
    SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, 8888_8888),
    SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, 8888_8888),
    SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8, 8888_8888),

    SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, 8888_8888),
    SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8, 8888_8888),

    SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, r5g6b5, 8888_565),
    SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, r5g6b5, 8888_565),

    SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, r5g6b5, 565_565),

    SIMPLE_NEAREST_FAST_PATH_COVER (SRC, x8r8g8b8, a8r8g8b8, x888_8888),
    SIMPLE_NEAREST_FAST_PATH_COVER (SRC, x8b8g8r8, a8b8g8r8, x888_8888),
    SIMPLE_NEAREST_FAST_PATH_PAD (SRC, x8r8g8b8, a8r8g8b8, x888_8888),
    SIMPLE_NEAREST_FAST_PATH_PAD (SRC, x8b8g8r8, a8b8g8r8, x888_8888),
    SIMPLE_NEAREST_FAST_PATH_NORMAL (SRC, x8r8g8b8, a8r8g8b8, x888_8888),
    SIMPLE_NEAREST_FAST_PATH_NORMAL (SRC, x8b8g8r8, a8b8g8r8, x888_8888),

    SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, 8888_8888),
    SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, 8888_8888),
    SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, 8888_8888),
    SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, 8888_8888),

    SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, 8888_565),

/* Generic nearest-neighbour path entry for the given op and formats,
 * handled by the one general-purpose scaled-nearest compositor.
 */
#define NEAREST_FAST_PATH(op,s,d)		\
    {   PIXMAN_OP_ ## op,			\
	PIXMAN_ ## s, SCALED_NEAREST_FLAGS,	\
	PIXMAN_null, 0,				\
	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,	\
	fast_composite_scaled_nearest,		\
    }

    NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8),
    NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8),
    NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8),
    NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8),

    NEAREST_FAST_PATH (SRC, x8r8g8b8, a8r8g8b8),
    NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8),
    NEAREST_FAST_PATH (SRC, x8b8g8r8, a8b8g8r8),
    NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8),

    NEAREST_FAST_PATH (OVER, x8r8g8b8, x8r8g8b8),
    NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8),
    NEAREST_FAST_PATH (OVER, x8b8g8r8, x8b8g8r8),
    NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8),

    NEAREST_FAST_PATH (OVER, x8r8g8b8, a8r8g8b8),
    NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8),
    NEAREST_FAST_PATH (OVER, x8b8g8r8, a8b8g8r8),
    NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8),

/* Source flags required by the rotation blitters: a pure 90- or
 * 270-degree rotation transform, nearest filtering, and samples that
 * fully cover the clip so no edge handling is needed.
 */
#define SIMPLE_ROTATE_FLAGS(angle)					  \
    (FAST_PATH_ROTATE_ ## angle ## _TRANSFORM	|			  \
     FAST_PATH_NEAREST_FILTER			|			  \
     FAST_PATH_SAMPLES_COVER_CLIP_NEAREST	|			  \
     FAST_PATH_STANDARD_FLAGS)

/* Two table entries per format pair: one for the 90-degree rotation
 * compositor and one for the 270-degree one.
 */
#define SIMPLE_ROTATE_FAST_PATH(op,s,d,suffix)				  \
    {   PIXMAN_OP_ ## op,						  \
	PIXMAN_ ## s, SIMPLE_ROTATE_FLAGS (90),				  \
	PIXMAN_null, 0,							  \
	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				  \
	fast_composite_rotate_90_##suffix,				  \
    },									  \
    {   PIXMAN_OP_ ## op,						  \
	PIXMAN_ ## s, SIMPLE_ROTATE_FLAGS (270),			  \
	PIXMAN_null, 0,							  \
	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				  \
	fast_composite_rotate_270_##suffix,				  \
    }

    SIMPLE_ROTATE_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, 8888),
    SIMPLE_ROTATE_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, 8888),
    SIMPLE_ROTATE_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, 8888),
    SIMPLE_ROTATE_FAST_PATH (SRC, r5g6b5, r5g6b5, 565),
    SIMPLE_ROTATE_FAST_PATH (SRC, a8, a8, 8),

    /* Simple repeat fast path entry. */
    {	PIXMAN_OP_any,
	PIXMAN_any,
	(FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | FAST_PATH_BITS_IMAGE |
	 FAST_PATH_NORMAL_REPEAT),
	PIXMAN_any, 0,
	PIXMAN_any, FAST_PATH_STD_DEST_FLAGS,
	fast_composite_tiled_repeat
    },

    /* End-of-table sentinel. */
    {   PIXMAN_OP_NONE	},
};
1976 
/* Build a 32-bit mask of `n` consecutive set bits starting `offs` bits
 * into a word of a1 pixels.  On big-endian targets the run is placed
 * from the most significant end of the word, on little-endian from the
 * least significant end.  Callers must keep n in [1, 31]: n == 32 would
 * shift 1U by the full word width, which is undefined.
 */
#ifdef WORDS_BIGENDIAN
#define A1_FILL_MASK(n, offs) (((1U << (n)) - 1) << (32 - (offs) - (n)))
#else
#define A1_FILL_MASK(n, offs) (((1U << (n)) - 1) << (offs))
#endif
1982 
1983 static force_inline void
pixman_fill1_line(uint32_t * dst,int offs,int width,int v)1984 pixman_fill1_line (uint32_t *dst, int offs, int width, int v)
1985 {
1986     if (offs)
1987     {
1988 	int leading_pixels = 32 - offs;
1989 	if (leading_pixels >= width)
1990 	{
1991 	    if (v)
1992 		*dst |= A1_FILL_MASK (width, offs);
1993 	    else
1994 		*dst &= ~A1_FILL_MASK (width, offs);
1995 	    return;
1996 	}
1997 	else
1998 	{
1999 	    if (v)
2000 		*dst++ |= A1_FILL_MASK (leading_pixels, offs);
2001 	    else
2002 		*dst++ &= ~A1_FILL_MASK (leading_pixels, offs);
2003 	    width -= leading_pixels;
2004 	}
2005     }
2006     while (width >= 32)
2007     {
2008 	if (v)
2009 	    *dst++ = 0xFFFFFFFF;
2010 	else
2011 	    *dst++ = 0;
2012 	width -= 32;
2013     }
2014     if (width > 0)
2015     {
2016 	if (v)
2017 	    *dst |= A1_FILL_MASK (width, 0);
2018 	else
2019 	    *dst &= ~A1_FILL_MASK (width, 0);
2020     }
2021 }
2022 
/* Fill a width x height rectangle of a1 pixels.  Only the low bit of
 * `filler` matters: it selects between setting and clearing the bits.
 * `stride` is in uint32_t units.
 */
static void
pixman_fill1 (uint32_t *bits,
              int       stride,
              int       x,
              int       y,
              int       width,
              int       height,
              uint32_t  filler)
{
    uint32_t *row = bits + y * stride + (x >> 5);
    int offs = x & 31;
    int v = filler & 1;

    while (height--)
    {
	pixman_fill1_line (row, offs, width, v);
	row += stride;
    }
}
2052 
/* Fill a width x height rectangle of 8 bpp pixels with the low byte of
 * `filler`.  `stride` is in uint32_t units (as for all pixman fill
 * helpers), so it is converted to a byte stride first.
 */
static void
pixman_fill8 (uint32_t *bits,
              int       stride,
              int       x,
              int       y,
              int       width,
              int       height,
              uint32_t  filler)
{
    int byte_stride = stride * (int) sizeof (uint32_t);
    uint8_t *dst = (uint8_t *) bits + y * byte_stride + x;
    uint8_t v = filler & 0xff;

    /* Guard before converting width to memset's size_t: a non-positive
     * width must remain a no-op rather than a huge unsigned count.
     */
    if (width <= 0)
	return;

    while (height--)
    {
	/* memset replaces the hand-rolled byte loop: clearer, and
	 * typically vectorized by the C library.
	 */
	memset (dst, v, width);
	dst += byte_stride;
    }
}
2077 
/* Fill a width x height rectangle of 16 bpp pixels with the low 16 bits
 * of `filler`.  `stride` is in uint32_t units and is recomputed in
 * uint16_t units for addressing.
 */
static void
pixman_fill16 (uint32_t *bits,
               int       stride,
               int       x,
               int       y,
               int       width,
               int       height,
               uint32_t  filler)
{
    int pitch =
	(stride * (int)sizeof (uint32_t)) / (int)sizeof (uint16_t);
    uint16_t *row = (uint16_t *)bits + y * pitch + x;
    uint16_t v = filler & 0xffff;

    while (height--)
    {
	uint16_t *p = row;
	int n = width;

	while (n-- > 0)
	    *p++ = v;

	row += pitch;
    }
}
2103 
/* Fill a width x height rectangle of 32 bpp pixels with `filler`.
 * `stride` is in uint32_t units, matching pixman's bits layout.
 */
static void
pixman_fill32 (uint32_t *bits,
               int       stride,
               int       x,
               int       y,
               int       width,
               int       height,
               uint32_t  filler)
{
    uint32_t *row = bits + y * stride + x;

    while (height--)
    {
	uint32_t *p = row;
	int n = width;

	while (n-- > 0)
	    *p++ = filler;

	row += stride;
    }
}
2125 
2126 static pixman_bool_t
fast_path_fill(pixman_implementation_t * imp,uint32_t * bits,int stride,int bpp,int x,int y,int width,int height,uint32_t filler)2127 fast_path_fill (pixman_implementation_t *imp,
2128                 uint32_t *               bits,
2129                 int                      stride,
2130                 int                      bpp,
2131                 int                      x,
2132                 int                      y,
2133                 int                      width,
2134                 int                      height,
2135                 uint32_t		 filler)
2136 {
2137     switch (bpp)
2138     {
2139     case 1:
2140 	pixman_fill1 (bits, stride, x, y, width, height, filler);
2141 	break;
2142 
2143     case 8:
2144 	pixman_fill8 (bits, stride, x, y, width, height, filler);
2145 	break;
2146 
2147     case 16:
2148 	pixman_fill16 (bits, stride, x, y, width, height, filler);
2149 	break;
2150 
2151     case 32:
2152 	pixman_fill32 (bits, stride, x, y, width, height, filler);
2153 	break;
2154 
2155     default:
2156 	return FALSE;
2157     }
2158 
2159     return TRUE;
2160 }
2161 
2162 /*****************************************************************************/
2163 
/* Source iterator scanline fetch for r5g6b5: expands one scanline of
 * 565 pixels to opaque a8r8g8b8 in iter->buffer and advances the
 * iterator to the next scanline.
 */
static uint32_t *
fast_fetch_r5g6b5 (pixman_iter_t *iter, const uint32_t *mask)
{
    int32_t w = iter->width;
    uint32_t *dst = iter->buffer;
    const uint16_t *src = (const uint16_t *)iter->bits;

    iter->bits += iter->stride;

    /* Align the source buffer at 4 bytes boundary */
    if (w > 0 && ((uintptr_t)src & 3))
    {
	*dst++ = convert_0565_to_8888 (*src++);
	w--;
    }
    /* Process two pixels per iteration */
    while ((w -= 2) >= 0)
    {
	uint32_t sr, sb, sg, t0, t1;
	uint32_t s = *(const uint32_t *)src;
	src += 2;
	/* Split both 565 pixels (one per 16-bit half) into paired
	 * red/blue/green fields, then replicate the top bits into the
	 * low bits to expand each channel to a full 8 bits.
	 */
	sr = (s >> 8) & 0x00F800F8;
	sb = (s << 3) & 0x00F800F8;
	sg = (s >> 3) & 0x00FC00FC;
	sr |= sr >> 5;
	sb |= sb >> 5;
	sg |= sg >> 6;
	/* Reassemble the two expanded pixels with opaque alpha; which
	 * half of `s` held the first pixel depends on byte order.
	 */
	t0 = ((sr << 16) & 0x00FF0000) | ((sg << 8) & 0x0000FF00) |
	     (sb & 0xFF) | 0xFF000000;
	t1 = (sr & 0x00FF0000) | ((sg >> 8) & 0x0000FF00) |
	     (sb >> 16) | 0xFF000000;
#ifdef WORDS_BIGENDIAN
	*dst++ = t1;
	*dst++ = t0;
#else
	*dst++ = t0;
	*dst++ = t1;
#endif
    }
    /* After the loop w is -1 (one pixel left) or -2 (none); with
     * two's-complement representation the bit test picks the odd case.
     */
    if (w & 1)
    {
	*dst = convert_0565_to_8888 (*src);
    }

    return iter->buffer;
}
2210 
2211 static uint32_t *
fast_dest_fetch_noop(pixman_iter_t * iter,const uint32_t * mask)2212 fast_dest_fetch_noop (pixman_iter_t *iter, const uint32_t *mask)
2213 {
2214     iter->bits += iter->stride;
2215     return iter->buffer;
2216 }
2217 
2218 /* Helper function for a workaround, which tries to ensure that 0x1F001F
2219  * constant is always allocated in a register on RISC architectures.
2220  */
2221 static force_inline uint32_t
convert_8888_to_0565_workaround(uint32_t s,uint32_t x1F001F)2222 convert_8888_to_0565_workaround (uint32_t s, uint32_t x1F001F)
2223 {
2224     uint32_t a, b;
2225     a = (s >> 3) & x1F001F;
2226     b = s & 0xFC00;
2227     a |= a >> 5;
2228     a |= b >> 5;
2229     return a;
2230 }
2231 
/* Destination write-back for r5g6b5: converts the a8r8g8b8 scanline in
 * iter->buffer back to 565 pixels in the scanline that was last fetched
 * (iter->bits has already been advanced, hence the - iter->stride).
 */
static void
fast_write_back_r5g6b5 (pixman_iter_t *iter)
{
    int32_t w = iter->width;
    uint16_t *dst = (uint16_t *)(iter->bits - iter->stride);
    const uint32_t *src = iter->buffer;
    /* Workaround to ensure that x1F001F variable is allocated in a register */
    static volatile uint32_t volatile_x1F001F = 0x1F001F;
    uint32_t x1F001F = volatile_x1F001F;

    /* Convert four pixels per iteration. */
    while ((w -= 4) >= 0)
    {
	uint32_t s1 = *src++;
	uint32_t s2 = *src++;
	uint32_t s3 = *src++;
	uint32_t s4 = *src++;
	*dst++ = convert_8888_to_0565_workaround (s1, x1F001F);
	*dst++ = convert_8888_to_0565_workaround (s2, x1F001F);
	*dst++ = convert_8888_to_0565_workaround (s3, x1F001F);
	*dst++ = convert_8888_to_0565_workaround (s4, x1F001F);
    }
    /* Here w == (width % 4) - 4, i.e. between -4 and -1; with
     * two's-complement representation the bit tests below select the
     * 0-3 remaining pixels.
     */
    if (w & 2)
    {
	*dst++ = convert_8888_to_0565_workaround (*src++, x1F001F);
	*dst++ = convert_8888_to_0565_workaround (*src++, x1F001F);
    }
    if (w & 1)
    {
	*dst = convert_8888_to_0565_workaround (*src, x1F001F);
    }
}
2263 
/* One cached, horizontally interpolated source scanline for the
 * bilinear COVER iterator below.
 */
typedef struct
{
    int		y;	/* source row currently held, or -1 when empty */
    uint64_t *	buffer;	/* `width` entries of packed per-channel values */
} line_t;

/* Per-iterator state for the bilinear COVER fetcher. */
typedef struct
{
    line_t		lines[2];	/* two-line cache, indexed by row & 1 */
    pixman_fixed_t	y;		/* current source y (fixed point) */
    pixman_fixed_t	x;		/* source x of the first pixel (fixed point) */
    uint64_t		data[1];	/* trailing storage for both line buffers
					 * (over-allocated at malloc time) */
} bilinear_info_t;
2277 
/* Fetch source row `y` and horizontally interpolate `n` pixels starting
 * at fixed-point position x, stepping by ux, into line->buffer.  Each
 * output entry keeps the four channels with 8 extra fraction bits so
 * that fast_fetch_bilinear_cover() can do the vertical blend.
 */
static void
fetch_horizontal (bits_image_t *image, line_t *line,
		  int y, pixman_fixed_t x, pixman_fixed_t ux, int n)
{
    uint32_t *bits = image->bits + y * image->rowstride;
    int i;

    for (i = 0; i < n; ++i)
    {
	int x0 = pixman_fixed_to_int (x);
	int x1 = x0 + 1;
	int32_t dist_x;

	/* NOTE(review): x0 and x1 are assumed to be within the row; the
	 * COVER clip condition under which this iterator is installed
	 * should guarantee that -- confirm against the flag checks.
	 */
	uint32_t left = *(bits + x0);
	uint32_t right = *(bits + x1);

	/* Normalize the interpolation weight to 8 fraction bits. */
	dist_x = pixman_fixed_to_bilinear_weight (x);
	dist_x <<= (8 - BILINEAR_INTERPOLATION_BITS);

#if SIZEOF_LONG <= 4
	{
	    uint32_t lag, rag, ag;
	    uint32_t lrb, rrb, rb;

	    /* 32-bit variant: interpolate alpha/green and red/blue as
	     * two pairs and store them as the two 32-bit halves of the
	     * 64-bit buffer entry.
	     */
	    lag = (left & 0xff00ff00) >> 8;
	    rag = (right & 0xff00ff00) >> 8;
	    ag = (lag << 8) + dist_x * (rag - lag);

	    lrb = (left & 0x00ff00ff);
	    rrb = (right & 0x00ff00ff);
	    rb = (lrb << 8) + dist_x * (rrb - lrb);

	    *((uint32_t *)(line->buffer + i)) = ag;
	    *((uint32_t *)(line->buffer + i) + 1) = rb;
	}
#else
	{
	    uint64_t lagrb, ragrb;
	    uint32_t lag, rag;
	    uint32_t lrb, rrb;

	    /* 64-bit variant: pack all four channels into one 64-bit
	     * value and interpolate them with a single multiply-add.
	     */
	    lag = (left & 0xff00ff00);
	    lrb = (left & 0x00ff00ff);
	    rag = (right & 0xff00ff00);
	    rrb = (right & 0x00ff00ff);
	    lagrb = (((uint64_t)lag) << 24) | lrb;
	    ragrb = (((uint64_t)rag) << 24) | rrb;

	    line->buffer[i] = (lagrb << 8) + dist_x * (ragrb - lagrb);
	}
#endif

	x += ux;
    }

    /* Record which source row the buffer now holds (cache tag). */
    line->y = y;
}
2335 
/* Scanline fetch for the bilinear COVER iterator: vertically blends the
 * two cached, horizontally interpolated source lines around info->y
 * into a8r8g8b8 pixels in iter->buffer.
 */
static uint32_t *
fast_fetch_bilinear_cover (pixman_iter_t *iter, const uint32_t *mask)
{
    pixman_fixed_t fx, ux;
    bilinear_info_t *info = iter->data;
    line_t *line0, *line1;
    int y0, y1;
    int32_t dist_y;
    int i;

    /* The weight normalization below shifts by 8 - BITS; it must be
     * non-negative.
     */
    COMPILE_TIME_ASSERT (BILINEAR_INTERPOLATION_BITS < 8);

    fx = info->x;
    ux = iter->image->common.transform->matrix[0][0];

    y0 = pixman_fixed_to_int (info->y);
    y1 = y0 + 1;
    dist_y = pixman_fixed_to_bilinear_weight (info->y);
    dist_y <<= (8 - BILINEAR_INTERPOLATION_BITS);

    /* The two-entry cache is indexed by the low bit of the row number,
     * so a row fetched for the previous scanline is reused when it is
     * still one of the two rows needed here.
     */
    line0 = &info->lines[y0 & 0x01];
    line1 = &info->lines[y1 & 0x01];

    if (line0->y != y0)
    {
	fetch_horizontal (
	    &iter->image->bits, line0, y0, fx, ux, iter->width);
    }

    if (line1->y != y1)
    {
	fetch_horizontal (
	    &iter->image->bits, line1, y1, fx, ux, iter->width);
    }

    for (i = 0; i < iter->width; ++i)
    {
#if SIZEOF_LONG <= 4
	uint32_t ta, tr, tg, tb;
	uint32_t ba, br, bg, bb;
	uint32_t tag, trb;
	uint32_t bag, brb;
	uint32_t a, r, g, b;

	/* Load the packed channel pairs of the top and bottom lines as
	 * two 32-bit halves each (see fetch_horizontal).
	 */
	tag = *((uint32_t *)(line0->buffer + i));
	trb = *((uint32_t *)(line0->buffer + i) + 1);
	bag = *((uint32_t *)(line1->buffer + i));
	brb = *((uint32_t *)(line1->buffer + i) + 1);

	/* Blend each channel vertically with the 8-bit weight. */
	ta = tag >> 16;
	ba = bag >> 16;
	a = (ta << 8) + dist_y * (ba - ta);

	tr = trb >> 16;
	br = brb >> 16;
	r = (tr << 8) + dist_y * (br - tr);

	tg = tag & 0xffff;
	bg = bag & 0xffff;
	g = (tg << 8) + dist_y * (bg - tg);

	tb = trb & 0xffff;
	bb = brb & 0xffff;
	b = (tb << 8) + dist_y * (bb - tb);

	/* Drop the fraction bits and move each channel into its
	 * a8r8g8b8 position.
	 */
	a = (a <<  8) & 0xff000000;
	r = (r <<  0) & 0x00ff0000;
	g = (g >>  8) & 0x0000ff00;
	b = (b >> 16) & 0x000000ff;
#else
	uint64_t top = line0->buffer[i];
	uint64_t bot = line1->buffer[i];
	uint64_t tar = (top & 0xffff0000ffff0000ULL) >> 16;
	uint64_t bar = (bot & 0xffff0000ffff0000ULL) >> 16;
	uint64_t tgb = (top & 0x0000ffff0000ffffULL);
	uint64_t bgb = (bot & 0x0000ffff0000ffffULL);
	uint64_t ar, gb;
	uint32_t a, r, g, b;

	/* 64-bit variant: blend alpha/red and green/blue pairs with
	 * two 64-bit multiply-adds, then extract the channels.
	 */
	ar = (tar << 8) + dist_y * (bar - tar);
	gb = (tgb << 8) + dist_y * (bgb - tgb);

	a = ((ar >> 24) & 0xff000000);
	r = ((ar >>  0) & 0x00ff0000);
	g = ((gb >> 40) & 0x0000ff00);
	b = ((gb >> 16) & 0x000000ff);
#endif

	iter->buffer[i] = a | r | g | b;
    }

    /* Advance to the source y of the next destination scanline. */
    info->y += iter->image->common.transform->matrix[1][1];

    return iter->buffer;
}
2431 
/* Iterator fini callback: releases the bilinear_info_t allocated by
 * fast_bilinear_cover_iter_init().
 */
static void
bilinear_cover_iter_fini (pixman_iter_t *iter)
{
    free (iter->data);
}
2437 
/* Iterator init for bilinear COVER fetches: computes the fixed-point
 * start coordinates from the image transform and allocates the
 * two-line interpolation cache used by fast_fetch_bilinear_cover().
 * On failure it installs a noop scanline getter, so rendering is
 * skipped rather than crashing.
 */
static void
fast_bilinear_cover_iter_init (pixman_iter_t *iter, const pixman_iter_info_t *iter_info)
{
    int width = iter->width;
    bilinear_info_t *info;
    pixman_vector_t v;

    /* Reference point is the center of the pixel */
    v.vector[0] = pixman_int_to_fixed (iter->x) + pixman_fixed_1 / 2;
    v.vector[1] = pixman_int_to_fixed (iter->y) + pixman_fixed_1 / 2;
    v.vector[2] = pixman_fixed_1;

    if (!pixman_transform_point_3d (iter->image->common.transform, &v))
	goto fail;

    /* bilinear_info_t already includes one uint64_t of data[], so
     * 2 * width - 1 extra entries give each of the two cached lines
     * `width` slots.
     */
    info = malloc (sizeof (*info) + (2 * width - 1) * sizeof (uint64_t));
    if (!info)
	goto fail;

    info->x = v.vector[0] - pixman_fixed_1 / 2;
    info->y = v.vector[1] - pixman_fixed_1 / 2;

    /* It is safe to set the y coordinates to -1 initially
     * because COVER_CLIP_BILINEAR ensures that we will only
     * be asked to fetch lines in the [0, height) interval
     */
    info->lines[0].y = -1;
    info->lines[0].buffer = &(info->data[0]);
    info->lines[1].y = -1;
    info->lines[1].buffer = &(info->data[width]);

    iter->get_scanline = fast_fetch_bilinear_cover;
    iter->fini = bilinear_cover_iter_fini;

    iter->data = info;
    return;

fail:
    /* Something went wrong, either a bad matrix or OOM; in such cases,
     * we don't guarantee any particular rendering.
     */
    _pixman_log_error (
	FUNC, "Allocation failure or bad matrix, skipping rendering\n");

    iter->get_scanline = _pixman_iter_get_scanline_noop;
    iter->fini = NULL;
}
2485 
/* Fetch one bilinear-filtered scanline from an 8888 image with
 * REPEAT_NONE.  Pixels sampled outside the source behave as transparent
 * black: out-of-bounds rows are redirected to a dummy zero buffer and
 * out-of-bounds columns are handled by dedicated left/right edge loops.
 * Only matrix[0][0] is used as the x step, so a scale + translate
 * transform is assumed.
 */
static uint32_t *
bits_image_fetch_bilinear_no_repeat_8888 (pixman_iter_t *iter,
					  const uint32_t *mask)
{

    pixman_image_t * ima = iter->image;
    int              offset = iter->x;
    int              line = iter->y++;
    int              width = iter->width;
    uint32_t *       buffer = iter->buffer;

    bits_image_t *bits = &ima->bits;
    pixman_fixed_t x_top, x_bottom, x;
    pixman_fixed_t ux_top, ux_bottom, ux;
    pixman_vector_t v;
    uint32_t top_mask, bottom_mask;
    uint32_t *top_row;
    uint32_t *bottom_row;
    uint32_t *end;
    uint32_t zero[2] = { 0, 0 };	/* dummy row for out-of-bounds lines */
    uint32_t one = 1;			/* dummy always-opaque mask entry */
    int y, y1, y2;
    int disty;
    int mask_inc;
    int w;

    /* reference point is the center of the pixel */
    v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2;
    v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2;
    v.vector[2] = pixman_fixed_1;

    if (!pixman_transform_point_3d (bits->common.transform, &v))
	return iter->buffer;

    ux = ux_top = ux_bottom = bits->common.transform->matrix[0][0];
    x = x_top = x_bottom = v.vector[0] - pixman_fixed_1/2;

    y = v.vector[1] - pixman_fixed_1/2;
    disty = pixman_fixed_to_bilinear_weight (y);

    /* Load the pointers to the first and second lines from the source
     * image that bilinear code must read.
     *
     * The main trick in this code is about the check if any line are
     * outside of the image;
     *
     * When I realize that a line (any one) is outside, I change
     * the pointer to a dummy area with zeros. Once I change this, I
     * must be sure the pointer will not change, so I set the
     * variables to each pointer increments inside the loop.
     */
    y1 = pixman_fixed_to_int (y);
    y2 = y1 + 1;

    if (y1 < 0 || y1 >= bits->height)
    {
	top_row = zero;
	x_top = 0;
	ux_top = 0;
    }
    else
    {
	top_row = bits->bits + y1 * bits->rowstride;
	x_top = x;
	ux_top = ux;
    }

    if (y2 < 0 || y2 >= bits->height)
    {
	bottom_row = zero;
	x_bottom = 0;
	ux_bottom = 0;
    }
    else
    {
	bottom_row = bits->bits + y2 * bits->rowstride;
	x_bottom = x;
	ux_bottom = ux;
    }

    /* Instead of checking whether the operation uses the mask in
     * each loop iteration, verify this only once and prepare the
     * variables to make the code smaller inside the loop.
     */
    if (!mask)
    {
        mask_inc = 0;
        mask = &one;
    }
    else
    {
        /* If have a mask, prepare the variables to check it */
        mask_inc = 1;
    }

    /* If both are zero, then the whole thing is zero */
    if (top_row == zero && bottom_row == zero)
    {
	memset (buffer, 0, width * sizeof (uint32_t));
	return iter->buffer;
    }
    else if (bits->format == PIXMAN_x8r8g8b8)
    {
	/* x8r8g8b8 has undefined alpha bits; OR in opaque alpha for the
	 * row(s) that come from real image data.
	 */
	if (top_row == zero)
	{
	    top_mask = 0;
	    bottom_mask = 0xff000000;
	}
	else if (bottom_row == zero)
	{
	    top_mask = 0xff000000;
	    bottom_mask = 0;
	}
	else
	{
	    top_mask = 0xff000000;
	    bottom_mask = 0xff000000;
	}
    }
    else
    {
	top_mask = 0;
	bottom_mask = 0;
    }

    end = buffer + width;

    /* Zero fill to the left of the image */
    while (buffer < end && x < pixman_fixed_minus_1)
    {
	*buffer++ = 0;
	x += ux;
	x_top += ux_top;
	x_bottom += ux_bottom;
	mask += mask_inc;
    }

    /* Left edge: only the right-hand sample is inside the image, so
     * interpolate against transparent black on the left.
     */
    while (buffer < end && x < 0)
    {
	uint32_t tr, br;
	int32_t distx;

	tr = top_row[pixman_fixed_to_int (x_top) + 1] | top_mask;
	br = bottom_row[pixman_fixed_to_int (x_bottom) + 1] | bottom_mask;

	distx = pixman_fixed_to_bilinear_weight (x);

	*buffer++ = bilinear_interpolation (0, tr, 0, br, distx, disty);

	x += ux;
	x_top += ux_top;
	x_bottom += ux_bottom;
	mask += mask_inc;
    }

    /* Main part: both samples of each pair are inside the image. */
    w = pixman_int_to_fixed (bits->width - 1);

    while (buffer < end  &&  x < w)
    {
	if (*mask)
	{
	    uint32_t tl, tr, bl, br;
	    int32_t distx;

	    tl = top_row [pixman_fixed_to_int (x_top)] | top_mask;
	    tr = top_row [pixman_fixed_to_int (x_top) + 1] | top_mask;
	    bl = bottom_row [pixman_fixed_to_int (x_bottom)] | bottom_mask;
	    br = bottom_row [pixman_fixed_to_int (x_bottom) + 1] | bottom_mask;

	    distx = pixman_fixed_to_bilinear_weight (x);

	    *buffer = bilinear_interpolation (tl, tr, bl, br, distx, disty);
	}

	buffer++;
	x += ux;
	x_top += ux_top;
	x_bottom += ux_bottom;
	mask += mask_inc;
    }

    /* Right Edge: only the left-hand sample is inside the image. */
    w = pixman_int_to_fixed (bits->width);
    while (buffer < end  &&  x < w)
    {
	if (*mask)
	{
	    uint32_t tl, bl;
	    int32_t distx;

	    tl = top_row [pixman_fixed_to_int (x_top)] | top_mask;
	    bl = bottom_row [pixman_fixed_to_int (x_bottom)] | bottom_mask;

	    distx = pixman_fixed_to_bilinear_weight (x);

	    *buffer = bilinear_interpolation (tl, 0, bl, 0, distx, disty);
	}

	buffer++;
	x += ux;
	x_top += ux_top;
	x_bottom += ux_bottom;
	mask += mask_inc;
    }

    /* Zero fill to the right of the image */
    while (buffer < end)
	*buffer++ = 0;

    return iter->buffer;
}
2700 
/* Fetches the pixel at column `x` of scanline `row` and widens it to a
 * 32-bit value (8888 layout in the fetchers of this file).
 */
typedef uint32_t (* convert_pixel_t) (const uint8_t *row, int x);
2702 
/* Fetch one scanline of 'width' pixels into 'buffer', sampling the
 * source through an affine transform and a separable convolution
 * filter.
 *
 * Layout of image->common.filter_params, as established by the
 * indexing below:
 *   params[0] : filter matrix width  (pixman fixed point)
 *   params[1] : filter matrix height (pixman fixed point)
 *   params[2] : number of x subsample phase bits
 *   params[3] : number of y subsample phase bits
 *   params + 4                                : x filters, one of
 *                                               cwidth taps per x phase
 *   params + 4 + (1 << x_phase_bits) * cwidth : y filters, one of
 *                                               cheight taps per y phase
 *
 * convert_pixel, format and repeat_mode are compile-time constants in
 * the instantiations generated by MAKE_SEPARABLE_CONVOLUTION_FETCHER,
 * so the branches on them are resolved when this is inlined.
 */
static force_inline void
bits_image_fetch_separable_convolution_affine (pixman_image_t * image,
					       int              offset,
					       int              line,
					       int              width,
					       uint32_t *       buffer,
					       const uint32_t * mask,

					       convert_pixel_t	convert_pixel,
					       pixman_format_code_t	format,
					       pixman_repeat_t	repeat_mode)
{
    bits_image_t *bits = &image->bits;
    pixman_fixed_t *params = image->common.filter_params;
    int cwidth = pixman_fixed_to_int (params[0]);
    int cheight = pixman_fixed_to_int (params[1]);
    /* Fixed-point distance from the sample point to the top/left of
     * the filter support: (cwidth - 1) / 2 pixels in 16.16.
     */
    int x_off = ((cwidth << 16) - pixman_fixed_1) >> 1;
    int y_off = ((cheight << 16) - pixman_fixed_1) >> 1;
    int x_phase_bits = pixman_fixed_to_int (params[2]);
    int y_phase_bits = pixman_fixed_to_int (params[3]);
    int x_phase_shift = 16 - x_phase_bits;
    int y_phase_shift = 16 - y_phase_bits;
    pixman_fixed_t vx, vy;
    pixman_fixed_t ux, uy;
    pixman_vector_t v;
    int k;

    /* reference point is the center of the pixel */
    v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2;
    v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2;
    v.vector[2] = pixman_fixed_1;

    if (!pixman_transform_point_3d (image->common.transform, &v))
	return;

    /* The transform is affine, so stepping one destination pixel to
     * the right adds the first column of the matrix to the source
     * position.
     */
    ux = image->common.transform->matrix[0][0];
    uy = image->common.transform->matrix[1][0];

    vx = v.vector[0];
    vy = v.vector[1];

    for (k = 0; k < width; ++k)
    {
	pixman_fixed_t *y_params;
	int satot, srtot, sgtot, sbtot;
	pixman_fixed_t x, y;
	int32_t x1, x2, y1, y2;
	int32_t px, py;
	int i, j;

	/* A zero mask entry means this pixel will be discarded anyway */
	if (mask && !mask[k])
	    goto next;

	/* Round x and y to the middle of the closest phase before continuing. This
	 * ensures that the convolution matrix is aligned right, since it was
	 * positioned relative to a particular phase (and not relative to whatever
	 * exact fraction we happen to get here).
	 */
	x = ((vx >> x_phase_shift) << x_phase_shift) + ((1 << x_phase_shift) >> 1);
	y = ((vy >> y_phase_shift) << y_phase_shift) + ((1 << y_phase_shift) >> 1);

	/* Subsample phase index in each direction */
	px = (x & 0xffff) >> x_phase_shift;
	py = (y & 0xffff) >> y_phase_shift;

	/* Source pixel range covered by the filter support */
	x1 = pixman_fixed_to_int (x - pixman_fixed_e - x_off);
	y1 = pixman_fixed_to_int (y - pixman_fixed_e - y_off);
	x2 = x1 + cwidth;
	y2 = y1 + cheight;

	satot = srtot = sgtot = sbtot = 0;

	/* The y filters are stored after all (1 << x_phase_bits) x
	 * filters; select the one for phase py.
	 */
	y_params = params + 4 + (1 << x_phase_bits) * cwidth + py * cheight;

	for (i = y1; i < y2; ++i)
	{
	    pixman_fixed_t fy = *y_params++;

	    if (fy)
	    {
		/* x filter for phase px */
		pixman_fixed_t *x_params = params + 4 + px * cwidth;

		for (j = x1; j < x2; ++j)
		{
		    pixman_fixed_t fx = *x_params++;
		    int rx = j;
		    int ry = i;

		    if (fx)
		    {
			pixman_fixed_t f;
			uint32_t pixel, mask;	/* NOTE(review): this 'mask' shadows the function parameter */
			uint8_t *row;

			/* Formats without an alpha channel read as
			 * fully opaque.
			 */
			mask = PIXMAN_FORMAT_A (format)? 0 : 0xff000000;

			if (repeat_mode != PIXMAN_REPEAT_NONE)
			{
			    repeat (repeat_mode, &rx, bits->width);
			    repeat (repeat_mode, &ry, bits->height);

			    row = (uint8_t *)(bits->bits + bits->rowstride * ry);
			    pixel = convert_pixel (row, rx) | mask;
			}
			else
			{
			    /* PIXMAN_REPEAT_NONE: outside the image is
			     * transparent black.
			     */
			    if (rx < 0 || ry < 0 || rx >= bits->width || ry >= bits->height)
			    {
				pixel = 0;
			    }
			    else
			    {
				row = (uint8_t *)(bits->bits + bits->rowstride * ry);
				pixel = convert_pixel (row, rx) | mask;
			    }
			}

			/* Combined x*y weight, rounded back to 16.16 */
			f = ((pixman_fixed_32_32_t)fx * fy + 0x8000) >> 16;
			srtot += (int)RED_8 (pixel) * f;
			sgtot += (int)GREEN_8 (pixel) * f;
			sbtot += (int)BLUE_8 (pixel) * f;
			satot += (int)ALPHA_8 (pixel) * f;
		    }
		}
	    }
	}

	/* Round the accumulated 16.16 channel sums to 8 bit and clamp */
	satot = (satot + 0x8000) >> 16;
	srtot = (srtot + 0x8000) >> 16;
	sgtot = (sgtot + 0x8000) >> 16;
	sbtot = (sbtot + 0x8000) >> 16;

	satot = CLIP (satot, 0, 0xff);
	srtot = CLIP (srtot, 0, 0xff);
	sgtot = CLIP (sgtot, 0, 0xff);
	sbtot = CLIP (sbtot, 0, 0xff);

	buffer[k] = (satot << 24) | (srtot << 16) | (sgtot << 8) | (sbtot << 0);

    next:
	vx += ux;
	vy += uy;
    }
}
2846 
/* A transparent-black source row substituted for rows that fall just
 * outside the image under PIXMAN_REPEAT_NONE (see the bilinear fetcher
 * below).
 */
static const uint8_t zero[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
2848 
2849 static force_inline void
bits_image_fetch_bilinear_affine(pixman_image_t * image,int offset,int line,int width,uint32_t * buffer,const uint32_t * mask,convert_pixel_t convert_pixel,pixman_format_code_t format,pixman_repeat_t repeat_mode)2850 bits_image_fetch_bilinear_affine (pixman_image_t * image,
2851 				  int              offset,
2852 				  int              line,
2853 				  int              width,
2854 				  uint32_t *       buffer,
2855 				  const uint32_t * mask,
2856 
2857 				  convert_pixel_t	convert_pixel,
2858 				  pixman_format_code_t	format,
2859 				  pixman_repeat_t	repeat_mode)
2860 {
2861     pixman_fixed_t x, y;
2862     pixman_fixed_t ux, uy;
2863     pixman_vector_t v;
2864     bits_image_t *bits = &image->bits;
2865     int i;
2866 
2867     /* reference point is the center of the pixel */
2868     v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2;
2869     v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2;
2870     v.vector[2] = pixman_fixed_1;
2871 
2872     if (!pixman_transform_point_3d (image->common.transform, &v))
2873 	return;
2874 
2875     ux = image->common.transform->matrix[0][0];
2876     uy = image->common.transform->matrix[1][0];
2877 
2878     x = v.vector[0];
2879     y = v.vector[1];
2880 
2881     for (i = 0; i < width; ++i)
2882     {
2883 	int x1, y1, x2, y2;
2884 	uint32_t tl, tr, bl, br;
2885 	int32_t distx, disty;
2886 	int width = image->bits.width;
2887 	int height = image->bits.height;
2888 	const uint8_t *row1;
2889 	const uint8_t *row2;
2890 
2891 	if (mask && !mask[i])
2892 	    goto next;
2893 
2894 	x1 = x - pixman_fixed_1 / 2;
2895 	y1 = y - pixman_fixed_1 / 2;
2896 
2897 	distx = pixman_fixed_to_bilinear_weight (x1);
2898 	disty = pixman_fixed_to_bilinear_weight (y1);
2899 
2900 	y1 = pixman_fixed_to_int (y1);
2901 	y2 = y1 + 1;
2902 	x1 = pixman_fixed_to_int (x1);
2903 	x2 = x1 + 1;
2904 
2905 	if (repeat_mode != PIXMAN_REPEAT_NONE)
2906 	{
2907 	    uint32_t mask;
2908 
2909 	    mask = PIXMAN_FORMAT_A (format)? 0 : 0xff000000;
2910 
2911 	    repeat (repeat_mode, &x1, width);
2912 	    repeat (repeat_mode, &y1, height);
2913 	    repeat (repeat_mode, &x2, width);
2914 	    repeat (repeat_mode, &y2, height);
2915 
2916 	    row1 = (uint8_t *)(bits->bits + bits->rowstride * y1);
2917 	    row2 = (uint8_t *)(bits->bits + bits->rowstride * y2);
2918 
2919 	    tl = convert_pixel (row1, x1) | mask;
2920 	    tr = convert_pixel (row1, x2) | mask;
2921 	    bl = convert_pixel (row2, x1) | mask;
2922 	    br = convert_pixel (row2, x2) | mask;
2923 	}
2924 	else
2925 	{
2926 	    uint32_t mask1, mask2;
2927 	    int bpp;
2928 
2929 	    /* Note: PIXMAN_FORMAT_BPP() returns an unsigned value,
2930 	     * which means if you use it in expressions, those
2931 	     * expressions become unsigned themselves. Since
2932 	     * the variables below can be negative in some cases,
2933 	     * that will lead to crashes on 64 bit architectures.
2934 	     *
2935 	     * So this line makes sure bpp is signed
2936 	     */
2937 	    bpp = PIXMAN_FORMAT_BPP (format);
2938 
2939 	    if (x1 >= width || x2 < 0 || y1 >= height || y2 < 0)
2940 	    {
2941 		buffer[i] = 0;
2942 		goto next;
2943 	    }
2944 
2945 	    if (y2 == 0)
2946 	    {
2947 		row1 = zero;
2948 		mask1 = 0;
2949 	    }
2950 	    else
2951 	    {
2952 		row1 = (uint8_t *)(bits->bits + bits->rowstride * y1);
2953 		row1 += bpp / 8 * x1;
2954 
2955 		mask1 = PIXMAN_FORMAT_A (format)? 0 : 0xff000000;
2956 	    }
2957 
2958 	    if (y1 == height - 1)
2959 	    {
2960 		row2 = zero;
2961 		mask2 = 0;
2962 	    }
2963 	    else
2964 	    {
2965 		row2 = (uint8_t *)(bits->bits + bits->rowstride * y2);
2966 		row2 += bpp / 8 * x1;
2967 
2968 		mask2 = PIXMAN_FORMAT_A (format)? 0 : 0xff000000;
2969 	    }
2970 
2971 	    if (x2 == 0)
2972 	    {
2973 		tl = 0;
2974 		bl = 0;
2975 	    }
2976 	    else
2977 	    {
2978 		tl = convert_pixel (row1, 0) | mask1;
2979 		bl = convert_pixel (row2, 0) | mask2;
2980 	    }
2981 
2982 	    if (x1 == width - 1)
2983 	    {
2984 		tr = 0;
2985 		br = 0;
2986 	    }
2987 	    else
2988 	    {
2989 		tr = convert_pixel (row1, 1) | mask1;
2990 		br = convert_pixel (row2, 1) | mask2;
2991 	    }
2992 	}
2993 
2994 	buffer[i] = bilinear_interpolation (
2995 	    tl, tr, bl, br, distx, disty);
2996 
2997     next:
2998 	x += ux;
2999 	y += uy;
3000     }
3001 }
3002 
3003 static force_inline void
bits_image_fetch_nearest_affine(pixman_image_t * image,int offset,int line,int width,uint32_t * buffer,const uint32_t * mask,convert_pixel_t convert_pixel,pixman_format_code_t format,pixman_repeat_t repeat_mode)3004 bits_image_fetch_nearest_affine (pixman_image_t * image,
3005 				 int              offset,
3006 				 int              line,
3007 				 int              width,
3008 				 uint32_t *       buffer,
3009 				 const uint32_t * mask,
3010 
3011 				 convert_pixel_t	convert_pixel,
3012 				 pixman_format_code_t	format,
3013 				 pixman_repeat_t	repeat_mode)
3014 {
3015     pixman_fixed_t x, y;
3016     pixman_fixed_t ux, uy;
3017     pixman_vector_t v;
3018     bits_image_t *bits = &image->bits;
3019     int i;
3020 
3021     /* reference point is the center of the pixel */
3022     v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2;
3023     v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2;
3024     v.vector[2] = pixman_fixed_1;
3025 
3026     if (!pixman_transform_point_3d (image->common.transform, &v))
3027 	return;
3028 
3029     ux = image->common.transform->matrix[0][0];
3030     uy = image->common.transform->matrix[1][0];
3031 
3032     x = v.vector[0];
3033     y = v.vector[1];
3034 
3035     for (i = 0; i < width; ++i)
3036     {
3037 	int width, height, x0, y0;
3038 	const uint8_t *row;
3039 
3040 	if (mask && !mask[i])
3041 	    goto next;
3042 
3043 	width = image->bits.width;
3044 	height = image->bits.height;
3045 	x0 = pixman_fixed_to_int (x - pixman_fixed_e);
3046 	y0 = pixman_fixed_to_int (y - pixman_fixed_e);
3047 
3048 	if (repeat_mode == PIXMAN_REPEAT_NONE &&
3049 	    (y0 < 0 || y0 >= height || x0 < 0 || x0 >= width))
3050 	{
3051 	    buffer[i] = 0;
3052 	}
3053 	else
3054 	{
3055 	    uint32_t mask = PIXMAN_FORMAT_A (format)? 0 : 0xff000000;
3056 
3057 	    if (repeat_mode != PIXMAN_REPEAT_NONE)
3058 	    {
3059 		repeat (repeat_mode, &x0, width);
3060 		repeat (repeat_mode, &y0, height);
3061 	    }
3062 
3063 	    row = (uint8_t *)(bits->bits + bits->rowstride * y0);
3064 
3065 	    buffer[i] = convert_pixel (row, x0) | mask;
3066 	}
3067 
3068     next:
3069 	x += ux;
3070 	y += uy;
3071     }
3072 }
3073 
3074 static force_inline uint32_t
convert_a8r8g8b8(const uint8_t * row,int x)3075 convert_a8r8g8b8 (const uint8_t *row, int x)
3076 {
3077     return *(((uint32_t *)row) + x);
3078 }
3079 
3080 static force_inline uint32_t
convert_x8r8g8b8(const uint8_t * row,int x)3081 convert_x8r8g8b8 (const uint8_t *row, int x)
3082 {
3083     return *(((uint32_t *)row) + x);
3084 }
3085 
3086 static force_inline uint32_t
convert_a8(const uint8_t * row,int x)3087 convert_a8 (const uint8_t *row, int x)
3088 {
3089     return (uint32_t) *(row + x) << 24;
3090 }
3091 
3092 static force_inline uint32_t
convert_r5g6b5(const uint8_t * row,int x)3093 convert_r5g6b5 (const uint8_t *row, int x)
3094 {
3095     return convert_0565_to_0888 (*((uint16_t *)row + x));
3096 }
3097 
/* Generates a scanline-fetch function that forwards to
 * bits_image_fetch_separable_convolution_affine with the pixel
 * converter, format code and repeat mode baked in at compile time
 * (the force_inline body then folds the constant branches away).
 */
#define MAKE_SEPARABLE_CONVOLUTION_FETCHER(name, format, repeat_mode)  \
    static uint32_t *							\
    bits_image_fetch_separable_convolution_affine_ ## name (pixman_iter_t   *iter, \
							    const uint32_t * mask) \
    {									\
	bits_image_fetch_separable_convolution_affine (                 \
	    iter->image,                                                \
	    iter->x, iter->y++,                                         \
	    iter->width,                                                \
	    iter->buffer, mask,                                         \
	    convert_ ## format,                                         \
	    PIXMAN_ ## format,                                          \
	    repeat_mode);                                               \
									\
	return iter->buffer;                                            \
    }

/* Same, forwarding to bits_image_fetch_bilinear_affine */
#define MAKE_BILINEAR_FETCHER(name, format, repeat_mode)		\
    static uint32_t *							\
    bits_image_fetch_bilinear_affine_ ## name (pixman_iter_t   *iter,	\
					       const uint32_t * mask)	\
    {									\
	bits_image_fetch_bilinear_affine (iter->image,			\
					  iter->x, iter->y++,		\
					  iter->width,			\
					  iter->buffer, mask,		\
					  convert_ ## format,		\
					  PIXMAN_ ## format,		\
					  repeat_mode);			\
	return iter->buffer;						\
    }

/* Same, forwarding to bits_image_fetch_nearest_affine */
#define MAKE_NEAREST_FETCHER(name, format, repeat_mode)			\
    static uint32_t *							\
    bits_image_fetch_nearest_affine_ ## name (pixman_iter_t   *iter,	\
					      const uint32_t * mask)	\
    {									\
	bits_image_fetch_nearest_affine (iter->image,			\
					 iter->x, iter->y++,		\
					 iter->width,			\
					 iter->buffer, mask,		\
					 convert_ ## format,		\
					 PIXMAN_ ## format,		\
					 repeat_mode);			\
	return iter->buffer;						\
    }

/* Generates the nearest, bilinear and separable-convolution fetchers
 * for one (format, repeat mode) combination.
 */
#define MAKE_FETCHERS(name, format, repeat_mode)			\
    MAKE_NEAREST_FETCHER (name, format, repeat_mode)			\
    MAKE_BILINEAR_FETCHER (name, format, repeat_mode)			\
    MAKE_SEPARABLE_CONVOLUTION_FETCHER (name, format, repeat_mode)
3149 
/* Instantiate all three fetchers for every supported combination of
 * pixel format and repeat mode; the fast_iters table below refers to
 * these by name via the *_AFFINE_FAST_PATH macros.
 */
MAKE_FETCHERS (pad_a8r8g8b8,     a8r8g8b8, PIXMAN_REPEAT_PAD)
MAKE_FETCHERS (none_a8r8g8b8,    a8r8g8b8, PIXMAN_REPEAT_NONE)
MAKE_FETCHERS (reflect_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_REFLECT)
MAKE_FETCHERS (normal_a8r8g8b8,  a8r8g8b8, PIXMAN_REPEAT_NORMAL)
MAKE_FETCHERS (pad_x8r8g8b8,     x8r8g8b8, PIXMAN_REPEAT_PAD)
MAKE_FETCHERS (none_x8r8g8b8,    x8r8g8b8, PIXMAN_REPEAT_NONE)
MAKE_FETCHERS (reflect_x8r8g8b8, x8r8g8b8, PIXMAN_REPEAT_REFLECT)
MAKE_FETCHERS (normal_x8r8g8b8,  x8r8g8b8, PIXMAN_REPEAT_NORMAL)
MAKE_FETCHERS (pad_a8,           a8,       PIXMAN_REPEAT_PAD)
MAKE_FETCHERS (none_a8,          a8,       PIXMAN_REPEAT_NONE)
MAKE_FETCHERS (reflect_a8,	 a8,       PIXMAN_REPEAT_REFLECT)
MAKE_FETCHERS (normal_a8,	 a8,       PIXMAN_REPEAT_NORMAL)
MAKE_FETCHERS (pad_r5g6b5,       r5g6b5,   PIXMAN_REPEAT_PAD)
MAKE_FETCHERS (none_r5g6b5,      r5g6b5,   PIXMAN_REPEAT_NONE)
MAKE_FETCHERS (reflect_r5g6b5,   r5g6b5,   PIXMAN_REPEAT_REFLECT)
MAKE_FETCHERS (normal_r5g6b5,    r5g6b5,   PIXMAN_REPEAT_NORMAL)
3166 
/* Flags under which the plain r5g6b5 source fetcher is usable: a
 * standard bits image with identity transform whose samples cover the
 * clip.
 */
#define IMAGE_FLAGS							\
    (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM |		\
     FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST)

/* Iterators this implementation can provide; matched against an
 * image's format and fast-path flags.
 */
static const pixman_iter_info_t fast_iters[] =
{
    { PIXMAN_r5g6b5, IMAGE_FLAGS, ITER_NARROW | ITER_SRC,
      _pixman_iter_init_bits_stride, fast_fetch_r5g6b5, NULL },

    { PIXMAN_r5g6b5, FAST_PATH_STD_DEST_FLAGS,
      ITER_NARROW | ITER_DEST,
      _pixman_iter_init_bits_stride,
      fast_fetch_r5g6b5, fast_write_back_r5g6b5 },

    /* When both RGB and alpha of the destination are ignored, there is
     * no need to fetch it before the write-back: use a no-op fetch.
     */
    { PIXMAN_r5g6b5, FAST_PATH_STD_DEST_FLAGS,
      ITER_NARROW | ITER_DEST | ITER_IGNORE_RGB | ITER_IGNORE_ALPHA,
      _pixman_iter_init_bits_stride,
      fast_dest_fetch_noop, fast_write_back_r5g6b5 },

    { PIXMAN_a8r8g8b8,
      (FAST_PATH_STANDARD_FLAGS			|
       FAST_PATH_SCALE_TRANSFORM		|
       FAST_PATH_BILINEAR_FILTER		|
       FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR),
      ITER_NARROW | ITER_SRC,
      fast_bilinear_cover_iter_init,
      NULL, NULL
    },

/* Bilinear sampling with no repeat and an x-monotonic, y-constant
 * affine transform has a dedicated fetcher.
 */
#define FAST_BILINEAR_FLAGS						\
    (FAST_PATH_NO_ALPHA_MAP		|				\
     FAST_PATH_NO_ACCESSORS		|				\
     FAST_PATH_HAS_TRANSFORM		|				\
     FAST_PATH_AFFINE_TRANSFORM		|				\
     FAST_PATH_X_UNIT_POSITIVE		|				\
     FAST_PATH_Y_UNIT_ZERO		|				\
     FAST_PATH_NONE_REPEAT		|				\
     FAST_PATH_BILINEAR_FILTER)

    { PIXMAN_a8r8g8b8,
      FAST_BILINEAR_FLAGS,
      ITER_NARROW | ITER_SRC,
      NULL, bits_image_fetch_bilinear_no_repeat_8888, NULL
    },

    { PIXMAN_x8r8g8b8,
      FAST_BILINEAR_FLAGS,
      ITER_NARROW | ITER_SRC,
      NULL, bits_image_fetch_bilinear_no_repeat_8888, NULL
    },

#define GENERAL_BILINEAR_FLAGS						\
    (FAST_PATH_NO_ALPHA_MAP		|				\
     FAST_PATH_NO_ACCESSORS		|				\
     FAST_PATH_HAS_TRANSFORM		|				\
     FAST_PATH_AFFINE_TRANSFORM		|				\
     FAST_PATH_BILINEAR_FILTER)

#define GENERAL_NEAREST_FLAGS						\
    (FAST_PATH_NO_ALPHA_MAP		|				\
     FAST_PATH_NO_ACCESSORS		|				\
     FAST_PATH_HAS_TRANSFORM		|				\
     FAST_PATH_AFFINE_TRANSFORM		|				\
     FAST_PATH_NEAREST_FILTER)

#define GENERAL_SEPARABLE_CONVOLUTION_FLAGS				\
    (FAST_PATH_NO_ALPHA_MAP            |				\
     FAST_PATH_NO_ACCESSORS            |				\
     FAST_PATH_HAS_TRANSFORM           |				\
     FAST_PATH_AFFINE_TRANSFORM        |				\
     FAST_PATH_SEPARABLE_CONVOLUTION_FILTER)

/* Expands to one table entry referring to the corresponding fetcher
 * generated by MAKE_FETCHERS above.
 */
#define SEPARABLE_CONVOLUTION_AFFINE_FAST_PATH(name, format, repeat)   \
    { PIXMAN_ ## format,						\
      GENERAL_SEPARABLE_CONVOLUTION_FLAGS | FAST_PATH_ ## repeat ## _REPEAT, \
      ITER_NARROW | ITER_SRC,						\
      NULL, bits_image_fetch_separable_convolution_affine_ ## name, NULL \
    },

#define BILINEAR_AFFINE_FAST_PATH(name, format, repeat)			\
    { PIXMAN_ ## format,						\
      GENERAL_BILINEAR_FLAGS | FAST_PATH_ ## repeat ## _REPEAT,		\
      ITER_NARROW | ITER_SRC,						\
      NULL, bits_image_fetch_bilinear_affine_ ## name, NULL,		\
    },

#define NEAREST_AFFINE_FAST_PATH(name, format, repeat)			\
    { PIXMAN_ ## format,						\
      GENERAL_NEAREST_FLAGS | FAST_PATH_ ## repeat ## _REPEAT,		\
      ITER_NARROW | ITER_SRC,						\
      NULL, bits_image_fetch_nearest_affine_ ## name, NULL		\
    },

/* All three filter entries for one (format, repeat) combination */
#define AFFINE_FAST_PATHS(name, format, repeat)				\
    NEAREST_AFFINE_FAST_PATH(name, format, repeat)			\
    BILINEAR_AFFINE_FAST_PATH(name, format, repeat)			\
    SEPARABLE_CONVOLUTION_AFFINE_FAST_PATH(name, format, repeat)

    AFFINE_FAST_PATHS (pad_a8r8g8b8, a8r8g8b8, PAD)
    AFFINE_FAST_PATHS (none_a8r8g8b8, a8r8g8b8, NONE)
    AFFINE_FAST_PATHS (reflect_a8r8g8b8, a8r8g8b8, REFLECT)
    AFFINE_FAST_PATHS (normal_a8r8g8b8, a8r8g8b8, NORMAL)
    AFFINE_FAST_PATHS (pad_x8r8g8b8, x8r8g8b8, PAD)
    AFFINE_FAST_PATHS (none_x8r8g8b8, x8r8g8b8, NONE)
    AFFINE_FAST_PATHS (reflect_x8r8g8b8, x8r8g8b8, REFLECT)
    AFFINE_FAST_PATHS (normal_x8r8g8b8, x8r8g8b8, NORMAL)
    AFFINE_FAST_PATHS (pad_a8, a8, PAD)
    AFFINE_FAST_PATHS (none_a8, a8, NONE)
    AFFINE_FAST_PATHS (reflect_a8, a8, REFLECT)
    AFFINE_FAST_PATHS (normal_a8, a8, NORMAL)
    AFFINE_FAST_PATHS (pad_r5g6b5, r5g6b5, PAD)
    AFFINE_FAST_PATHS (none_r5g6b5, r5g6b5, NONE)
    AFFINE_FAST_PATHS (reflect_r5g6b5, r5g6b5, REFLECT)
    AFFINE_FAST_PATHS (normal_r5g6b5, r5g6b5, NORMAL)

    { PIXMAN_null },	/* sentinel: terminates the table */
};
3284 
3285 pixman_implementation_t *
_pixman_implementation_create_fast_path(pixman_implementation_t * fallback)3286 _pixman_implementation_create_fast_path (pixman_implementation_t *fallback)
3287 {
3288     pixman_implementation_t *imp = _pixman_implementation_create (fallback, c_fast_paths);
3289 
3290     imp->fill = fast_path_fill;
3291     imp->iter_info = fast_iters;
3292 
3293     return imp;
3294 }
3295