1changeset:   94061:73a9b24d863a
2tag:         bilin
3tag:         qbase
4tag:         qtip
5tag:         tip
6user:        Jeff Muizelaar <jmuizelaar@mozilla.com>
7date:        Tue May 15 18:26:16 2012 -0400
8summary:     Bug 754364. Add bilinear non-repeat and repeat fast paths. r=joe
9
10diff --git a/gfx/cairo/libpixman/src/pixman-fast-path.c b/gfx/cairo/libpixman/src/pixman-fast-path.c
11--- a/gfx/cairo/libpixman/src/pixman-fast-path.c
12+++ b/gfx/cairo/libpixman/src/pixman-fast-path.c
13@@ -1186,16 +1186,228 @@ FAST_NEAREST (8888_565_none, 8888, 0565,
14 FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, SRC, PAD)
15 FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, SRC, NORMAL)
16 FAST_NEAREST (565_565_normal, 0565, 0565, uint16_t, uint16_t, SRC, NORMAL)
17 FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, OVER, COVER)
18 FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, OVER, NONE)
19 FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, OVER, PAD)
20 FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, OVER, NORMAL)
21
22+static force_inline void /* OVER-composite one bilinearly filtered 32-bit source scanline onto 16-bit 0565 pixels */
23+scaled_bilinear_scanline_8888_565_OVER (uint16_t *       dst, /* destination pixels; each one is read, blended and rewritten */
24+                                        const uint32_t * mask, /* not referenced in this function */
25+                                        const uint32_t * src_top, /* first source row, indexed by the integer part of vx */
26+                                        const uint32_t * src_bottom, /* second source row, indexed the same way */
27+                                        int32_t          w, /* number of destination pixels to produce */
28+                                        int              wt, /* not referenced in this function */
29+                                        int              wb, /* weight handed to bilinear_interpolation () as its last argument */
30+                                        pixman_fixed_t   vx, /* fixed-point source x position of the first pixel */
31+                                        pixman_fixed_t   unit_x, /* increment added to vx after every pixel */
32+                                        pixman_fixed_t   max_vx, /* not referenced in this function */
33+                                        pixman_bool_t    zero_src) /* not referenced in this function */
34+{
35+    while ((w -= 1) >= 0) /* one iteration per destination pixel */
36+    {
37+	uint32_t tl = src_top [pixman_fixed_to_int (vx)];
38+	uint32_t tr = src_top [pixman_fixed_to_int (vx) + 1]; /* NOTE(review): reads one element past int(vx); presumably the caller guarantees it exists */
39+	uint32_t bl = src_bottom [pixman_fixed_to_int (vx)];
40+	uint32_t br = src_bottom [pixman_fixed_to_int (vx) + 1];
41+	uint32_t src, result;
42+	uint16_t d;
43+	d = *dst; /* current destination value, needed for OVER */
44+	src = bilinear_interpolation (tl, tr,
45+				      bl, br,
46+				      interpolation_coord(vx), /* fractional bits of vx */
47+				      wb >> (8 - INTERPOLATION_PRECISION_BITS)); /* assumes wb is an 8-bit weight - TODO confirm */
48+	vx += unit_x; /* step to the next source position */
49+	result = over (src, CONVERT_0565_TO_0888 (d)); /* widen d to 0888 before compositing */
50+	*dst++ = CONVERT_8888_TO_0565(result); /* pack back to 0565 and store */
51+    }
52+}
53+
54+static force_inline void /* OVER-composite one bilinearly filtered 32-bit source scanline onto 32-bit pixels */
55+scaled_bilinear_scanline_8888_8888_OVER (uint32_t *       dst, /* destination pixels; each one is read, blended and rewritten */
56+                                         const uint32_t * mask, /* not referenced in this function */
57+                                         const uint32_t * src_top, /* first source row, indexed by the integer part of vx */
58+                                         const uint32_t * src_bottom, /* second source row, indexed the same way */
59+                                         int32_t          w, /* number of destination pixels to produce */
60+                                         int              wt, /* not referenced in this function */
61+                                         int              wb, /* weight handed to bilinear_interpolation () as its last argument */
62+                                         pixman_fixed_t   vx, /* fixed-point source x position of the first pixel */
63+                                         pixman_fixed_t   unit_x, /* increment added to vx after every pixel */
64+                                         pixman_fixed_t   max_vx, /* not referenced in this function */
65+                                         pixman_bool_t    zero_src) /* not referenced in this function */
66+{
67+    while ((w -= 1) >= 0) /* one iteration per destination pixel */
68+    {
69+	uint32_t tl = src_top [pixman_fixed_to_int (vx)];
70+	uint32_t tr = src_top [pixman_fixed_to_int (vx) + 1]; /* NOTE(review): reads one element past int(vx); presumably the caller guarantees it exists */
71+	uint32_t bl = src_bottom [pixman_fixed_to_int (vx)];
72+	uint32_t br = src_bottom [pixman_fixed_to_int (vx) + 1];
73+	uint32_t src, result;
74+	uint32_t d;
75+	d = *dst; /* current destination value, needed for OVER */
76+	src = bilinear_interpolation (tl, tr,
77+				      bl, br,
78+				      interpolation_coord(vx), /* fractional bits of vx */
79+				      wb >> (8 - INTERPOLATION_PRECISION_BITS)); /* assumes wb is an 8-bit weight - TODO confirm */
80+	vx += unit_x; /* step to the next source position */
81+	result = over (src, d); /* composite, as in the 8888_565 variant but without format conversion */
82+	*dst++ = result;
83+    }
84+}
85+
86+#if 1
87+
88+static force_inline void /* SRC: produce one bilinearly filtered 0565 scanline; dst is overwritten, never read */
89+scaled_bilinear_scanline_565_565_SRC (uint16_t *       dst, /* output pixels */
90+				      const uint32_t * mask, /* not referenced in this function */
91+				      const uint16_t * src_top, /* first source row, indexed by the integer part of vx */
92+				      const uint16_t * src_bottom, /* second source row, indexed the same way */
93+				      int32_t          w, /* number of destination pixels to produce */
94+				      int              wt, /* not referenced in this function */
95+				      int              wb, /* weight handed to bilinear_interpolation () as its last argument */
96+				      pixman_fixed_t   vx, /* fixed-point source x position of the first pixel */
97+				      pixman_fixed_t   unit_x, /* increment added to vx after every pixel */
98+				      pixman_fixed_t   max_vx, /* not referenced in this function */
99+				      pixman_bool_t    zero_src) /* not referenced in this function */
100+{
101+    while ((w -= 1) >= 0) /* one iteration per destination pixel */
102+    {
103+	uint16_t tl = src_top [pixman_fixed_to_int (vx)];
104+	uint16_t tr = src_top [pixman_fixed_to_int (vx) + 1]; /* NOTE(review): reads one element past int(vx); presumably the caller guarantees it exists */
105+	uint16_t bl = src_bottom [pixman_fixed_to_int (vx)];
106+	uint16_t br = src_bottom [pixman_fixed_to_int (vx) + 1];
107+	uint32_t d;
108+	d = bilinear_interpolation(CONVERT_0565_TO_8888(tl), /* each sample is widened to 8888 before filtering */
109+				   CONVERT_0565_TO_8888(tr),
110+				   CONVERT_0565_TO_8888(bl),
111+				   CONVERT_0565_TO_8888(br),
112+				   interpolation_coord(vx), /* fractional bits of vx */
113+				   wb >> (8 - INTERPOLATION_PRECISION_BITS)); /* assumes wb is an 8-bit weight - TODO confirm */
114+	vx += unit_x; /* step to the next source position */
115+	*dst++ = CONVERT_8888_TO_0565(d); /* pack the filtered value back to 0565 */
116+    }
117+}
118+
119+#else
120+
121+#define SK_G16_MASK_IN_PLACE 0x7e0 /* green field of an r5g6b5 pixel: 6 bits at bit 5 (0x3f << 5) */
122+
123+static inline uint32_t SkExpand_rgb_16(uint16_t c) { /* spread a 565 pixel over 32 bits so all channels can be scaled at once */
124+
125+    return ((c & SK_G16_MASK_IN_PLACE) << 16) | (c & ~SK_G16_MASK_IN_PLACE); /* green moves to bits 21..26; red and blue stay put, leaving 5 spare bits above each field */
126+}
127+
128+/** Compress an expanded value (from SkExpand_rgb_16) back down to a 16-bit
129+    color value: the bits selected by SK_G16_MASK_IN_PLACE are taken from the
130+    upper half of c (c >> 16) and all remaining bits from c itself. The
131+    expression is 32 bits wide and its upper 16 bits may hold leftover data;
132+    the uint16_t return type discards them, so callers need not mask the result.
133+*/
134+static inline uint16_t SkCompact_rgb_16(uint32_t c) {
135+    return ((c >> 16) & SK_G16_MASK_IN_PLACE) | (c & ~SK_G16_MASK_IN_PLACE);
136+}
137+/* Weighted sum of four 565 pixels in expanded form; the four weights add up to 32, so the result is an expanded pixel scaled by 32 (5 extra bits) */
138+static inline uint32_t Filter_565_Expanded(unsigned x, unsigned y, /* filter coordinates; assumes both are in 0..15 - TODO confirm for y */
139+                                           uint32_t a00, uint32_t a01, /* 565 pixels weighted by (no x, no y) and (x only) */
140+                                           uint32_t a10, uint32_t a11) { /* 565 pixels weighted by (y only) and (x and y) */
141+    a00 = SkExpand_rgb_16(a00); /* the uint32_t arguments are narrowed to uint16_t by these calls */
142+    a01 = SkExpand_rgb_16(a01);
143+    a10 = SkExpand_rgb_16(a10);
144+    a11 = SkExpand_rgb_16(a11);
145+
146+    int xy = x * y >> 3; /* x*y/8, the weight of a11 */
147+    return  a00 * (32 - 2*y - 2*x + xy) + /* remaining weight so that the total is 32 */
148+            a01 * (2*x - xy) +
149+            a10 * (2*y - xy) +
150+            a11 * xy;
151+}
152+
153+
154+
155+static force_inline void /* alternative SRC scanline (compiled only if the "#if 1" above is changed): filters directly in expanded 565 form */
156+scaled_bilinear_scanline_565_565_SRC (uint16_t *       dst, /* output pixels; written, never read */
157+				      const uint32_t * mask, /* not referenced in this function */
158+				      const uint16_t * src_top, /* first source row, indexed by the integer part of vx */
159+				      const uint16_t * src_bottom, /* second source row, indexed the same way */
160+				      int32_t          w, /* number of destination pixels to produce */
161+				      int              wt, /* not referenced in this function */
162+				      int              wb, /* weight; its value shifted right by 4 becomes the y filter coordinate */
163+				      pixman_fixed_t   vx, /* fixed-point source x position of the first pixel */
164+				      pixman_fixed_t   unit_x, /* increment added to vx after every pixel */
165+				      pixman_fixed_t   max_vx, /* not referenced in this function */
166+				      pixman_bool_t    zero_src) /* not referenced in this function */
167+{
168+    while ((w -= 1) >= 0) /* one iteration per destination pixel */
169+    {
170+	uint16_t tl = src_top [pixman_fixed_to_int (vx)];
171+	uint16_t tr = src_top [pixman_fixed_to_int (vx) + 1]; /* NOTE(review): reads one element past int(vx); presumably the caller guarantees it exists */
172+	uint16_t bl = src_bottom [pixman_fixed_to_int (vx)];
173+	uint16_t br = src_bottom [pixman_fixed_to_int (vx) + 1];
174+
175+        uint32_t tmp = Filter_565_Expanded((vx>>12)&0xf, wb>>4, tl, tr, bl, br); /* x is a 4-bit value taken from vx; assumes wb>>4 also fits 0..15 - TODO confirm */
176+        vx += unit_x; /* step to the next source position */
177+        *dst++ = SkCompact_rgb_16((tmp) >> 5); /* divide out the weight total of 32, then repack to 16 bits */
178+    }
179+}
180+
181+
182+#endif
183+FAST_BILINEAR_MAINLOOP_COMMON (565_565_cover_SRC, /* 565 -> 565 SRC, four instantiations differing in the COVER/PAD/NONE/NORMAL argument; the macro is defined outside this view */
184+			       scaled_bilinear_scanline_565_565_SRC,
185+			       uint16_t, uint32_t, uint16_t, /* agree with the scanline function's src, mask and dst element types */
186+			       COVER, FLAG_NONE)
187+FAST_BILINEAR_MAINLOOP_COMMON (565_565_pad_SRC,
188+			       scaled_bilinear_scanline_565_565_SRC,
189+			       uint16_t, uint32_t, uint16_t,
190+			       PAD, FLAG_NONE)
191+FAST_BILINEAR_MAINLOOP_COMMON (565_565_none_SRC,
192+			       scaled_bilinear_scanline_565_565_SRC,
193+			       uint16_t, uint32_t, uint16_t,
194+			       NONE, FLAG_NONE)
195+FAST_BILINEAR_MAINLOOP_COMMON (565_565_normal_SRC,
196+			       scaled_bilinear_scanline_565_565_SRC,
197+			       uint16_t, uint32_t, uint16_t,
198+			       NORMAL, FLAG_NONE)
199+
200+FAST_BILINEAR_MAINLOOP_COMMON (8888_565_cover_OVER, /* 8888 -> 565 OVER, same four variants */
201+			       scaled_bilinear_scanline_8888_565_OVER,
202+			       uint32_t, uint32_t, uint16_t, /* agree with the scanline function's src, mask and dst element types */
203+			       COVER, FLAG_NONE)
204+FAST_BILINEAR_MAINLOOP_COMMON (8888_565_pad_OVER,
205+			       scaled_bilinear_scanline_8888_565_OVER,
206+			       uint32_t, uint32_t, uint16_t,
207+			       PAD, FLAG_NONE)
208+FAST_BILINEAR_MAINLOOP_COMMON (8888_565_none_OVER,
209+			       scaled_bilinear_scanline_8888_565_OVER,
210+			       uint32_t, uint32_t, uint16_t,
211+			       NONE, FLAG_NONE)
212+FAST_BILINEAR_MAINLOOP_COMMON (8888_565_normal_OVER,
213+			       scaled_bilinear_scanline_8888_565_OVER,
214+			       uint32_t, uint32_t, uint16_t,
215+			       NORMAL, FLAG_NONE)
216+
217+FAST_BILINEAR_MAINLOOP_COMMON (8888_8888_cover_OVER, /* 8888 -> 8888 OVER, same four variants */
218+			       scaled_bilinear_scanline_8888_8888_OVER,
219+			       uint32_t, uint32_t, uint32_t, /* agree with the scanline function's src, mask and dst element types */
220+			       COVER, FLAG_NONE)
221+FAST_BILINEAR_MAINLOOP_COMMON (8888_8888_pad_OVER,
222+			       scaled_bilinear_scanline_8888_8888_OVER,
223+			       uint32_t, uint32_t, uint32_t,
224+			       PAD, FLAG_NONE)
225+FAST_BILINEAR_MAINLOOP_COMMON (8888_8888_none_OVER,
226+			       scaled_bilinear_scanline_8888_8888_OVER,
227+			       uint32_t, uint32_t, uint32_t,
228+			       NONE, FLAG_NONE)
229+FAST_BILINEAR_MAINLOOP_COMMON (8888_8888_normal_OVER,
230+			       scaled_bilinear_scanline_8888_8888_OVER,
231+			       uint32_t, uint32_t, uint32_t,
232+			       NORMAL, FLAG_NONE)
233+
234 #define REPEAT_MIN_WIDTH    32
235
236 static void
237 fast_composite_tiled_repeat (pixman_implementation_t *imp,
238 			     pixman_composite_info_t *info)
239 {
240     PIXMAN_COMPOSITE_ARGS (info);
241     pixman_composite_func_t func;
242@@ -1960,16 +2172,20 @@ static const pixman_fast_path_t c_fast_p
243 	PIXMAN_any,
244 	(FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | FAST_PATH_BITS_IMAGE |
245 	 FAST_PATH_NORMAL_REPEAT),
246 	PIXMAN_any, 0,
247 	PIXMAN_any, FAST_PATH_STD_DEST_FLAGS,
248 	fast_composite_tiled_repeat
249     },
250
251+    SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, r5g6b5, 565_565),
252+    SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, r5g6b5, 8888_565),
253+    SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, 8888_8888),
254+
255     {   PIXMAN_OP_NONE	},
256 };
257
258 #ifdef WORDS_BIGENDIAN
259 #define A1_FILL_MASK(n, offs) (((1U << (n)) - 1) << (32 - (offs) - (n)))
260 #else
261 #define A1_FILL_MASK(n, offs) (((1U << (n)) - 1) << (offs))
262 #endif
263diff --git a/gfx/cairo/libpixman/src/pixman-inlines.h b/gfx/cairo/libpixman/src/pixman-inlines.h
264--- a/gfx/cairo/libpixman/src/pixman-inlines.h
265+++ b/gfx/cairo/libpixman/src/pixman-inlines.h
266@@ -80,16 +80,21 @@ repeat (pixman_repeat_t repeat, int *c,
267     }
268     return TRUE;
269 }
270
271 #ifdef MOZ_GFX_OPTIMIZE_MOBILE
272 #define LOW_QUALITY_INTERPOLATION
273 #endif
274
275+#ifdef LOW_QUALITY_INTERPOLATION
276+#define INTERPOLATION_PRECISION_BITS 4 /* interpolation_coord () returns a 4-bit value (& 0xf) in this mode */
277+#else
278+#define INTERPOLATION_PRECISION_BITS 8 /* interpolation_coord () returns an 8-bit value (& 0xff) in this mode */
279+#endif /* LOW_QUALITY_INTERPOLATION */
280 static force_inline int32_t
281 interpolation_coord(pixman_fixed_t t)
282 {
283 #ifdef LOW_QUALITY_INTERPOLATION
284     return (t >> 12) & 0xf;
285 #else
286     return (t >> 8) & 0xff;
287 #endif
288