changeset: 94061:73a9b24d863a
tag: bilin
tag: qbase
tag: qtip
tag: tip
user: Jeff Muizelaar <jmuizelaar@mozilla.com>
date: Tue May 15 18:26:16 2012 -0400
summary: Bug 754364. Add bilinear non-repeat and repeat fast paths. r=joe

diff --git a/gfx/cairo/libpixman/src/pixman-fast-path.c b/gfx/cairo/libpixman/src/pixman-fast-path.c
--- a/gfx/cairo/libpixman/src/pixman-fast-path.c
+++ b/gfx/cairo/libpixman/src/pixman-fast-path.c
@@ -1186,16 +1186,250 @@ FAST_NEAREST (8888_565_none, 8888, 0565,
 FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, SRC, PAD)
 FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, SRC, NORMAL)
 FAST_NEAREST (565_565_normal, 0565, 0565, uint16_t, uint16_t, SRC, NORMAL)
 FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, OVER, COVER)
 FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, OVER, NONE)
 FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, OVER, PAD)
 FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, OVER, NORMAL)
 
+/*
+ * Scanline worker for bilinear scaling with OVER onto an r5g6b5 destination.
+ * For each of the w pixels the four source pixels around vx (two from
+ * src_top, two from src_bottom) are interpolated, blended over the
+ * destination pixel, and vx advances by unit_x.  mask, wt, max_vx and
+ * zero_src are accepted but not used here.
+ */
+static force_inline void
+scaled_bilinear_scanline_8888_565_OVER (uint16_t *       dst,
+                                        const uint32_t * mask,
+                                        const uint32_t * src_top,
+                                        const uint32_t * src_bottom,
+                                        int32_t          w,
+                                        int              wt,
+                                        int              wb,
+                                        pixman_fixed_t   vx,
+                                        pixman_fixed_t   unit_x,
+                                        pixman_fixed_t   max_vx,
+                                        pixman_bool_t    zero_src)
+{
+    while ((w -= 1) >= 0)
+    {
+        uint32_t tl = src_top [pixman_fixed_to_int (vx)];
+        uint32_t tr = src_top [pixman_fixed_to_int (vx) + 1];
+        uint32_t bl = src_bottom [pixman_fixed_to_int (vx)];
+        uint32_t br = src_bottom [pixman_fixed_to_int (vx) + 1];
+        uint32_t src, result;
+        uint16_t d;
+        d = *dst;
+        src = bilinear_interpolation (tl, tr,
+                                      bl, br,
+                                      interpolation_coord(vx),
+                                      wb >> (8 - INTERPOLATION_PRECISION_BITS));
+        vx += unit_x;
+        result = over (src, CONVERT_0565_TO_0888 (d));
+        *dst++ = CONVERT_8888_TO_0565(result);
+    }
+}
+
+/*
+ * Scanline worker for bilinear scaling with OVER onto a 32-bit destination.
+ * Identical sampling to the 8888_565 worker, but the interpolated pixel is
+ * blended over the destination word directly, with no format conversion.
+ */
+static force_inline void
+scaled_bilinear_scanline_8888_8888_OVER (uint32_t *       dst,
+                                         const uint32_t * mask,
+                                         const uint32_t * src_top,
+                                         const uint32_t * src_bottom,
+                                         int32_t          w,
+                                         int              wt,
+                                         int              wb,
+                                         pixman_fixed_t   vx,
+                                         pixman_fixed_t   unit_x,
+                                         pixman_fixed_t   max_vx,
+                                         pixman_bool_t    zero_src)
+{
+    while ((w -= 1) >= 0)
+    {
+        uint32_t tl = src_top [pixman_fixed_to_int (vx)];
+        uint32_t tr = src_top [pixman_fixed_to_int (vx) + 1];
+        uint32_t bl = src_bottom [pixman_fixed_to_int (vx)];
+        uint32_t br = src_bottom [pixman_fixed_to_int (vx) + 1];
+        uint32_t src;
+        uint32_t d;
+        d = *dst;
+        src = bilinear_interpolation (tl, tr,
+                                      bl, br,
+                                      interpolation_coord(vx),
+                                      wb >> (8 - INTERPOLATION_PRECISION_BITS));
+        vx += unit_x;
+        *dst++ = over (src, d);
+    }
+}
+
+#if 1
+
+/*
+ * Scanline worker for bilinear scaling with SRC between 16-bit images.
+ * The four neighbouring source pixels are widened with
+ * CONVERT_0565_TO_8888, interpolated, and the result is packed back with
+ * CONVERT_8888_TO_0565 and stored, replacing the destination pixel.
+ */
+static force_inline void
+scaled_bilinear_scanline_565_565_SRC (uint16_t *       dst,
+                                      const uint32_t * mask,
+                                      const uint16_t * src_top,
+                                      const uint16_t * src_bottom,
+                                      int32_t          w,
+                                      int              wt,
+                                      int              wb,
+                                      pixman_fixed_t   vx,
+                                      pixman_fixed_t   unit_x,
+                                      pixman_fixed_t   max_vx,
+                                      pixman_bool_t    zero_src)
+{
+    while ((w -= 1) >= 0)
+    {
+        uint16_t tl = src_top [pixman_fixed_to_int (vx)];
+        uint16_t tr = src_top [pixman_fixed_to_int (vx) + 1];
+        uint16_t bl = src_bottom [pixman_fixed_to_int (vx)];
+        uint16_t br = src_bottom [pixman_fixed_to_int (vx) + 1];
+        uint32_t d;
+        d = bilinear_interpolation(CONVERT_0565_TO_8888(tl),
+                                   CONVERT_0565_TO_8888(tr),
+                                   CONVERT_0565_TO_8888(bl),
+                                   CONVERT_0565_TO_8888(br),
+                                   interpolation_coord(vx),
+                                   wb >> (8 - INTERPOLATION_PRECISION_BITS));
+        vx += unit_x;
+        *dst++ = CONVERT_8888_TO_0565(d);
+    }
+}
+
+#else
+
+/* The 6-bit green field of a 565 pixel, left in place (bits 5..10). */
+#define SK_G16_MASK_IN_PLACE 0x07e0
+
+/* Move green up by 16 bits so each channel has spare bits above it. */
+static inline uint32_t SkExpand_rgb_16(uint16_t c) {
+
+    return ((c & SK_G16_MASK_IN_PLACE) << 16) | (c & ~SK_G16_MASK_IN_PLACE);
+}
+
+/** Compress an expanded value (from SkExpand_rgb_16) back down to a 16bit
+    color value. Only the low 16 bits of the computation are valid; the
+    uint16_t return type discards the rest, so callers need not mask.
+*/
+static inline uint16_t SkCompact_rgb_16(uint32_t c) {
+    return ((c >> 16) & SK_G16_MASK_IN_PLACE) | (c & ~SK_G16_MASK_IN_PLACE);
+}
+/* returns expanded * 5bits */
+static inline uint32_t Filter_565_Expanded(unsigned x, unsigned y,
+                                           uint32_t a00, uint32_t a01,
+                                           uint32_t a10, uint32_t a11) {
+    int xy = x * y >> 3;
+
+    a00 = SkExpand_rgb_16(a00);
+    a01 = SkExpand_rgb_16(a01);
+    a10 = SkExpand_rgb_16(a10);
+    a11 = SkExpand_rgb_16(a11);
+    return a00 * (32 - 2*y - 2*x + xy) +
+           a01 * (2*x - xy) +
+           a10 * (2*y - xy) +
+           a11 * xy;
+}
+
+
+
+/*
+ * Alternative 565 SRC worker, compiled only if the "#if 1" above is
+ * flipped.  It filters the expanded pixel form in one multiply per
+ * neighbour, using (vx >> 12) & 0xf and wb >> 4 as the x and y weights.
+ */
+static force_inline void
+scaled_bilinear_scanline_565_565_SRC (uint16_t *       dst,
+                                      const uint32_t * mask,
+                                      const uint16_t * src_top,
+                                      const uint16_t * src_bottom,
+                                      int32_t          w,
+                                      int              wt,
+                                      int              wb,
+                                      pixman_fixed_t   vx,
+                                      pixman_fixed_t   unit_x,
+                                      pixman_fixed_t   max_vx,
+                                      pixman_bool_t    zero_src)
+{
+    while ((w -= 1) >= 0)
+    {
+        uint16_t tl = src_top [pixman_fixed_to_int (vx)];
+        uint16_t tr = src_top [pixman_fixed_to_int (vx) + 1];
+        uint16_t bl = src_bottom [pixman_fixed_to_int (vx)];
+        uint16_t br = src_bottom [pixman_fixed_to_int (vx) + 1];
+
+        uint32_t tmp = Filter_565_Expanded((vx>>12)&0xf, wb>>4, tl, tr, bl, br);
+        vx += unit_x;
+        *dst++ = SkCompact_rgb_16((tmp) >> 5);
+    }
+}
+
+
+#endif
+FAST_BILINEAR_MAINLOOP_COMMON (565_565_cover_SRC,
+                               scaled_bilinear_scanline_565_565_SRC,
+                               uint16_t, uint32_t, uint16_t,
+                               COVER, FLAG_NONE)
+FAST_BILINEAR_MAINLOOP_COMMON (565_565_pad_SRC,
+                               scaled_bilinear_scanline_565_565_SRC,
+                               uint16_t, uint32_t, uint16_t,
+                               PAD, FLAG_NONE)
+FAST_BILINEAR_MAINLOOP_COMMON (565_565_none_SRC,
+                               scaled_bilinear_scanline_565_565_SRC,
+                               uint16_t, uint32_t, uint16_t,
+                               NONE, FLAG_NONE)
+FAST_BILINEAR_MAINLOOP_COMMON (565_565_normal_SRC,
+                               scaled_bilinear_scanline_565_565_SRC,
+                               uint16_t, uint32_t, uint16_t,
+                               NORMAL, FLAG_NONE)
+
+FAST_BILINEAR_MAINLOOP_COMMON (8888_565_cover_OVER,
+                               scaled_bilinear_scanline_8888_565_OVER,
+                               uint32_t, uint32_t, uint16_t,
+                               COVER, FLAG_NONE)
+FAST_BILINEAR_MAINLOOP_COMMON (8888_565_pad_OVER,
+                               scaled_bilinear_scanline_8888_565_OVER,
+                               uint32_t, uint32_t, uint16_t,
+                               PAD, FLAG_NONE)
+FAST_BILINEAR_MAINLOOP_COMMON (8888_565_none_OVER,
+                               scaled_bilinear_scanline_8888_565_OVER,
+                               uint32_t, uint32_t, uint16_t,
+                               NONE, FLAG_NONE)
+FAST_BILINEAR_MAINLOOP_COMMON (8888_565_normal_OVER,
+                               scaled_bilinear_scanline_8888_565_OVER,
+                               uint32_t, uint32_t, uint16_t,
+                               NORMAL, FLAG_NONE)
+
+FAST_BILINEAR_MAINLOOP_COMMON (8888_8888_cover_OVER,
+                               scaled_bilinear_scanline_8888_8888_OVER,
+                               uint32_t, uint32_t, uint32_t,
+                               COVER, FLAG_NONE)
+FAST_BILINEAR_MAINLOOP_COMMON (8888_8888_pad_OVER,
+                               scaled_bilinear_scanline_8888_8888_OVER,
+                               uint32_t, uint32_t, uint32_t,
+                               PAD, FLAG_NONE)
+FAST_BILINEAR_MAINLOOP_COMMON (8888_8888_none_OVER,
+                               scaled_bilinear_scanline_8888_8888_OVER,
+                               uint32_t, uint32_t, uint32_t,
+                               NONE, FLAG_NONE)
+FAST_BILINEAR_MAINLOOP_COMMON (8888_8888_normal_OVER,
+                               scaled_bilinear_scanline_8888_8888_OVER,
+                               uint32_t, uint32_t, uint32_t,
+                               NORMAL, FLAG_NONE)
+
 #define REPEAT_MIN_WIDTH 32
 
 static void
 fast_composite_tiled_repeat (pixman_implementation_t *imp,
                              pixman_composite_info_t *info)
 {
     PIXMAN_COMPOSITE_ARGS (info);
     pixman_composite_func_t func;
@@ -1960,16 +2194,20 @@ static const pixman_fast_path_t c_fast_p
         PIXMAN_any,
         (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | FAST_PATH_BITS_IMAGE |
          FAST_PATH_NORMAL_REPEAT),
         PIXMAN_any, 0,
         PIXMAN_any, FAST_PATH_STD_DEST_FLAGS,
         fast_composite_tiled_repeat
     },
 
+    SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, r5g6b5, 565_565),
+    SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, r5g6b5, 8888_565),
+    SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, 8888_8888),
+
     { PIXMAN_OP_NONE },
 };
 
 #ifdef WORDS_BIGENDIAN
 #define A1_FILL_MASK(n, offs) (((1U << (n)) - 1) << (32 - (offs) - (n)))
 #else
 #define A1_FILL_MASK(n, offs) (((1U << (n)) - 1) << (offs))
 #endif
diff --git a/gfx/cairo/libpixman/src/pixman-inlines.h b/gfx/cairo/libpixman/src/pixman-inlines.h
--- a/gfx/cairo/libpixman/src/pixman-inlines.h
+++ b/gfx/cairo/libpixman/src/pixman-inlines.h
@@ -80,16 +80,22 @@ repeat (pixman_repeat_t repeat, int *c,
     }
     return TRUE;
 }
 
 #ifdef MOZ_GFX_OPTIMIZE_MOBILE
 #define LOW_QUALITY_INTERPOLATION
 #endif
 
+/* Number of fractional bits that interpolation_coord() keeps. */
+#ifdef LOW_QUALITY_INTERPOLATION
+#define INTERPOLATION_PRECISION_BITS 4
+#else
+#define INTERPOLATION_PRECISION_BITS 8
+#endif
 static force_inline int32_t
 interpolation_coord(pixman_fixed_t t)
 {
 #ifdef LOW_QUALITY_INTERPOLATION
     return (t >> 12) & 0xf;
 #else
     return (t >> 8) & 0xff;
 #endif