// Copyright 2014 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Utilities for processing transparent channel.
//
// Author(s): Branimir Vasic (branimir.vasic@imgtec.com)
//            Djordje Pesut  (djordje.pesut@imgtec.com)
14 
15 #include "src/dsp/dsp.h"
16 
17 #if defined(WEBP_USE_MIPS_DSP_R2)
18 
DispatchAlpha_MIPSdspR2(const uint8_t * alpha,int alpha_stride,int width,int height,uint8_t * dst,int dst_stride)19 static int DispatchAlpha_MIPSdspR2(const uint8_t* alpha, int alpha_stride,
20                                    int width, int height,
21                                    uint8_t* dst, int dst_stride) {
22   uint32_t alpha_mask = 0xffffffff;
23   int i, j, temp0;
24 
25   for (j = 0; j < height; ++j) {
26     uint8_t* pdst = dst;
27     const uint8_t* palpha = alpha;
28     for (i = 0; i < (width >> 2); ++i) {
29       int temp1, temp2, temp3;
30 
31       __asm__ volatile (
32         "ulw    %[temp0],      0(%[palpha])                \n\t"
33         "addiu  %[palpha],     %[palpha],     4            \n\t"
34         "addiu  %[pdst],       %[pdst],       16           \n\t"
35         "srl    %[temp1],      %[temp0],      8            \n\t"
36         "srl    %[temp2],      %[temp0],      16           \n\t"
37         "srl    %[temp3],      %[temp0],      24           \n\t"
38         "and    %[alpha_mask], %[alpha_mask], %[temp0]     \n\t"
39         "sb     %[temp0],      -16(%[pdst])                \n\t"
40         "sb     %[temp1],      -12(%[pdst])                \n\t"
41         "sb     %[temp2],      -8(%[pdst])                 \n\t"
42         "sb     %[temp3],      -4(%[pdst])                 \n\t"
43         : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
44           [temp3]"=&r"(temp3), [palpha]"+r"(palpha), [pdst]"+r"(pdst),
45           [alpha_mask]"+r"(alpha_mask)
46         :
47         : "memory"
48       );
49     }
50 
51     for (i = 0; i < (width & 3); ++i) {
52       __asm__ volatile (
53         "lbu    %[temp0],      0(%[palpha])                \n\t"
54         "addiu  %[palpha],     %[palpha],     1            \n\t"
55         "sb     %[temp0],      0(%[pdst])                  \n\t"
56         "and    %[alpha_mask], %[alpha_mask], %[temp0]     \n\t"
57         "addiu  %[pdst],       %[pdst],       4            \n\t"
58         : [temp0]"=&r"(temp0), [palpha]"+r"(palpha), [pdst]"+r"(pdst),
59           [alpha_mask]"+r"(alpha_mask)
60         :
61         : "memory"
62       );
63     }
64     alpha += alpha_stride;
65     dst += dst_stride;
66   }
67 
68   __asm__ volatile (
69     "ext    %[temp0],      %[alpha_mask], 0, 16            \n\t"
70     "srl    %[alpha_mask], %[alpha_mask], 16               \n\t"
71     "and    %[alpha_mask], %[alpha_mask], %[temp0]         \n\t"
72     "ext    %[temp0],      %[alpha_mask], 0, 8             \n\t"
73     "srl    %[alpha_mask], %[alpha_mask], 8                \n\t"
74     "and    %[alpha_mask], %[alpha_mask], %[temp0]         \n\t"
75     : [temp0]"=&r"(temp0), [alpha_mask]"+r"(alpha_mask)
76     :
77   );
78 
79   return (alpha_mask != 0xff);
80 }
81 
MultARGBRow_MIPSdspR2(uint32_t * const ptr,int width,int inverse)82 static void MultARGBRow_MIPSdspR2(uint32_t* const ptr, int width,
83                                   int inverse) {
84   int x;
85   const uint32_t c_00ffffff = 0x00ffffffu;
86   const uint32_t c_ff000000 = 0xff000000u;
87   const uint32_t c_8000000  = 0x00800000u;
88   const uint32_t c_8000080  = 0x00800080u;
89   for (x = 0; x < width; ++x) {
90     const uint32_t argb = ptr[x];
91     if (argb < 0xff000000u) {      // alpha < 255
92       if (argb <= 0x00ffffffu) {   // alpha == 0
93         ptr[x] = 0;
94       } else {
95         int temp0, temp1, temp2, temp3, alpha;
96         __asm__ volatile (
97           "srl          %[alpha],   %[argb],       24                \n\t"
98           "replv.qb     %[temp0],   %[alpha]                         \n\t"
99           "and          %[temp0],   %[temp0],      %[c_00ffffff]     \n\t"
100           "beqz         %[inverse], 0f                               \n\t"
101           "divu         $zero,      %[c_ff000000], %[alpha]          \n\t"
102           "mflo         %[temp0]                                     \n\t"
103         "0:                                                          \n\t"
104           "andi         %[temp1],   %[argb],       0xff              \n\t"
105           "ext          %[temp2],   %[argb],       8,             8  \n\t"
106           "ext          %[temp3],   %[argb],       16,            8  \n\t"
107           "mul          %[temp1],   %[temp1],      %[temp0]          \n\t"
108           "mul          %[temp2],   %[temp2],      %[temp0]          \n\t"
109           "mul          %[temp3],   %[temp3],      %[temp0]          \n\t"
110           "precrq.ph.w  %[temp1],   %[temp2],      %[temp1]          \n\t"
111           "addu         %[temp3],   %[temp3],      %[c_8000000]      \n\t"
112           "addu         %[temp1],   %[temp1],      %[c_8000080]      \n\t"
113           "precrq.ph.w  %[temp3],   %[argb],       %[temp3]          \n\t"
114           "precrq.qb.ph %[temp1],   %[temp3],      %[temp1]          \n\t"
115           : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
116             [temp3]"=&r"(temp3), [alpha]"=&r"(alpha)
117           : [inverse]"r"(inverse), [c_00ffffff]"r"(c_00ffffff),
118             [c_8000000]"r"(c_8000000), [c_8000080]"r"(c_8000080),
119             [c_ff000000]"r"(c_ff000000), [argb]"r"(argb)
120           : "memory", "hi", "lo"
121         );
122         ptr[x] = temp1;
123       }
124     }
125   }
126 }
127 
128 #ifdef WORDS_BIGENDIAN
PackARGB_MIPSdspR2(const uint8_t * a,const uint8_t * r,const uint8_t * g,const uint8_t * b,int len,uint32_t * out)129 static void PackARGB_MIPSdspR2(const uint8_t* a, const uint8_t* r,
130                                const uint8_t* g, const uint8_t* b, int len,
131                                uint32_t* out) {
132   int temp0, temp1, temp2, temp3, offset;
133   const int rest = len & 1;
134   const uint32_t* const loop_end = out + len - rest;
135   const int step = 4;
136   __asm__ volatile (
137     "xor          %[offset],   %[offset], %[offset]    \n\t"
138     "beq          %[loop_end], %[out],    0f           \n\t"
139   "2:                                                  \n\t"
140     "lbux         %[temp0],    %[offset](%[a])         \n\t"
141     "lbux         %[temp1],    %[offset](%[r])         \n\t"
142     "lbux         %[temp2],    %[offset](%[g])         \n\t"
143     "lbux         %[temp3],    %[offset](%[b])         \n\t"
144     "ins          %[temp1],    %[temp0],  16,     16   \n\t"
145     "ins          %[temp3],    %[temp2],  16,     16   \n\t"
146     "addiu        %[out],      %[out],    4            \n\t"
147     "precr.qb.ph  %[temp0],    %[temp1],  %[temp3]     \n\t"
148     "sw           %[temp0],    -4(%[out])              \n\t"
149     "addu         %[offset],   %[offset], %[step]      \n\t"
150     "bne          %[loop_end], %[out],    2b           \n\t"
151   "0:                                                  \n\t"
152     "beq          %[rest],     $zero,     1f           \n\t"
153     "lbux         %[temp0],    %[offset](%[a])         \n\t"
154     "lbux         %[temp1],    %[offset](%[r])         \n\t"
155     "lbux         %[temp2],    %[offset](%[g])         \n\t"
156     "lbux         %[temp3],    %[offset](%[b])         \n\t"
157     "ins          %[temp1],    %[temp0],  16,     16   \n\t"
158     "ins          %[temp3],    %[temp2],  16,     16   \n\t"
159     "precr.qb.ph  %[temp0],    %[temp1],  %[temp3]     \n\t"
160     "sw           %[temp0],    0(%[out])               \n\t"
161   "1:                                                  \n\t"
162     : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
163       [temp3]"=&r"(temp3), [offset]"=&r"(offset), [out]"+&r"(out)
164     : [a]"r"(a), [r]"r"(r), [g]"r"(g), [b]"r"(b), [step]"r"(step),
165       [loop_end]"r"(loop_end), [rest]"r"(rest)
166     : "memory"
167   );
168 }
169 #endif  // WORDS_BIGENDIAN
170 
PackRGB_MIPSdspR2(const uint8_t * r,const uint8_t * g,const uint8_t * b,int len,int step,uint32_t * out)171 static void PackRGB_MIPSdspR2(const uint8_t* r, const uint8_t* g,
172                               const uint8_t* b, int len, int step,
173                               uint32_t* out) {
174   int temp0, temp1, temp2, offset;
175   const int rest = len & 1;
176   const int a = 0xff;
177   const uint32_t* const loop_end = out + len - rest;
178   __asm__ volatile (
179     "xor          %[offset],   %[offset], %[offset]    \n\t"
180     "beq          %[loop_end], %[out],    0f           \n\t"
181   "2:                                                  \n\t"
182     "lbux         %[temp0],    %[offset](%[r])         \n\t"
183     "lbux         %[temp1],    %[offset](%[g])         \n\t"
184     "lbux         %[temp2],    %[offset](%[b])         \n\t"
185     "ins          %[temp0],    %[a],      16,     16   \n\t"
186     "ins          %[temp2],    %[temp1],  16,     16   \n\t"
187     "addiu        %[out],      %[out],    4            \n\t"
188     "precr.qb.ph  %[temp0],    %[temp0],  %[temp2]     \n\t"
189     "sw           %[temp0],    -4(%[out])              \n\t"
190     "addu         %[offset],   %[offset], %[step]      \n\t"
191     "bne          %[loop_end], %[out],    2b           \n\t"
192   "0:                                                  \n\t"
193     "beq          %[rest],     $zero,     1f           \n\t"
194     "lbux         %[temp0],    %[offset](%[r])         \n\t"
195     "lbux         %[temp1],    %[offset](%[g])         \n\t"
196     "lbux         %[temp2],    %[offset](%[b])         \n\t"
197     "ins          %[temp0],    %[a],      16,     16   \n\t"
198     "ins          %[temp2],    %[temp1],  16,     16   \n\t"
199     "precr.qb.ph  %[temp0],    %[temp0],  %[temp2]     \n\t"
200     "sw           %[temp0],    0(%[out])               \n\t"
201   "1:                                                  \n\t"
202     : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
203       [offset]"=&r"(offset), [out]"+&r"(out)
204     : [a]"r"(a), [r]"r"(r), [g]"r"(g), [b]"r"(b), [step]"r"(step),
205       [loop_end]"r"(loop_end), [rest]"r"(rest)
206     : "memory"
207   );
208 }
209 
210 //------------------------------------------------------------------------------
211 // Entry point
212 
213 extern void WebPInitAlphaProcessingMIPSdspR2(void);
214 
WebPInitAlphaProcessingMIPSdspR2(void)215 WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessingMIPSdspR2(void) {
216   WebPDispatchAlpha = DispatchAlpha_MIPSdspR2;
217   WebPMultARGBRow = MultARGBRow_MIPSdspR2;
218 #ifdef WORDS_BIGENDIAN
219   WebPPackARGB = PackARGB_MIPSdspR2;
220 #endif
221   WebPPackRGB = PackRGB_MIPSdspR2;
222 }
223 
224 #else  // !WEBP_USE_MIPS_DSP_R2
225 
226 WEBP_DSP_INIT_STUB(WebPInitAlphaProcessingMIPSdspR2)
227 
228 #endif  // WEBP_USE_MIPS_DSP_R2
229