// Copyright 2014 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// MIPS version of rescaling functions
//
// Author(s): Djordje Pesut (djordje.pesut@imgtec.com)
13 
14 #include "src/dsp/dsp.h"
15 
16 #if defined(WEBP_USE_MIPS_DSP_R2) && !defined(WEBP_REDUCE_SIZE)
17 
18 #include <assert.h>
19 #include "src/utils/rescaler_utils.h"
20 
// Fixed-point helpers used by the scalar tail loops in this file.
// WEBP_RESCALER_ONE and WEBP_RESCALER_RFIX are not defined here (presumably
// they come from src/utils/rescaler_utils.h -- confirm).

// Rounding term: half of WEBP_RESCALER_ONE.
#define ROUNDER (WEBP_RESCALER_ONE >> 1)
// 64-bit product x * y, plus ROUNDER, shifted right by WEBP_RESCALER_RFIX.
#define MULT_FIX(x, y) (((uint64_t)(x) * (y) + ROUNDER) >> WEBP_RESCALER_RFIX)
// Same as MULT_FIX but without the rounding term (result is truncated).
#define MULT_FIX_FLOOR(x, y) (((uint64_t)(x) * (y)) >> WEBP_RESCALER_RFIX)
24 
//------------------------------------------------------------------------------
// Row export
27 
#if 0  // disabled for now. TODO(skal): make match the C-code
// Exports one destination row for the vertical-shrink case (asserts
// !wrk->y_expand). Writes dst_width * num_channels bytes to wrk->dst and
// updates wrk->irow in place.
//
//  - yscale != 0: frac = frow[x] * yscale (high 32 bits of the product),
//    output = (irow[x] - frac) * fxy_scale (high 32 bits, rounded), and
//    irow[x] is replaced by frac (the new fractional start).
//  - yscale == 0: output = irow[x] * fxy_scale (high 32 bits) and irow[x] is
//    reset to 0.
//
// Samples are handled four at a time by the inline assembly (which uses 4-byte
// loads/stores and 16-byte strides on frow/irow); the remaining
// (x_out_max & 3) samples are handled by the scalar tail loops.
//
// In the assembly, "mult $acN, 0x10000, 0x8000" preloads accumulator N with
// 0x80000000 as a rounding term, "maddu" adds an unsigned 32x32 product, and
// "mfhi" reads back the upper 32 bits of the 64-bit accumulator.
//
// NOTE(review): the assembly adds the rounding term when computing 'frac' and
// in the yscale == 0 branch, whereas the scalar tails use MULT_FIX_FLOOR (no
// rounding term) for those same quantities. The assembly also stores only the
// low byte ("sb") without the 'v > 255' clamp of the scalar tails. These may
// be among the mismatches the TODO above refers to -- confirm before enabling.
static void ExportRowShrink_MIPSdspR2(WebPRescaler* const wrk) {
  int i;
  const int x_out_max = wrk->dst_width * wrk->num_channels;
  uint8_t* dst = wrk->dst;
  rescaler_t* irow = wrk->irow;
  const rescaler_t* frow = wrk->frow;
  const int yscale = wrk->fy_scale * (-wrk->y_accum);
  int temp0, temp1, temp2, temp3, temp4, temp5, loop_end;
  const int temp7 = (int)wrk->fxy_scale;
  // Byte length of the multiple-of-4 prefix (4 bytes per sample in the asm).
  const int temp6 = (x_out_max & ~0x3) << 2;
  assert(!WebPRescalerOutputDone(wrk));
  assert(wrk->y_accum <= 0);
  assert(!wrk->y_expand);
  assert(wrk->fxy_scale != 0);
  if (yscale) {
    if (x_out_max >= 4) {
      int temp8, temp9, temp10, temp11;
      __asm__ volatile (
        "li       %[temp3],    0x10000                    \n\t"
        "li       %[temp4],    0x8000                     \n\t"
        "addu     %[loop_end], %[frow],     %[temp6]      \n\t"
      "1:                                                 \n\t"
        "lw       %[temp0],    0(%[frow])                 \n\t"
        "lw       %[temp1],    4(%[frow])                 \n\t"
        "lw       %[temp2],    8(%[frow])                 \n\t"
        "lw       %[temp5],    12(%[frow])                \n\t"
        "mult     $ac0,        %[temp3],    %[temp4]      \n\t"
        "maddu    $ac0,        %[temp0],    %[yscale]     \n\t"
        "mult     $ac1,        %[temp3],    %[temp4]      \n\t"
        "maddu    $ac1,        %[temp1],    %[yscale]     \n\t"
        "mult     $ac2,        %[temp3],    %[temp4]      \n\t"
        "maddu    $ac2,        %[temp2],    %[yscale]     \n\t"
        "mult     $ac3,        %[temp3],    %[temp4]      \n\t"
        "maddu    $ac3,        %[temp5],    %[yscale]     \n\t"
        "addiu    %[frow],     %[frow],     16            \n\t"
        "mfhi     %[temp0],    $ac0                       \n\t"
        "mfhi     %[temp1],    $ac1                       \n\t"
        "mfhi     %[temp2],    $ac2                       \n\t"
        "mfhi     %[temp5],    $ac3                       \n\t"
        "lw       %[temp8],    0(%[irow])                 \n\t"
        "lw       %[temp9],    4(%[irow])                 \n\t"
        "lw       %[temp10],   8(%[irow])                 \n\t"
        "lw       %[temp11],   12(%[irow])                \n\t"
        "addiu    %[dst],      %[dst],      4             \n\t"
        "addiu    %[irow],     %[irow],     16            \n\t"
        "subu     %[temp8],    %[temp8],    %[temp0]      \n\t"
        "subu     %[temp9],    %[temp9],    %[temp1]      \n\t"
        "subu     %[temp10],   %[temp10],   %[temp2]      \n\t"
        "subu     %[temp11],   %[temp11],   %[temp5]      \n\t"
        "mult     $ac0,        %[temp3],    %[temp4]      \n\t"
        "maddu    $ac0,        %[temp8],    %[temp7]      \n\t"
        "mult     $ac1,        %[temp3],    %[temp4]      \n\t"
        "maddu    $ac1,        %[temp9],    %[temp7]      \n\t"
        "mult     $ac2,        %[temp3],    %[temp4]      \n\t"
        "maddu    $ac2,        %[temp10],   %[temp7]      \n\t"
        "mult     $ac3,        %[temp3],    %[temp4]      \n\t"
        "maddu    $ac3,        %[temp11],   %[temp7]      \n\t"
        "mfhi     %[temp8],    $ac0                       \n\t"
        "mfhi     %[temp9],    $ac1                       \n\t"
        "mfhi     %[temp10],   $ac2                       \n\t"
        "mfhi     %[temp11],   $ac3                       \n\t"
        "sw       %[temp0],    -16(%[irow])               \n\t"
        "sw       %[temp1],    -12(%[irow])               \n\t"
        "sw       %[temp2],    -8(%[irow])                \n\t"
        "sw       %[temp5],    -4(%[irow])                \n\t"
        "sb       %[temp8],    -4(%[dst])                 \n\t"
        "sb       %[temp9],    -3(%[dst])                 \n\t"
        "sb       %[temp10],   -2(%[dst])                 \n\t"
        "sb       %[temp11],   -1(%[dst])                 \n\t"
        "bne      %[frow],     %[loop_end], 1b            \n\t"
        : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),
          [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [frow]"+r"(frow),
          [irow]"+r"(irow), [dst]"+r"(dst), [loop_end]"=&r"(loop_end),
          [temp8]"=&r"(temp8), [temp9]"=&r"(temp9), [temp10]"=&r"(temp10),
          [temp11]"=&r"(temp11), [temp2]"=&r"(temp2)
        : [temp7]"r"(temp7), [yscale]"r"(yscale), [temp6]"r"(temp6)
        : "memory", "hi", "lo", "$ac1hi", "$ac1lo",
          "$ac2hi", "$ac2lo", "$ac3hi", "$ac3lo"
      );
    }
    // Scalar tail: frow/irow/dst were advanced past the prefix by the asm.
    for (i = 0; i < (x_out_max & 0x3); ++i) {
      const uint32_t frac = (uint32_t)MULT_FIX_FLOOR(*frow++, yscale);
      const int v = (int)MULT_FIX(*irow - frac, wrk->fxy_scale);
      *dst++ = (v > 255) ? 255u : (uint8_t)v;
      *irow++ = frac;   // new fractional start
    }
  } else {
    if (x_out_max >= 4) {
      __asm__ volatile (
        "li       %[temp3],    0x10000                    \n\t"
        "li       %[temp4],    0x8000                     \n\t"
        "addu     %[loop_end], %[irow],     %[temp6]      \n\t"
      "1:                                                 \n\t"
        "lw       %[temp0],    0(%[irow])                 \n\t"
        "lw       %[temp1],    4(%[irow])                 \n\t"
        "lw       %[temp2],    8(%[irow])                 \n\t"
        "lw       %[temp5],    12(%[irow])                \n\t"
        "addiu    %[dst],      %[dst],      4             \n\t"
        "addiu    %[irow],     %[irow],     16            \n\t"
        "mult     $ac0,        %[temp3],    %[temp4]      \n\t"
        "maddu    $ac0,        %[temp0],    %[temp7]      \n\t"
        "mult     $ac1,        %[temp3],    %[temp4]      \n\t"
        "maddu    $ac1,        %[temp1],    %[temp7]      \n\t"
        "mult     $ac2,        %[temp3],    %[temp4]      \n\t"
        "maddu    $ac2,        %[temp2],    %[temp7]      \n\t"
        "mult     $ac3,        %[temp3],    %[temp4]      \n\t"
        "maddu    $ac3,        %[temp5],    %[temp7]      \n\t"
        "mfhi     %[temp0],    $ac0                       \n\t"
        "mfhi     %[temp1],    $ac1                       \n\t"
        "mfhi     %[temp2],    $ac2                       \n\t"
        "mfhi     %[temp5],    $ac3                       \n\t"
        "sw       $zero,       -16(%[irow])               \n\t"
        "sw       $zero,       -12(%[irow])               \n\t"
        "sw       $zero,       -8(%[irow])                \n\t"
        "sw       $zero,       -4(%[irow])                \n\t"
        "sb       %[temp0],    -4(%[dst])                 \n\t"
        "sb       %[temp1],    -3(%[dst])                 \n\t"
        "sb       %[temp2],    -2(%[dst])                 \n\t"
        "sb       %[temp5],    -1(%[dst])                 \n\t"
        "bne      %[irow],     %[loop_end], 1b            \n\t"
        : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),
          [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [irow]"+r"(irow),
          [dst]"+r"(dst), [loop_end]"=&r"(loop_end), [temp2]"=&r"(temp2)
        : [temp7]"r"(temp7), [temp6]"r"(temp6)
        : "memory", "hi", "lo", "$ac1hi", "$ac1lo",
          "$ac2hi", "$ac2lo", "$ac3hi", "$ac3lo"
      );
    }
    // Scalar tail for the last (x_out_max & 3) samples.
    for (i = 0; i < (x_out_max & 0x3); ++i) {
      const int v = (int)MULT_FIX_FLOOR(*irow, wrk->fxy_scale);
      *dst++ = (v > 255) ? 255u : (uint8_t)v;
      *irow++ = 0;
    }
  }
}
#endif  // 0
165 
// Exports one destination row for the vertical-expand case (asserts
// wrk->y_expand). Writes dst_width * num_channels bytes to wrk->dst; frow and
// irow are only read.
//
//  - y_accum == 0: output = frow[x] * fy_scale (high 32 bits, rounded).
//  - otherwise:    J = A * frow[x] + B * irow[x] (high 32 bits, rounded) with
//                  B = WEBP_RESCALER_FRAC(-y_accum, y_sub) and
//                  A = WEBP_RESCALER_ONE - B, then
//                  output = J * fy_scale (high 32 bits, rounded).
//
// Samples are handled four at a time by the inline assembly (which uses 4-byte
// loads and 16-byte strides on frow/irow); the remaining (x_out_max & 3)
// samples are handled by the scalar tail loops.
//
// In the assembly, "mult $acN, 0x10000, 0x8000" preloads accumulator N with
// 0x80000000 as a rounding term, "maddu" adds an unsigned 32x32 product, and
// "mfhi" reads back the upper 32 bits of the 64-bit accumulator. This mirrors
// MULT_FIX provided ROUNDER == 1 << 31 and WEBP_RESCALER_RFIX == 32 (both are
// defined outside this file -- confirm).
//
// NOTE(review): the assembly stores the low byte of each result with "sb" and
// has no equivalent of the 'v > 255' clamp applied by the scalar tails;
// confirm the scaled value cannot exceed 255 on this path.
static void ExportRowExpand_MIPSdspR2(WebPRescaler* const wrk) {
  int i;
  uint8_t* dst = wrk->dst;
  rescaler_t* irow = wrk->irow;
  const int x_out_max = wrk->dst_width * wrk->num_channels;
  const rescaler_t* frow = wrk->frow;
  int temp0, temp1, temp2, temp3, temp4, temp5, loop_end;
  // Byte length of the multiple-of-4 prefix (4 bytes per sample in the asm).
  const int temp6 = (x_out_max & ~0x3) << 2;
  const int temp7 = (int)wrk->fy_scale;
  assert(!WebPRescalerOutputDone(wrk));
  assert(wrk->y_accum <= 0);
  assert(wrk->y_expand);
  assert(wrk->y_sub != 0);
  if (wrk->y_accum == 0) {
    // The x_out_max >= 4 guard ensures the do-while style asm loop (which
    // tests its exit condition only at the bottom) runs on a non-empty range.
    if (x_out_max >= 4) {
      __asm__ volatile (
        "li       %[temp4],    0x10000                    \n\t"
        "li       %[temp5],    0x8000                     \n\t"
        "addu     %[loop_end], %[frow],     %[temp6]      \n\t"
      "1:                                                 \n\t"
        "lw       %[temp0],    0(%[frow])                 \n\t"
        "lw       %[temp1],    4(%[frow])                 \n\t"
        "lw       %[temp2],    8(%[frow])                 \n\t"
        "lw       %[temp3],    12(%[frow])                \n\t"
        "addiu    %[dst],      %[dst],      4             \n\t"
        "addiu    %[frow],     %[frow],     16            \n\t"
        "mult     $ac0,        %[temp4],    %[temp5]      \n\t"
        "maddu    $ac0,        %[temp0],    %[temp7]      \n\t"
        "mult     $ac1,        %[temp4],    %[temp5]      \n\t"
        "maddu    $ac1,        %[temp1],    %[temp7]      \n\t"
        "mult     $ac2,        %[temp4],    %[temp5]      \n\t"
        "maddu    $ac2,        %[temp2],    %[temp7]      \n\t"
        "mult     $ac3,        %[temp4],    %[temp5]      \n\t"
        "maddu    $ac3,        %[temp3],    %[temp7]      \n\t"
        "mfhi     %[temp0],    $ac0                       \n\t"
        "mfhi     %[temp1],    $ac1                       \n\t"
        "mfhi     %[temp2],    $ac2                       \n\t"
        "mfhi     %[temp3],    $ac3                       \n\t"
        "sb       %[temp0],    -4(%[dst])                 \n\t"
        "sb       %[temp1],    -3(%[dst])                 \n\t"
        "sb       %[temp2],    -2(%[dst])                 \n\t"
        "sb       %[temp3],    -1(%[dst])                 \n\t"
        "bne      %[frow],     %[loop_end], 1b            \n\t"
        : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),
          [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [frow]"+r"(frow),
          [dst]"+r"(dst), [loop_end]"=&r"(loop_end), [temp2]"=&r"(temp2)
        : [temp7]"r"(temp7), [temp6]"r"(temp6)
        : "memory", "hi", "lo", "$ac1hi", "$ac1lo",
          "$ac2hi", "$ac2lo", "$ac3hi", "$ac3lo"
      );
    }
    // Scalar tail: frow/dst were advanced past the prefix by the asm.
    for (i = 0; i < (x_out_max & 0x3); ++i) {
      const uint32_t J = *frow++;
      const int v = (int)MULT_FIX(J, wrk->fy_scale);
      *dst++ = (v > 255) ? 255u : (uint8_t)v;
    }
  } else {
    // Blend weights for the two source rows; A + B == WEBP_RESCALER_ONE.
    const uint32_t B = WEBP_RESCALER_FRAC(-wrk->y_accum, wrk->y_sub);
    const uint32_t A = (uint32_t)(WEBP_RESCALER_ONE - B);
    if (x_out_max >= 4) {
      int temp8, temp9, temp10, temp11;
      __asm__ volatile (
        "li       %[temp8],    0x10000                    \n\t"
        "li       %[temp9],    0x8000                     \n\t"
        "addu     %[loop_end], %[frow],     %[temp6]      \n\t"
      "1:                                                 \n\t"
        "lw       %[temp0],    0(%[frow])                 \n\t"
        "lw       %[temp1],    4(%[frow])                 \n\t"
        "lw       %[temp2],    8(%[frow])                 \n\t"
        "lw       %[temp3],    12(%[frow])                \n\t"
        "lw       %[temp4],    0(%[irow])                 \n\t"
        "lw       %[temp5],    4(%[irow])                 \n\t"
        "lw       %[temp10],   8(%[irow])                 \n\t"
        "lw       %[temp11],   12(%[irow])                \n\t"
        "addiu    %[dst],      %[dst],      4             \n\t"
        "mult     $ac0,        %[temp8],    %[temp9]      \n\t"
        "maddu    $ac0,        %[A],        %[temp0]      \n\t"
        "maddu    $ac0,        %[B],        %[temp4]      \n\t"
        "mult     $ac1,        %[temp8],    %[temp9]      \n\t"
        "maddu    $ac1,        %[A],        %[temp1]      \n\t"
        "maddu    $ac1,        %[B],        %[temp5]      \n\t"
        "mult     $ac2,        %[temp8],    %[temp9]      \n\t"
        "maddu    $ac2,        %[A],        %[temp2]      \n\t"
        "maddu    $ac2,        %[B],        %[temp10]     \n\t"
        "mult     $ac3,        %[temp8],    %[temp9]      \n\t"
        "maddu    $ac3,        %[A],        %[temp3]      \n\t"
        "maddu    $ac3,        %[B],        %[temp11]     \n\t"
        "addiu    %[frow],     %[frow],     16            \n\t"
        "addiu    %[irow],     %[irow],     16            \n\t"
        "mfhi     %[temp0],    $ac0                       \n\t"
        "mfhi     %[temp1],    $ac1                       \n\t"
        "mfhi     %[temp2],    $ac2                       \n\t"
        "mfhi     %[temp3],    $ac3                       \n\t"
        "mult     $ac0,        %[temp8],    %[temp9]      \n\t"
        "maddu    $ac0,        %[temp0],    %[temp7]      \n\t"
        "mult     $ac1,        %[temp8],    %[temp9]      \n\t"
        "maddu    $ac1,        %[temp1],    %[temp7]      \n\t"
        "mult     $ac2,        %[temp8],    %[temp9]      \n\t"
        "maddu    $ac2,        %[temp2],    %[temp7]      \n\t"
        "mult     $ac3,        %[temp8],    %[temp9]      \n\t"
        "maddu    $ac3,        %[temp3],    %[temp7]      \n\t"
        "mfhi     %[temp0],    $ac0                       \n\t"
        "mfhi     %[temp1],    $ac1                       \n\t"
        "mfhi     %[temp2],    $ac2                       \n\t"
        "mfhi     %[temp3],    $ac3                       \n\t"
        "sb       %[temp0],    -4(%[dst])                 \n\t"
        "sb       %[temp1],    -3(%[dst])                 \n\t"
        "sb       %[temp2],    -2(%[dst])                 \n\t"
        "sb       %[temp3],    -1(%[dst])                 \n\t"
        "bne      %[frow],     %[loop_end], 1b            \n\t"
        : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),
          [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [frow]"+r"(frow),
          [irow]"+r"(irow), [dst]"+r"(dst), [loop_end]"=&r"(loop_end),
          [temp8]"=&r"(temp8), [temp9]"=&r"(temp9), [temp10]"=&r"(temp10),
          [temp11]"=&r"(temp11), [temp2]"=&r"(temp2)
        : [temp7]"r"(temp7), [temp6]"r"(temp6), [A]"r"(A), [B]"r"(B)
        : "memory", "hi", "lo", "$ac1hi", "$ac1lo",
          "$ac2hi", "$ac2lo", "$ac3hi", "$ac3lo"
      );
    }
    // Scalar tail: frow/irow/dst were advanced past the prefix by the asm.
    for (i = 0; i < (x_out_max & 0x3); ++i) {
      const uint64_t I = (uint64_t)A * *frow++
                       + (uint64_t)B * *irow++;
      const uint32_t J = (uint32_t)((I + ROUNDER) >> WEBP_RESCALER_RFIX);
      const int v = (int)MULT_FIX(J, wrk->fy_scale);
      *dst++ = (v > 255) ? 255u : (uint8_t)v;
    }
  }
}
295 
// Drop the fixed-point helper macros so they do not outlive the code that
// uses them.
#undef MULT_FIX_FLOOR
#undef MULT_FIX
#undef ROUNDER
299 
//------------------------------------------------------------------------------
// Entry point
302 
303 extern void WebPRescalerDspInitMIPSdspR2(void);
304 
WebPRescalerDspInitMIPSdspR2(void)305 WEBP_TSAN_IGNORE_FUNCTION void WebPRescalerDspInitMIPSdspR2(void) {
306   WebPRescalerExportRowExpand = ExportRowExpand_MIPSdspR2;
307 //  WebPRescalerExportRowShrink = ExportRowShrink_MIPSdspR2;
308 }
309 
310 #else  // !WEBP_USE_MIPS_DSP_R2
311 
312 WEBP_DSP_INIT_STUB(WebPRescalerDspInitMIPSdspR2)
313 
314 #endif  // WEBP_USE_MIPS_DSP_R2
315