1 // Tencent is pleased to support the open source community by making ncnn available.
2 //
3 // Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
4 //
5 // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
6 // in compliance with the License. You may obtain a copy of the License at
7 //
8 // https://opensource.org/licenses/BSD-3-Clause
9 //
10 // Unless required by applicable law or agreed to in writing, software distributed
11 // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12 // CONDITIONS OF ANY KIND, either express or implied. See the License for the
13 // specific language governing permissions and limitations under the License.
14 
// Evaluate the four cubic interpolation weights for the fractional offset fx
// and write them to coeffs[0..3].
//
// fx      fractional offset passed in by the caller
// coeffs  out: receives exactly four weights
//
// The first three weights come from the cubic polynomials below (A = -0.75);
// the fourth is the remainder 1 - (w0 + w1 + w2) instead of a fourth polynomial.
static inline void interpolate_cubic(float fx, float* coeffs)
{
    const float A = -0.75f;

    // Arguments fed to the polynomial pieces.
    const float t0 = fx + 1;
    const float t1 = fx;
    const float t2 = 1 - fx;

    const float w0 = A * t0 * t0 * t0 - 5 * A * t0 * t0 + 8 * A * t0 - 4 * A;
    const float w1 = (A + 2) * t1 * t1 * t1 - (A + 3) * t1 * t1 + 1;
    const float w2 = (A + 2) * t2 * t2 * t2 - (A + 3) * t2 * t2 + 1;

    coeffs[0] = w0;
    coeffs[1] = w1;
    coeffs[2] = w2;
    coeffs[3] = 1.f - w0 - w1 - w2;
}
29 
cubic_coeffs(int w,int outw,int * xofs,float * alpha,int align_corner)30 static void cubic_coeffs(int w, int outw, int* xofs, float* alpha, int align_corner)
31 {
32     double scale = (double)w / outw;
33     if (align_corner)
34     {
35         scale = (double)(w - 1) / (outw - 1);
36     }
37 
38     for (int dx = 0; dx < outw; dx++)
39     {
40         float fx = (float)((dx + 0.5) * scale - 0.5);
41         if (align_corner)
42         {
43             fx = (float)(dx * scale);
44         }
45 
46         int sx = static_cast<int>(floor(fx));
47         fx -= sx;
48 
49         interpolate_cubic(fx, alpha + dx * 4);
50 
51         if (sx <= -1)
52         {
53             sx = 1;
54             alpha[dx * 4 + 0] = 1.f - alpha[dx * 4 + 3];
55             alpha[dx * 4 + 1] = alpha[dx * 4 + 3];
56             alpha[dx * 4 + 2] = 0.f;
57             alpha[dx * 4 + 3] = 0.f;
58         }
59         if (sx == 0)
60         {
61             sx = 1;
62             alpha[dx * 4 + 0] = alpha[dx * 4 + 0] + alpha[dx * 4 + 1];
63             alpha[dx * 4 + 1] = alpha[dx * 4 + 2];
64             alpha[dx * 4 + 2] = alpha[dx * 4 + 3];
65             alpha[dx * 4 + 3] = 0.f;
66         }
67         if (sx == w - 2)
68         {
69             sx = w - 3;
70             alpha[dx * 4 + 3] = alpha[dx * 4 + 2] + alpha[dx * 4 + 3];
71             alpha[dx * 4 + 2] = alpha[dx * 4 + 1];
72             alpha[dx * 4 + 1] = alpha[dx * 4 + 0];
73             alpha[dx * 4 + 0] = 0.f;
74         }
75         if (sx >= w - 1)
76         {
77             sx = w - 3;
78             alpha[dx * 4 + 3] = 1.f - alpha[dx * 4 + 0];
79             alpha[dx * 4 + 2] = alpha[dx * 4 + 0];
80             alpha[dx * 4 + 1] = 0.f;
81             alpha[dx * 4 + 0] = 0.f;
82         }
83 
84         xofs[dx] = sx;
85     }
86 }
87 
resize_bicubic_image(const Mat & src,Mat & dst,float * alpha,int * xofs,float * beta,int * yofs)88 static void resize_bicubic_image(const Mat& src, Mat& dst, float* alpha, int* xofs, float* beta, int* yofs)
89 {
90     int w = dst.w;
91     int h = dst.h;
92 
93     // loop body
94     Mat rowsbuf0(w);
95     Mat rowsbuf1(w);
96     Mat rowsbuf2(w);
97     Mat rowsbuf3(w);
98     float* rows0 = rowsbuf0;
99     float* rows1 = rowsbuf1;
100     float* rows2 = rowsbuf2;
101     float* rows3 = rowsbuf3;
102 
103     int prev_sy1 = -3;
104 
105     for (int dy = 0; dy < h; dy++)
106     {
107         int sy = yofs[dy];
108 
109         if (sy == prev_sy1)
110         {
111             // reuse all rows
112         }
113         else if (sy == prev_sy1 + 1)
114         {
115             // hresize one row
116             float* rows0_old = rows0;
117             rows0 = rows1;
118             rows1 = rows2;
119             rows2 = rows3;
120             rows3 = rows0_old;
121             const float* S3 = src.row(sy + 2);
122 
123             const float* alphap = alpha;
124             float* rows3p = rows3;
125             for (int dx = 0; dx < w; dx++)
126             {
127                 int sx = xofs[dx];
128                 const float* S3p = S3 + sx;
129 
130                 float a0 = alphap[0];
131                 float a1 = alphap[1];
132                 float a2 = alphap[2];
133                 float a3 = alphap[3];
134                 rows3p[dx] = S3p[-1] * a0 + S3p[0] * a1 + S3p[1] * a2 + S3p[2] * a3;
135 
136                 alphap += 4;
137             }
138         }
139         else if (sy == prev_sy1 + 2)
140         {
141             // hresize two rows
142             float* rows0_old = rows0;
143             float* rows1_old = rows1;
144             rows0 = rows2;
145             rows1 = rows3;
146             rows2 = rows0_old;
147             rows3 = rows1_old;
148             const float* S2 = src.row(sy + 1);
149             const float* S3 = src.row(sy + 2);
150 
151             const float* alphap = alpha;
152             float* rows2p = rows2;
153             float* rows3p = rows3;
154             for (int dx = 0; dx < w; dx++)
155             {
156                 int sx = xofs[dx];
157                 const float* S2p = S2 + sx;
158                 const float* S3p = S3 + sx;
159 
160                 float a0 = alphap[0];
161                 float a1 = alphap[1];
162                 float a2 = alphap[2];
163                 float a3 = alphap[3];
164                 rows2p[dx] = S2p[-1] * a0 + S2p[0] * a1 + S2p[1] * a2 + S2p[2] * a3;
165                 rows3p[dx] = S3p[-1] * a0 + S3p[0] * a1 + S3p[1] * a2 + S3p[2] * a3;
166 
167                 alphap += 4;
168             }
169         }
170         else if (sy == prev_sy1 + 3)
171         {
172             // hresize three rows
173             float* rows0_old = rows0;
174             float* rows1_old = rows1;
175             float* rows2_old = rows2;
176             rows0 = rows3;
177             rows1 = rows0_old;
178             rows2 = rows1_old;
179             rows3 = rows2_old;
180             const float* S1 = src.row(sy);
181             const float* S2 = src.row(sy + 1);
182             const float* S3 = src.row(sy + 2);
183 
184             const float* alphap = alpha;
185             float* rows1p = rows1;
186             float* rows2p = rows2;
187             float* rows3p = rows3;
188             for (int dx = 0; dx < w; dx++)
189             {
190                 int sx = xofs[dx];
191                 const float* S1p = S1 + sx;
192                 const float* S2p = S2 + sx;
193                 const float* S3p = S3 + sx;
194 
195                 float a0 = alphap[0];
196                 float a1 = alphap[1];
197                 float a2 = alphap[2];
198                 float a3 = alphap[3];
199                 rows1p[dx] = S1p[-1] * a0 + S1p[0] * a1 + S1p[1] * a2 + S1p[2] * a3;
200                 rows2p[dx] = S2p[-1] * a0 + S2p[0] * a1 + S2p[1] * a2 + S2p[2] * a3;
201                 rows3p[dx] = S3p[-1] * a0 + S3p[0] * a1 + S3p[1] * a2 + S3p[2] * a3;
202 
203                 alphap += 4;
204             }
205         }
206         else
207         {
208             // hresize four rows
209             const float* S0 = src.row(sy - 1);
210             const float* S1 = src.row(sy);
211             const float* S2 = src.row(sy + 1);
212             const float* S3 = src.row(sy + 2);
213 
214             const float* alphap = alpha;
215             float* rows0p = rows0;
216             float* rows1p = rows1;
217             float* rows2p = rows2;
218             float* rows3p = rows3;
219             for (int dx = 0; dx < w; dx++)
220             {
221                 int sx = xofs[dx];
222                 const float* S0p = S0 + sx;
223                 const float* S1p = S1 + sx;
224                 const float* S2p = S2 + sx;
225                 const float* S3p = S3 + sx;
226 
227                 float a0 = alphap[0];
228                 float a1 = alphap[1];
229                 float a2 = alphap[2];
230                 float a3 = alphap[3];
231                 rows0p[dx] = S0p[-1] * a0 + S0p[0] * a1 + S0p[1] * a2 + S0p[2] * a3;
232                 rows1p[dx] = S1p[-1] * a0 + S1p[0] * a1 + S1p[1] * a2 + S1p[2] * a3;
233                 rows2p[dx] = S2p[-1] * a0 + S2p[0] * a1 + S2p[1] * a2 + S2p[2] * a3;
234                 rows3p[dx] = S3p[-1] * a0 + S3p[0] * a1 + S3p[1] * a2 + S3p[2] * a3;
235 
236                 alphap += 4;
237             }
238         }
239 
240         prev_sy1 = sy;
241 
242         // vresize
243         float b0 = beta[0];
244         float b1 = beta[1];
245         float b2 = beta[2];
246         float b3 = beta[3];
247 
248         float* rows0p = rows0;
249         float* rows1p = rows1;
250         float* rows2p = rows2;
251         float* rows3p = rows3;
252         float* Dp = dst.row(dy);
253         for (int dx = 0; dx < w; dx++)
254         {
255             //             D[x] = rows0[x]*b0 + rows1[x]*b1 + rows2[x]*b2 + rows3[x]*b3;
256             *Dp++ = *rows0p++ * b0 + *rows1p++ * b1 + *rows2p++ * b2 + *rows3p++ * b3;
257         }
258 
259         beta += 4;
260     }
261 }
262