1 // Tencent is pleased to support the open source community by making ncnn available.
2 //
3 // Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
4 //
5 // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
6 // in compliance with the License. You may obtain a copy of the License at
7 //
8 // https://opensource.org/licenses/BSD-3-Clause
9 //
10 // Unless required by applicable law or agreed to in writing, software distributed
11 // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12 // CONDITIONS OF ANY KIND, either express or implied. See the License for the
13 // specific language governing permissions and limitations under the License.
14
// Compute the four cubic-convolution weights for a sample located at fractional
// offset fx from the second tap of a 4-tap window (taps at -1, 0, +1, +2).
//
//   fx      fractional position, the distance from the tap at 0
//   coeffs  out, receives 4 weights; they sum to 1 by construction because the
//           last weight is derived as the remainder of the first three
static inline void interpolate_cubic(float fx, float* coeffs)
{
    // kernel sharpness parameter of the cubic convolution
    const float A = -0.75f;

    // distances from the sample position to the taps at -1, 0 and +1;
    // the distance to the tap at +2 (2 - fx) is never needed, see below
    const float d0 = fx + 1;
    const float d1 = fx;
    const float d2 = 1 - fx;

    // outer lobe of the kernel (distance in [1, 2])
    const float c0 = A * d0 * d0 * d0 - 5 * A * d0 * d0 + 8 * A * d0 - 4 * A;

    // inner lobe of the kernel (distance in [0, 1])
    const float c1 = (A + 2) * d1 * d1 * d1 - (A + 3) * d1 * d1 + 1;
    const float c2 = (A + 2) * d2 * d2 * d2 - (A + 3) * d2 * d2 + 1;

    coeffs[0] = c0;
    coeffs[1] = c1;
    coeffs[2] = c2;

    // the remaining weight is whatever is left so that the four sum to one
    coeffs[3] = 1.f - c0 - c1 - c2;
}
29
cubic_coeffs(int w,int outw,int * xofs,float * alpha,int align_corner)30 static void cubic_coeffs(int w, int outw, int* xofs, float* alpha, int align_corner)
31 {
32 double scale = (double)w / outw;
33 if (align_corner)
34 {
35 scale = (double)(w - 1) / (outw - 1);
36 }
37
38 for (int dx = 0; dx < outw; dx++)
39 {
40 float fx = (float)((dx + 0.5) * scale - 0.5);
41 if (align_corner)
42 {
43 fx = (float)(dx * scale);
44 }
45
46 int sx = static_cast<int>(floor(fx));
47 fx -= sx;
48
49 interpolate_cubic(fx, alpha + dx * 4);
50
51 if (sx <= -1)
52 {
53 sx = 1;
54 alpha[dx * 4 + 0] = 1.f - alpha[dx * 4 + 3];
55 alpha[dx * 4 + 1] = alpha[dx * 4 + 3];
56 alpha[dx * 4 + 2] = 0.f;
57 alpha[dx * 4 + 3] = 0.f;
58 }
59 if (sx == 0)
60 {
61 sx = 1;
62 alpha[dx * 4 + 0] = alpha[dx * 4 + 0] + alpha[dx * 4 + 1];
63 alpha[dx * 4 + 1] = alpha[dx * 4 + 2];
64 alpha[dx * 4 + 2] = alpha[dx * 4 + 3];
65 alpha[dx * 4 + 3] = 0.f;
66 }
67 if (sx == w - 2)
68 {
69 sx = w - 3;
70 alpha[dx * 4 + 3] = alpha[dx * 4 + 2] + alpha[dx * 4 + 3];
71 alpha[dx * 4 + 2] = alpha[dx * 4 + 1];
72 alpha[dx * 4 + 1] = alpha[dx * 4 + 0];
73 alpha[dx * 4 + 0] = 0.f;
74 }
75 if (sx >= w - 1)
76 {
77 sx = w - 3;
78 alpha[dx * 4 + 3] = 1.f - alpha[dx * 4 + 0];
79 alpha[dx * 4 + 2] = alpha[dx * 4 + 0];
80 alpha[dx * 4 + 1] = 0.f;
81 alpha[dx * 4 + 0] = 0.f;
82 }
83
84 xofs[dx] = sx;
85 }
86 }
87
resize_bicubic_image(const Mat & src,Mat & dst,float * alpha,int * xofs,float * beta,int * yofs)88 static void resize_bicubic_image(const Mat& src, Mat& dst, float* alpha, int* xofs, float* beta, int* yofs)
89 {
90 int w = dst.w;
91 int h = dst.h;
92
93 // loop body
94 Mat rowsbuf0(w);
95 Mat rowsbuf1(w);
96 Mat rowsbuf2(w);
97 Mat rowsbuf3(w);
98 float* rows0 = rowsbuf0;
99 float* rows1 = rowsbuf1;
100 float* rows2 = rowsbuf2;
101 float* rows3 = rowsbuf3;
102
103 int prev_sy1 = -3;
104
105 for (int dy = 0; dy < h; dy++)
106 {
107 int sy = yofs[dy];
108
109 if (sy == prev_sy1)
110 {
111 // reuse all rows
112 }
113 else if (sy == prev_sy1 + 1)
114 {
115 // hresize one row
116 float* rows0_old = rows0;
117 rows0 = rows1;
118 rows1 = rows2;
119 rows2 = rows3;
120 rows3 = rows0_old;
121 const float* S3 = src.row(sy + 2);
122
123 const float* alphap = alpha;
124 float* rows3p = rows3;
125 for (int dx = 0; dx < w; dx++)
126 {
127 int sx = xofs[dx];
128 const float* S3p = S3 + sx;
129
130 float a0 = alphap[0];
131 float a1 = alphap[1];
132 float a2 = alphap[2];
133 float a3 = alphap[3];
134 rows3p[dx] = S3p[-1] * a0 + S3p[0] * a1 + S3p[1] * a2 + S3p[2] * a3;
135
136 alphap += 4;
137 }
138 }
139 else if (sy == prev_sy1 + 2)
140 {
141 // hresize two rows
142 float* rows0_old = rows0;
143 float* rows1_old = rows1;
144 rows0 = rows2;
145 rows1 = rows3;
146 rows2 = rows0_old;
147 rows3 = rows1_old;
148 const float* S2 = src.row(sy + 1);
149 const float* S3 = src.row(sy + 2);
150
151 const float* alphap = alpha;
152 float* rows2p = rows2;
153 float* rows3p = rows3;
154 for (int dx = 0; dx < w; dx++)
155 {
156 int sx = xofs[dx];
157 const float* S2p = S2 + sx;
158 const float* S3p = S3 + sx;
159
160 float a0 = alphap[0];
161 float a1 = alphap[1];
162 float a2 = alphap[2];
163 float a3 = alphap[3];
164 rows2p[dx] = S2p[-1] * a0 + S2p[0] * a1 + S2p[1] * a2 + S2p[2] * a3;
165 rows3p[dx] = S3p[-1] * a0 + S3p[0] * a1 + S3p[1] * a2 + S3p[2] * a3;
166
167 alphap += 4;
168 }
169 }
170 else if (sy == prev_sy1 + 3)
171 {
172 // hresize three rows
173 float* rows0_old = rows0;
174 float* rows1_old = rows1;
175 float* rows2_old = rows2;
176 rows0 = rows3;
177 rows1 = rows0_old;
178 rows2 = rows1_old;
179 rows3 = rows2_old;
180 const float* S1 = src.row(sy);
181 const float* S2 = src.row(sy + 1);
182 const float* S3 = src.row(sy + 2);
183
184 const float* alphap = alpha;
185 float* rows1p = rows1;
186 float* rows2p = rows2;
187 float* rows3p = rows3;
188 for (int dx = 0; dx < w; dx++)
189 {
190 int sx = xofs[dx];
191 const float* S1p = S1 + sx;
192 const float* S2p = S2 + sx;
193 const float* S3p = S3 + sx;
194
195 float a0 = alphap[0];
196 float a1 = alphap[1];
197 float a2 = alphap[2];
198 float a3 = alphap[3];
199 rows1p[dx] = S1p[-1] * a0 + S1p[0] * a1 + S1p[1] * a2 + S1p[2] * a3;
200 rows2p[dx] = S2p[-1] * a0 + S2p[0] * a1 + S2p[1] * a2 + S2p[2] * a3;
201 rows3p[dx] = S3p[-1] * a0 + S3p[0] * a1 + S3p[1] * a2 + S3p[2] * a3;
202
203 alphap += 4;
204 }
205 }
206 else
207 {
208 // hresize four rows
209 const float* S0 = src.row(sy - 1);
210 const float* S1 = src.row(sy);
211 const float* S2 = src.row(sy + 1);
212 const float* S3 = src.row(sy + 2);
213
214 const float* alphap = alpha;
215 float* rows0p = rows0;
216 float* rows1p = rows1;
217 float* rows2p = rows2;
218 float* rows3p = rows3;
219 for (int dx = 0; dx < w; dx++)
220 {
221 int sx = xofs[dx];
222 const float* S0p = S0 + sx;
223 const float* S1p = S1 + sx;
224 const float* S2p = S2 + sx;
225 const float* S3p = S3 + sx;
226
227 float a0 = alphap[0];
228 float a1 = alphap[1];
229 float a2 = alphap[2];
230 float a3 = alphap[3];
231 rows0p[dx] = S0p[-1] * a0 + S0p[0] * a1 + S0p[1] * a2 + S0p[2] * a3;
232 rows1p[dx] = S1p[-1] * a0 + S1p[0] * a1 + S1p[1] * a2 + S1p[2] * a3;
233 rows2p[dx] = S2p[-1] * a0 + S2p[0] * a1 + S2p[1] * a2 + S2p[2] * a3;
234 rows3p[dx] = S3p[-1] * a0 + S3p[0] * a1 + S3p[1] * a2 + S3p[2] * a3;
235
236 alphap += 4;
237 }
238 }
239
240 prev_sy1 = sy;
241
242 // vresize
243 float b0 = beta[0];
244 float b1 = beta[1];
245 float b2 = beta[2];
246 float b3 = beta[3];
247
248 float* rows0p = rows0;
249 float* rows1p = rows1;
250 float* rows2p = rows2;
251 float* rows3p = rows3;
252 float* Dp = dst.row(dy);
253 for (int dx = 0; dx < w; dx++)
254 {
255 // D[x] = rows0[x]*b0 + rows1[x]*b1 + rows2[x]*b2 + rows3[x]*b3;
256 *Dp++ = *rows0p++ * b0 + *rows1p++ * b1 + *rows2p++ * b2 + *rows3p++ * b3;
257 }
258
259 beta += 4;
260 }
261 }
262