1 /* various interpolation templates
2  */
3 
4 /*
5 
6     This file is part of VIPS.
7 
8     VIPS is free software; you can redistribute it and/or modify
9     it under the terms of the GNU Lesser General Public License as published by
10     the Free Software Foundation; either version 2 of the License, or
11     (at your option) any later version.
12 
13     This program is distributed in the hope that it will be useful,
14     but WITHOUT ANY WARRANTY; without even the implied warranty of
15     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16     GNU Lesser General Public License for more details.
17 
18     You should have received a copy of the GNU Lesser General Public License
19     along with this program; if not, write to the Free Software
20     Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
21     02110-1301  USA
22 
23  */
24 
25 /*
26 
27     These files are distributed with VIPS - http://www.vips.ecs.soton.ac.uk
28 
29  */
30 
31 /*
32  * Various casts which assume that the data is already in range. (That
33  * is, they are to be used with monotone samplers.)
34  */
35 template <typename T> static T inline
to_fptypes(const double val)36 to_fptypes( const double val )
37 {
38 	const T newval = val;
39 
40 	return( newval );
41 }
42 
43 template <typename T> static T inline
to_withsign(const double val)44 to_withsign( const double val )
45 {
46 	const int sign_of_val = 2 * ( val >= 0. ) - 1;
47 	const int rounded_abs_val = .5 + sign_of_val * val;
48 	const T newval = sign_of_val * rounded_abs_val;
49 
50 	return( newval );
51 }
52 
53 template <typename T> static T inline
to_nosign(const double val)54 to_nosign( const double val )
55 {
56 	const T newval = .5 + val;
57 
58 	return( newval );
59 }
60 
61 /*
62  * Various bilinear implementation templates. Note that no clampling
63  * is used: There is an assumption that the data is such that
64  * over/underflow is not an issue:
65  */
66 
67 /*
68  * Bilinear interpolation for float and double types. The first four
69  * inputs are weights, the last four are the corresponding pixel
70  * values:
71  */
72 template <typename T> static T inline
bilinear_fptypes(const double w_times_z,const double x_times_z,const double w_times_y,const double x_times_y,const double tre_thr,const double tre_thrfou,const double trequa_thr,const double trequa_thrfou)73 bilinear_fptypes(
74 	const double w_times_z,
75 	const double x_times_z,
76 	const double w_times_y,
77 	const double x_times_y,
78 	const double tre_thr,
79 	const double tre_thrfou,
80 	const double trequa_thr,
81 	const double trequa_thrfou )
82 {
83 	const T newval =
84 		w_times_z * tre_thr +
85 		x_times_z * tre_thrfou +
86 		w_times_y * trequa_thr +
87 		x_times_y * trequa_thrfou;
88 
89 	return( newval );
90 }
91 
92 /*
93  * Bilinear interpolation for signed integer types:
94  */
95 template <typename T> static T inline
bilinear_withsign(const double w_times_z,const double x_times_z,const double w_times_y,const double x_times_y,const double tre_thr,const double tre_thrfou,const double trequa_thr,const double trequa_thrfou)96 bilinear_withsign(
97 	const double w_times_z,
98 	const double x_times_z,
99 	const double w_times_y,
100 	const double x_times_y,
101 	const double tre_thr,
102 	const double tre_thrfou,
103 	const double trequa_thr,
104 	const double trequa_thrfou )
105 {
106 	const double val =
107 		w_times_z * tre_thr +
108 		x_times_z * tre_thrfou +
109 		w_times_y * trequa_thr +
110 		x_times_y * trequa_thrfou;
111 
112 	const int sign_of_val = 2 * ( val >= 0. ) - 1;
113 
114 	const int rounded_abs_val = .5 + sign_of_val * val;
115 
116 	const T newval = sign_of_val * rounded_abs_val;
117 
118 	return( newval );
119 }
120 
121 /*
122  * Bilinear Interpolation for unsigned integer types:
123  */
124 template <typename T> static T inline
bilinear_nosign(const double w_times_z,const double x_times_z,const double w_times_y,const double x_times_y,const double tre_thr,const double tre_thrfou,const double trequa_thr,const double trequa_thrfou)125 bilinear_nosign(
126 	const double w_times_z,
127 	const double x_times_z,
128 	const double w_times_y,
129 	const double x_times_y,
130 	const double tre_thr,
131 	const double tre_thrfou,
132 	const double trequa_thr,
133 	const double trequa_thrfou )
134 {
135 	const T newval =
136 		w_times_z * tre_thr +
137 		x_times_z * tre_thrfou +
138 		w_times_y * trequa_thr +
139 		x_times_y * trequa_thrfou +
140 		0.5;
141 
142 	return( newval );
143 }
144 
145 /*
146  * Bicubic (Catmull-Rom) interpolation templates:
147  */
148 
149 static int inline
unsigned_fixed_round(int v)150 unsigned_fixed_round( int v )
151 {
152 	const int round_by = VIPS_INTERPOLATE_SCALE >> 1;
153 
154 	return( (v + round_by) >> VIPS_INTERPOLATE_SHIFT );
155 }
156 
157 /* Fixed-point integer bicubic, used for 8-bit types.
158  */
159 template <typename T> static int inline
bicubic_unsigned_int(const T uno_one,const T uno_two,const T uno_thr,const T uno_fou,const T dos_one,const T dos_two,const T dos_thr,const T dos_fou,const T tre_one,const T tre_two,const T tre_thr,const T tre_fou,const T qua_one,const T qua_two,const T qua_thr,const T qua_fou,const int * restrict cx,const int * restrict cy)160 bicubic_unsigned_int(
161 	const T uno_one, const T uno_two, const T uno_thr, const T uno_fou,
162 	const T dos_one, const T dos_two, const T dos_thr, const T dos_fou,
163 	const T tre_one, const T tre_two, const T tre_thr, const T tre_fou,
164 	const T qua_one, const T qua_two, const T qua_thr, const T qua_fou,
165 	const int* restrict cx, const int* restrict cy )
166 {
167 	const int c0 = cx[0];
168 	const int c1 = cx[1];
169 	const int c2 = cx[2];
170 	const int c3 = cx[3];
171 
172 	const int r0 = unsigned_fixed_round(
173 		c0 * uno_one +
174 		c1 * uno_two +
175 		c2 * uno_thr +
176 		c3 * uno_fou );
177 	const int r1 = unsigned_fixed_round(
178 		c0 * dos_one +
179 		c1 * dos_two +
180 		c2 * dos_thr +
181 		c3 * dos_fou );
182 	const int r2 = unsigned_fixed_round(
183 		c0 * tre_one +
184 		c1 * tre_two +
185 		c2 * tre_thr +
186 		c3 * tre_fou );
187 	const int r3 = unsigned_fixed_round(
188 		c0 * qua_one +
189 		c1 * qua_two +
190 		c2 * qua_thr +
191 		c3 * qua_fou );
192 
193 	return( unsigned_fixed_round(
194 		cy[0] * r0 +
195 		cy[1] * r1 +
196 		cy[2] * r2 +
197 		cy[3] * r3 ) );
198 }
199 
200 static int inline
signed_fixed_round(int v)201 signed_fixed_round( int v )
202 {
203 	const int sign_of_v = 2 * (v > 0) - 1;
204 	const int round_by = sign_of_v * (VIPS_INTERPOLATE_SCALE >> 1);
205 
206 	return( (v + round_by) >> VIPS_INTERPOLATE_SHIFT );
207 }
208 
209 /* Fixed-point integer bicubic, used for 8-bit types.
210  */
211 template <typename T> static int inline
bicubic_signed_int(const T uno_one,const T uno_two,const T uno_thr,const T uno_fou,const T dos_one,const T dos_two,const T dos_thr,const T dos_fou,const T tre_one,const T tre_two,const T tre_thr,const T tre_fou,const T qua_one,const T qua_two,const T qua_thr,const T qua_fou,const int * restrict cx,const int * restrict cy)212 bicubic_signed_int(
213 	const T uno_one, const T uno_two, const T uno_thr, const T uno_fou,
214 	const T dos_one, const T dos_two, const T dos_thr, const T dos_fou,
215 	const T tre_one, const T tre_two, const T tre_thr, const T tre_fou,
216 	const T qua_one, const T qua_two, const T qua_thr, const T qua_fou,
217 	const int* restrict cx, const int* restrict cy )
218 {
219 	const int c0 = cx[0];
220 	const int c1 = cx[1];
221 	const int c2 = cx[2];
222 	const int c3 = cx[3];
223 
224 	const int r0 = signed_fixed_round(
225 		c0 * uno_one +
226 		c1 * uno_two +
227 		c2 * uno_thr +
228 		c3 * uno_fou );
229 	const int r1 = signed_fixed_round(
230 		c0 * dos_one +
231 		c1 * dos_two +
232 		c2 * dos_thr +
233 		c3 * dos_fou );
234 	const int r2 = signed_fixed_round(
235 		c0 * tre_one +
236 		c1 * tre_two +
237 		c2 * tre_thr +
238 		c3 * tre_fou );
239 	const int r3 = signed_fixed_round(
240 		c0 * qua_one +
241 		c1 * qua_two +
242 		c2 * qua_thr +
243 		c3 * qua_fou );
244 
245 	return( signed_fixed_round(
246 		cy[0] * r0 +
247 		cy[1] * r1 +
248 		cy[2] * r2 +
249 		cy[3] * r3 ) );
250 }
251 
252 template <typename T> static T inline
cubic_float(const T one,const T two,const T thr,const T fou,const double * restrict cx)253 cubic_float(
254 	const T one, const T two, const T thr, const T fou,
255 	const double* restrict cx )
256 {
257 	return( cx[0] * one +
258 		 cx[1] * two +
259 		 cx[2] * thr +
260 		 cx[3] * fou );
261 }
262 
263 /* Floating-point bicubic, used for int/float/double types.
264  */
265 template <typename T> static T inline
bicubic_float(const T uno_one,const T uno_two,const T uno_thr,const T uno_fou,const T dos_one,const T dos_two,const T dos_thr,const T dos_fou,const T tre_one,const T tre_two,const T tre_thr,const T tre_fou,const T qua_one,const T qua_two,const T qua_thr,const T qua_fou,const double * restrict cx,const double * restrict cy)266 bicubic_float(
267 	const T uno_one, const T uno_two, const T uno_thr, const T uno_fou,
268 	const T dos_one, const T dos_two, const T dos_thr, const T dos_fou,
269 	const T tre_one, const T tre_two, const T tre_thr, const T tre_fou,
270 	const T qua_one, const T qua_two, const T qua_thr, const T qua_fou,
271 	const double* restrict cx, const double* restrict cy )
272 {
273 	const double r0 = cubic_float<T>(
274 		uno_one, uno_two, uno_thr, uno_fou, cx );
275 	const double r1 = cubic_float<T>(
276 		dos_one, dos_two, dos_thr, dos_fou, cx );
277 	const double r2 = cubic_float<T>(
278 		tre_one, tre_two, tre_thr, tre_fou, cx );
279 	const double r3 = cubic_float<T>(
280 		qua_one, qua_two, qua_thr, qua_fou, cx );
281 
282 	return( cubic_float<T>( r0, r1, r2, r3, cy ) );
283 }
284 
285 /* Given an offset in [0,1] (we can have x == 1 when building tables),
286  * calculate c0, c1, c2, c3, the catmull-rom coefficients. This is called
287  * from the interpolator as well as from the table builder.
288  */
289 static void inline
calculate_coefficients_catmull(double c[4],const double x)290 calculate_coefficients_catmull( double c[4], const double x )
291 {
292 	/* Nicolas believes that the following is an hitherto unknown
293 	 * hyper-efficient method of computing Catmull-Rom coefficients. It
294 	 * only uses 4* & 1+ & 5- for a total of only 10 flops to compute
295 	 * four coefficients.
296 	 */
297 	const double cr1  = 1. - x;
298 	const double cr2  = -.5 * x;
299 	const double cr3  = cr1 * cr2;
300 	const double cone = cr1 * cr3;
301 	const double cfou = x * cr3;
302 	const double cr4  = cfou - cone;
303 	const double ctwo = cr1 - cone + cr4;
304 	const double cthr = x - cfou - cr4;
305 
306 	g_assert( x >= 0. && x <= 1. );
307 
308 	c[0] = cone;
309 	c[3] = cfou;
310 	c[1] = ctwo;
311 	c[2] = cthr;
312 }
313 
314 /* Given an x in [0,1] (we can have x == 1 when building tables),
315  * calculate c0 .. c(@shrink + 1), the triangle coefficients. This is called
316  * from the interpolator as well as from the table builder.
317  */
318 static void inline
calculate_coefficients_triangle(double * c,const double shrink,const double x)319 calculate_coefficients_triangle( double *c,
320 	const double shrink, const double x )
321 {
322 	/* Needs to be in sync with vips_reduce_get_points().
323 	 */
324 	const int n_points = 2 * rint( shrink ) + 1;
325 	const double half = x + n_points / 2.0 - 1;
326 
327 	int i;
328 	double sum;
329 
330 	sum = 0;
331 	for( i = 0; i < n_points; i++ ) {
332 		const double xp = (i - half) / shrink;
333 
334 		double l;
335 
336 		l = 1.0 - VIPS_FABS( xp );
337 		l = VIPS_MAX( 0.0, l );
338 
339 		c[i] = l;
340 		sum += l;
341 	}
342 
343 	for( i = 0; i < n_points; i++ )
344 		c[i] /= sum;
345 }
346 
347 /* Generate a cubic filter. See:
348  *
349  * Mitchell and Netravali, Reconstruction Filters in Computer Graphics
350  * Computer Graphics, Volume 22, Number 4, August 1988.
351  *
352  * B = 1,   C = 0   - cubic B-spline
353  * B = 1/3, C = 1/3 - Mitchell
354  * B = 0,   C = 1/2 - Catmull-Rom spline
355  */
356 static void inline
calculate_coefficients_cubic(double * c,const double shrink,const double x,double B,double C)357 calculate_coefficients_cubic( double *c,
358 	const double shrink, const double x, double B, double C )
359 {
360 	/* Needs to be in sync with vips_reduce_get_points().
361 	 */
362 	const int n_points = 2 * rint( 2 * shrink ) + 1;
363 	const double half = x + n_points / 2.0 - 1;
364 
365 	int i;
366 	double sum;
367 
368 	sum = 0;
369 	for( i = 0; i < n_points; i++ ) {
370 		const double xp = (i - half) / shrink;
371 		const double axp = VIPS_FABS( xp );
372 		const double axp2 = axp * axp;
373 		const double axp3 = axp2 * axp;
374 
375 		double l;
376 
377 		if( axp <= 1 )
378 			l = ((12 - 9 * B - 6 * C) * axp3 +
379 			     (-18 + 12 * B + 6 * C) * axp2 +
380 			     (6 - 2 * B)) / 6;
381 		else if( axp <= 2 )
382 			l = ((-B - 6 * C) * axp3 +
383 			     (6 * B + 30 * C) * axp2 +
384 			     (-12 * B - 48 * C) * axp +
385 			     (8 * B + 24 * C)) / 6;
386 		else
387 			l = 0.0;
388 
389 		c[i] = l;
390 		sum += l;
391 	}
392 
393 	for( i = 0; i < n_points; i++ )
394 		c[i] /= sum;
395 }
396 
397 /* Given an x in [0,1] (we can have x == 1 when building tables),
398  * calculate c0 .. c(@a * @shrink + 1), the lanczos coefficients. This is called
399  * from the interpolator as well as from the table builder.
400  *
401  * @a is the number of lobes, so usually 2 or 3. @shrink is the reduction
402  * factor, so 1 for interpolation, 2 for a x2 reduction, etc. We need more
403  * points for large decimations to avoid aliasing.
404  */
405 static void inline
calculate_coefficients_lanczos(double * c,const int a,const double shrink,const double x)406 calculate_coefficients_lanczos( double *c,
407 	const int a, const double shrink, const double x )
408 {
409 	/* Needs to be in sync with vips_reduce_get_points().
410 	 */
411 	const int n_points = 2 * rint( a * shrink ) + 1;
412 	const double half = x + n_points / 2.0 - 1;
413 
414 	int i;
415 	double sum;
416 
417 	sum = 0;
418 	for( i = 0; i < n_points; i++ ) {
419 		const double xp = (i - half) / shrink;
420 
421 		double l;
422 
423 		if( xp == 0.0 )
424 			l = 1.0;
425 		else if( xp < -a )
426 			l = 0.0;
427 		else if( xp > a )
428 			l = 0.0;
429 		else
430 			l = (double) a * sin( VIPS_PI * xp ) *
431 				sin( VIPS_PI * xp / (double) a ) /
432 				(VIPS_PI * VIPS_PI * xp * xp);
433 
434 		c[i] = l;
435 		sum += l;
436 	}
437 
438 	for( i = 0; i < n_points; i++ )
439 		c[i] /= sum;
440 }
441 
442 /* Our inner loop for resampling with a convolution. Operate on elements of
443  * type T, gather results in an intermediate of type IT.
444  */
445 template <typename T, typename IT>
446 static IT
reduce_sum(const T * restrict in,int stride,const IT * restrict c,int n)447 reduce_sum( const T * restrict in, int stride, const IT * restrict c, int n )
448 {
449 	IT sum;
450 
451 	sum = 0;
452 	for( int i = 0; i < n; i++ ) {
453 		sum += c[i] * in[0];
454 		in += stride;
455 	}
456 
457 	return( sum );
458 }
459