1 /*
2  *  weighting.c
3  *
4  *     Copyright (C) Charles 'Buck' Krasic - April 2000
5  *     Copyright (C) Erik Walthinsen - April 2000
6  *
7  *  This file is part of libdv, a free DV (IEC 61834/SMPTE 314M)
8  *  codec.
9  *
10  *  libdv is free software; you can redistribute it and/or modify it
11  *  under the terms of the GNU Lesser Public License as published by
12  *  the Free Software Foundation; either version 2.1, or (at your
13  *  option) any later version.
14  *
15  *  libdv is distributed in the hope that it will be useful, but
16  *  WITHOUT ANY WARRANTY; without even the implied warranty of
17  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  *  Lesser Public License for more details.
19  *
20  *  You should have received a copy of the GNU Lesser Public License
21  *  along with libdv; see the file COPYING.  If not, write to
22  *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23  *
24  *  The libdv homepage is http://libdv.sourceforge.net/.
25  */
26 
27 /** @file
28  *  @ingroup weighting
29  *  @brief Implementation for @link weighting Coefficient Weighting @endlink
30  */
31 
32 /** @weakgroup weighting Coefficient Weighting
33  *
34  *  DCT coeffictions are stored with 10 bits of precision, although
35  *  there is the possibility that the DCT transform will produce
36  *  values outside of this range.  The weighting operation ensures
37  *  that all values fit into the allocated 10 bit range.
38  *
39  *  @{
40  */
41 
42 #if HAVE_CONFIG_H
43 # include <config.h>
44 #endif
45 
46 #include <math.h>
47 
48 #include "weighting.h"
49 
50 dv_coeff_t preSC[64] ALIGN32 = {
51 	16384,22725,21407,19266, 16384,12873,8867,4520,
52 	22725,31521,29692,26722, 22725,17855,12299,6270,
53 	21407,29692,27969,25172, 21407,16819,11585,5906,
54 	19266,26722,25172,22654, 19266,15137,10426,5315,
55 
56 	16384,22725,21407,19266, 16384,12873,8867,4520,
57 	12873,17855,16819,15137, 25746,20228,13933,7103,
58 	17734,24598,23170,20853, 17734,13933,9597,4892,
59 	18081,25080,23624,21261, 18081,14206,9785,4988
60 };
61 
62 dv_coeff_t postSC88[64] ALIGN32;
63 dv_coeff_t postSC248[64] ALIGN32;
64 
65 static double W[8];
66 
67 #if (!ARCH_X86) && (!ARCH_X86_64)
68 static dv_coeff_t dv_weight_inverse_88_matrix[64];
69 #endif
70 
71 #if BRUTE_FORCE_DCT_88
72 static double dv_weight_88_matrix[64];
73 #endif
74 #if BRUTE_FORCE_DCT_248
75 static double dv_weight_248_matrix[64];
76 #endif
77 
78 double dv_weight_inverse_248_matrix[64];
79 
80 
CS(int m)81 static inline double CS(int m) {
82   return cos(((double)m) * M_PI / 16.0);
83 }
84 
85 static void weight_88_inverse_float(double *block);
86 static void weight_88_float(double *block);
87 static void weight_248_float(double *block);
88 
int_val(double f)89 static inline short int_val(double f)
90 {
91 	return (short) floor(f + 0.5);
92 }
93 
postscale88_init(double * post_sc)94 static void postscale88_init(double* post_sc)
95 {
96 	int i,j;
97 	double ci,cj;
98 
99 	for( i = 0; i < 8; i++ ) {
100 		ci = i==0 ? 1/(8.*sqrt(2.)) : 1.0/16.0;
101 		/* di = i==0 ? 1.5/(sqrt(2.)) : 0.5;
102 		   ps3[i] = 2.0*2.0*ci/cos(i*M_PI/16);
103 		   israelh. this is table1 from AAN paper.
104 		   Note the trick if 8 or 16 deivision
105 		*/
106 		for( j = 0; j < 8; j++) {
107 			cj = j==0 ? 1/(8*sqrt(2.)) : 1.0/16.0;
108 			post_sc[i * 8 + j] = 4.0*4.0 * ci * cj /
109 				(cos(i*M_PI/16)*cos(j*M_PI/16));
110 			/* israelh. patch the first 4.0? */
111 		}
112 	}
113 	post_sc[63] = 1.0;
114 }
115 
postscale248_init(double * post_sc)116 static void postscale248_init(double* post_sc)
117 {
118 	int i,j;
119 	double ci,cj;
120 
121 	for( i = 0; i < 4; i++ ) {
122 		ci = i==0 ? 1/(4.*sqrt(2.)) : 1.0/8.0;
123 		/* di = i==0 ? 1.5/(sqrt(2.)) : 0.5;
124 		   ps3[i] = 2.0*2.0*ci/cos(i*M_PI/16);
125 		   israelh. this is table1 from AAN paper.
126 		   Note the trick if 8 or 16 deivision
127 		*/
128 		for( j = 0; j < 8; j++) {
129 			cj = j==0 ? 1/(8*sqrt(2.)) : 1.0/16.0;
130 			post_sc[i * 8 + j] = 4.0*2.0 * ci * cj /
131 				(cos(i*M_PI/8)*cos(j*M_PI/16));
132 			post_sc[i * 8 + 32 + j] = 4.0*2.0 * ci * cj /
133 				(cos(i*M_PI/8)*cos(j*M_PI/16));
134 			/* israelh. patch the first 4.0? */
135 		}
136 	}
137 	post_sc[63-32] = 1.0;
138 	post_sc[63] = 1.0;
139 }
140 
_dv_weight_init(void)141 void _dv_weight_init(void)
142 {
143 	double temp[64];
144 	double temp_postsc[64];
145 	int i, z, x;
146 #if ARCH_X86 || ARCH_X86_64
147 	const double dv_weight_bias_factor = (double)(1UL << DV_WEIGHT_BIAS);
148 #endif
149 
150 	W[0] = 1.0;
151 	W[1] = CS(4) / (4.0 * CS(7) * CS(2));
152 	W[2] = CS(4) / (2.0 * CS(6));
153 	W[3] = 1.0 / (2 * CS(5));
154 	W[4] = 7.0 / 8.0;
155 	W[5] = CS(4) / CS(3);
156 	W[6] = CS(4) / CS(2);
157 	W[7] = CS(4) / CS(1);
158 
159 	for (i = 0; i < 64; i++) {
160 		temp[i] = 1.0;
161 	}
162 	weight_88_inverse_float(temp);
163 
164 	for (i=0;i<64;i++) {
165 #if (!ARCH_X86) && (!ARCH_X86_64)
166 		dv_weight_inverse_88_matrix[i] = (dv_coeff_t)rint(temp[i]);
167 #else
168 		/* If we're using MMX assembler, fold weights into the iDCT
169 		   prescale */
170 		preSC[i] *= temp[i] * (16.0 / dv_weight_bias_factor);
171 #endif
172 	}
173 
174 	postscale88_init(temp_postsc);
175 	for (i = 0; i < 64; i++) {
176 		temp[i] = 1.0;
177 	}
178 	weight_88_float(temp);
179 
180 	for (i=0;i<64;i++) {
181 #if BRUTE_FORCE_DCT_88
182 		dv_weight_88_matrix[i] = temp[i];
183 #else
184 		/* If we're not using brute force(tm),
185 		   fold weights into the DCT
186 		   postscale */
187 		postSC88[i]= int_val(temp_postsc[i] * temp[i] * 32768.0 * 2.0);
188 #endif
189 	}
190 	postSC88[63] = temp[63] * 32768 * 2.0;
191 
192 	postscale248_init(temp_postsc);
193 
194 	for (i = 0; i < 64; i++) {
195 		temp[i] = 1.0;
196 	}
197 	weight_248_float(temp);
198 
199 	for (i=0;i<64;i++) {
200 #if BRUTE_FORCE_DCT_248
201 		dv_weight_248_matrix[i] = temp[i];
202 #else
203 		/* If we're not using brute force(tm),
204 		   fold weights into the DCT
205 		   postscale */
206 		postSC248[i]= int_val(temp_postsc[i]* temp[i] * 32768.0 * 2.0);
207 #endif
208 	}
209 
210 	for (z=0;z<4;z++) {
211 		for (x=0;x<8;x++) {
212 			dv_weight_inverse_248_matrix[z*8+x] =
213 				2.0 / (W[x] * W[2*z]);
214 			dv_weight_inverse_248_matrix[(z+4)*8+x] =
215 				2.0 / (W[x] * W[2*z]);
216 
217 		}
218 	}
219 	dv_weight_inverse_248_matrix[0] = 4.0;
220 }
221 
_dv_weight_88(dv_coeff_t * block)222 void _dv_weight_88(dv_coeff_t *block)
223 {
224 	/* These weights are now folded into the dct postscaler - so this
225 	   function doesn't do anything. */
226 #if BRUTE_FORCE_DCT_88
227 	int i;
228 
229 	for (i=0;i<64;i++) {
230 		block[i] *= dv_weight_88_matrix[i];
231 	}
232 #endif
233 }
234 
weight_88_float(double * block)235 static void weight_88_float(double *block)
236 {
237 	int x,y;
238 	double dc;
239 
240 	dc = block[0];
241 	for (y=0;y<8;y++) {
242 		for (x=0;x<8;x++) {
243 			block[y*8+x] *= W[x] * W[y] / 2.0;
244 		}
245 	}
246 	block[0] = dc / 4.0;
247 }
248 
249 
_dv_weight_248(dv_coeff_t * block)250 void _dv_weight_248(dv_coeff_t *block)
251 {
252 	/* These weights are now folded into the dct postscaler - so this
253 	   function doesn't do anything. */
254 #if BRUTE_FORCE_DCT_248
255 	int i;
256 
257 	for (i=0;i<64;i++) {
258 		block[i] *= dv_weight_248_matrix[i];
259 	}
260 #endif
261 }
262 
weight_248_float(double * block)263 static void weight_248_float(double *block)
264 {
265 	int x,z;
266 	double dc;
267 
268 	dc = block[0];
269 	for (z=0;z<4;z++) {
270 		for (x=0;x<8;x++) {
271 			block[z*8+x] *= W[x] * W[2*z] / 2;
272 			block[(z+4)*8+x] *= W[x] * W[2*z] / 2;
273 		}
274 	}
275 	block[0] = dc / 4;
276 	block[32] = dc / 4;
277 }
278 
279 
weight_88_inverse_float(double * block)280 static void weight_88_inverse_float(double *block)
281 {
282 	int x,y;
283 	double dc;
284 
285 	dc = block[0];
286 	for (y=0;y<8;y++) {
287 		for (x=0;x<8;x++) {
288 			block[y*8+x] /= (W[x] * W[y] / 2.0);
289 		}
290 	}
291 	block[0] = dc * 4.0;
292 }
293 
_dv_weight_88_inverse(dv_coeff_t * block)294 void _dv_weight_88_inverse(dv_coeff_t *block)
295 {
296 	/* When we're using MMX assembler, weights are applied in the 8x8
297 	   iDCT prescale */
298 #if (!ARCH_X86) && (!ARCH_X86_64)
299 	int i;
300 
301 	for (i=0;i<64;i++) {
302 		block[i] *= dv_weight_inverse_88_matrix[i];
303 	}
304 #endif
305 }
306 
_dv_weight_248_inverse(dv_coeff_t * block)307 void _dv_weight_248_inverse(dv_coeff_t *block)
308 {
309 	/* These weights are now folded into the idct prescalers - so this
310 	   function doesn't do anything. */
311 #if 0
312 	int x,z;
313 	dv_coeff_t dc;
314 
315 	dc = block[0];
316 	for (z=0;z<4;z++) {
317 		for (x=0;x<8;x++) {
318 			block[z*8+x] /= (W[x] * W[2*z] / 2);
319 			block[(z+4)*8+x] /= (W[x] * W[2*z] / 2);
320 		}
321 	}
322 	block[0] = dc * 4;
323 #endif
324 }
325 
326 /*@}*/
327