1 /*
2 * weighting.c
3 *
4 * Copyright (C) Charles 'Buck' Krasic - April 2000
5 * Copyright (C) Erik Walthinsen - April 2000
6 *
7 * This file is part of libdv, a free DV (IEC 61834/SMPTE 314M)
8 * codec.
9 *
10 * libdv is free software; you can redistribute it and/or modify it
11 * under the terms of the GNU Lesser Public License as published by
12 * the Free Software Foundation; either version 2.1, or (at your
13 * option) any later version.
14 *
15 * libdv is distributed in the hope that it will be useful, but
16 * WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser Public License for more details.
19 *
20 * You should have received a copy of the GNU Lesser Public License
21 * along with libdv; see the file COPYING. If not, write to
22 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 * The libdv homepage is http://libdv.sourceforge.net/.
25 */
26
27 /** @file
28 * @ingroup weighting
29 * @brief Implementation for @link weighting Coefficient Weighting @endlink
30 */
31
32 /** @weakgroup weighting Coefficient Weighting
33 *
34 * DCT coeffictions are stored with 10 bits of precision, although
35 * there is the possibility that the DCT transform will produce
36 * values outside of this range. The weighting operation ensures
37 * that all values fit into the allocated 10 bit range.
38 *
39 * @{
40 */
41
42 #if HAVE_CONFIG_H
43 # include <config.h>
44 #endif
45
46 #include <math.h>
47
48 #include "weighting.h"
49
50 dv_coeff_t preSC[64] ALIGN32 = {
51 16384,22725,21407,19266, 16384,12873,8867,4520,
52 22725,31521,29692,26722, 22725,17855,12299,6270,
53 21407,29692,27969,25172, 21407,16819,11585,5906,
54 19266,26722,25172,22654, 19266,15137,10426,5315,
55
56 16384,22725,21407,19266, 16384,12873,8867,4520,
57 12873,17855,16819,15137, 25746,20228,13933,7103,
58 17734,24598,23170,20853, 17734,13933,9597,4892,
59 18081,25080,23624,21261, 18081,14206,9785,4988
60 };
61
62 dv_coeff_t postSC88[64] ALIGN32;
63 dv_coeff_t postSC248[64] ALIGN32;
64
65 static double W[8];
66
67 #if (!ARCH_X86) && (!ARCH_X86_64)
68 static dv_coeff_t dv_weight_inverse_88_matrix[64];
69 #endif
70
71 #if BRUTE_FORCE_DCT_88
72 static double dv_weight_88_matrix[64];
73 #endif
74 #if BRUTE_FORCE_DCT_248
75 static double dv_weight_248_matrix[64];
76 #endif
77
78 double dv_weight_inverse_248_matrix[64];
79
80
CS(int m)81 static inline double CS(int m) {
82 return cos(((double)m) * M_PI / 16.0);
83 }
84
85 static void weight_88_inverse_float(double *block);
86 static void weight_88_float(double *block);
87 static void weight_248_float(double *block);
88
int_val(double f)89 static inline short int_val(double f)
90 {
91 return (short) floor(f + 0.5);
92 }
93
postscale88_init(double * post_sc)94 static void postscale88_init(double* post_sc)
95 {
96 int i,j;
97 double ci,cj;
98
99 for( i = 0; i < 8; i++ ) {
100 ci = i==0 ? 1/(8.*sqrt(2.)) : 1.0/16.0;
101 /* di = i==0 ? 1.5/(sqrt(2.)) : 0.5;
102 ps3[i] = 2.0*2.0*ci/cos(i*M_PI/16);
103 israelh. this is table1 from AAN paper.
104 Note the trick if 8 or 16 deivision
105 */
106 for( j = 0; j < 8; j++) {
107 cj = j==0 ? 1/(8*sqrt(2.)) : 1.0/16.0;
108 post_sc[i * 8 + j] = 4.0*4.0 * ci * cj /
109 (cos(i*M_PI/16)*cos(j*M_PI/16));
110 /* israelh. patch the first 4.0? */
111 }
112 }
113 post_sc[63] = 1.0;
114 }
115
postscale248_init(double * post_sc)116 static void postscale248_init(double* post_sc)
117 {
118 int i,j;
119 double ci,cj;
120
121 for( i = 0; i < 4; i++ ) {
122 ci = i==0 ? 1/(4.*sqrt(2.)) : 1.0/8.0;
123 /* di = i==0 ? 1.5/(sqrt(2.)) : 0.5;
124 ps3[i] = 2.0*2.0*ci/cos(i*M_PI/16);
125 israelh. this is table1 from AAN paper.
126 Note the trick if 8 or 16 deivision
127 */
128 for( j = 0; j < 8; j++) {
129 cj = j==0 ? 1/(8*sqrt(2.)) : 1.0/16.0;
130 post_sc[i * 8 + j] = 4.0*2.0 * ci * cj /
131 (cos(i*M_PI/8)*cos(j*M_PI/16));
132 post_sc[i * 8 + 32 + j] = 4.0*2.0 * ci * cj /
133 (cos(i*M_PI/8)*cos(j*M_PI/16));
134 /* israelh. patch the first 4.0? */
135 }
136 }
137 post_sc[63-32] = 1.0;
138 post_sc[63] = 1.0;
139 }
140
_dv_weight_init(void)141 void _dv_weight_init(void)
142 {
143 double temp[64];
144 double temp_postsc[64];
145 int i, z, x;
146 #if ARCH_X86 || ARCH_X86_64
147 const double dv_weight_bias_factor = (double)(1UL << DV_WEIGHT_BIAS);
148 #endif
149
150 W[0] = 1.0;
151 W[1] = CS(4) / (4.0 * CS(7) * CS(2));
152 W[2] = CS(4) / (2.0 * CS(6));
153 W[3] = 1.0 / (2 * CS(5));
154 W[4] = 7.0 / 8.0;
155 W[5] = CS(4) / CS(3);
156 W[6] = CS(4) / CS(2);
157 W[7] = CS(4) / CS(1);
158
159 for (i = 0; i < 64; i++) {
160 temp[i] = 1.0;
161 }
162 weight_88_inverse_float(temp);
163
164 for (i=0;i<64;i++) {
165 #if (!ARCH_X86) && (!ARCH_X86_64)
166 dv_weight_inverse_88_matrix[i] = (dv_coeff_t)rint(temp[i]);
167 #else
168 /* If we're using MMX assembler, fold weights into the iDCT
169 prescale */
170 preSC[i] *= temp[i] * (16.0 / dv_weight_bias_factor);
171 #endif
172 }
173
174 postscale88_init(temp_postsc);
175 for (i = 0; i < 64; i++) {
176 temp[i] = 1.0;
177 }
178 weight_88_float(temp);
179
180 for (i=0;i<64;i++) {
181 #if BRUTE_FORCE_DCT_88
182 dv_weight_88_matrix[i] = temp[i];
183 #else
184 /* If we're not using brute force(tm),
185 fold weights into the DCT
186 postscale */
187 postSC88[i]= int_val(temp_postsc[i] * temp[i] * 32768.0 * 2.0);
188 #endif
189 }
190 postSC88[63] = temp[63] * 32768 * 2.0;
191
192 postscale248_init(temp_postsc);
193
194 for (i = 0; i < 64; i++) {
195 temp[i] = 1.0;
196 }
197 weight_248_float(temp);
198
199 for (i=0;i<64;i++) {
200 #if BRUTE_FORCE_DCT_248
201 dv_weight_248_matrix[i] = temp[i];
202 #else
203 /* If we're not using brute force(tm),
204 fold weights into the DCT
205 postscale */
206 postSC248[i]= int_val(temp_postsc[i]* temp[i] * 32768.0 * 2.0);
207 #endif
208 }
209
210 for (z=0;z<4;z++) {
211 for (x=0;x<8;x++) {
212 dv_weight_inverse_248_matrix[z*8+x] =
213 2.0 / (W[x] * W[2*z]);
214 dv_weight_inverse_248_matrix[(z+4)*8+x] =
215 2.0 / (W[x] * W[2*z]);
216
217 }
218 }
219 dv_weight_inverse_248_matrix[0] = 4.0;
220 }
221
_dv_weight_88(dv_coeff_t * block)222 void _dv_weight_88(dv_coeff_t *block)
223 {
224 /* These weights are now folded into the dct postscaler - so this
225 function doesn't do anything. */
226 #if BRUTE_FORCE_DCT_88
227 int i;
228
229 for (i=0;i<64;i++) {
230 block[i] *= dv_weight_88_matrix[i];
231 }
232 #endif
233 }
234
weight_88_float(double * block)235 static void weight_88_float(double *block)
236 {
237 int x,y;
238 double dc;
239
240 dc = block[0];
241 for (y=0;y<8;y++) {
242 for (x=0;x<8;x++) {
243 block[y*8+x] *= W[x] * W[y] / 2.0;
244 }
245 }
246 block[0] = dc / 4.0;
247 }
248
249
_dv_weight_248(dv_coeff_t * block)250 void _dv_weight_248(dv_coeff_t *block)
251 {
252 /* These weights are now folded into the dct postscaler - so this
253 function doesn't do anything. */
254 #if BRUTE_FORCE_DCT_248
255 int i;
256
257 for (i=0;i<64;i++) {
258 block[i] *= dv_weight_248_matrix[i];
259 }
260 #endif
261 }
262
weight_248_float(double * block)263 static void weight_248_float(double *block)
264 {
265 int x,z;
266 double dc;
267
268 dc = block[0];
269 for (z=0;z<4;z++) {
270 for (x=0;x<8;x++) {
271 block[z*8+x] *= W[x] * W[2*z] / 2;
272 block[(z+4)*8+x] *= W[x] * W[2*z] / 2;
273 }
274 }
275 block[0] = dc / 4;
276 block[32] = dc / 4;
277 }
278
279
weight_88_inverse_float(double * block)280 static void weight_88_inverse_float(double *block)
281 {
282 int x,y;
283 double dc;
284
285 dc = block[0];
286 for (y=0;y<8;y++) {
287 for (x=0;x<8;x++) {
288 block[y*8+x] /= (W[x] * W[y] / 2.0);
289 }
290 }
291 block[0] = dc * 4.0;
292 }
293
_dv_weight_88_inverse(dv_coeff_t * block)294 void _dv_weight_88_inverse(dv_coeff_t *block)
295 {
296 /* When we're using MMX assembler, weights are applied in the 8x8
297 iDCT prescale */
298 #if (!ARCH_X86) && (!ARCH_X86_64)
299 int i;
300
301 for (i=0;i<64;i++) {
302 block[i] *= dv_weight_inverse_88_matrix[i];
303 }
304 #endif
305 }
306
_dv_weight_248_inverse(dv_coeff_t * block)307 void _dv_weight_248_inverse(dv_coeff_t *block)
308 {
309 /* These weights are now folded into the idct prescalers - so this
310 function doesn't do anything. */
311 #if 0
312 int x,z;
313 dv_coeff_t dc;
314
315 dc = block[0];
316 for (z=0;z<4;z++) {
317 for (x=0;x<8;x++) {
318 block[z*8+x] /= (W[x] * W[2*z] / 2);
319 block[(z+4)*8+x] /= (W[x] * W[2*z] / 2);
320 }
321 }
322 block[0] = dc * 4;
323 #endif
324 }
325
326 /*@}*/
327