1 /*
2 tabinit.c: initialize tables...
3
4 copyright ?-2008 by the mpg123 project - free software under the terms of the LGPL 2.1
5 see COPYING and AUTHORS files in distribution or http://mpg123.org
6 initially written by Michael Hipp
7 */
8
9 #include "mpg123lib_intern.h"
10 #include "debug.h"
11
12 /* That altivec alignment part here should not hurt generic code, I hope */
13 #ifdef OPT_ALTIVEC
14 static ALIGNED(16) real cos64[16];
15 static ALIGNED(16) real cos32[8];
16 static ALIGNED(16) real cos16[4];
17 static ALIGNED(16) real cos8[2];
18 static ALIGNED(16) real cos4[1];
19 #elif defined(REAL_IS_FIXED) && defined(PRECALC_TABLES)
20 static real cos64[16] =
21 {
22 8398725,8480395,8647771,8909416,9279544,9780026,10443886,11321405,
23 12491246,14081950,16316987,19619946,24900150,34523836,57170182,170959967
24 };
25 static real cos32[8] =
26 {
27 8429197,8766072,9511743,10851869,13223040,17795219,28897867,85583072
28 };
29 static real cos16[4] =
30 {
31 8552951,10088893,15099095,42998586
32 };
33 static real cos8[2] =
34 {
35 9079764,21920489
36 };
37 static real cos4[1] =
38 {
39 11863283
40 };
41 #else
42 static real cos64[16],cos32[8],cos16[4],cos8[2],cos4[1];
43 #endif
44
45 real *pnts[] = { cos64,cos32,cos16,cos8,cos4 };
46
47
48 static long intwinbase[] = {
49 0, -1, -1, -1, -1, -1, -1, -2, -2, -2,
50 -2, -3, -3, -4, -4, -5, -5, -6, -7, -7,
51 -8, -9, -10, -11, -13, -14, -16, -17, -19, -21,
52 -24, -26, -29, -31, -35, -38, -41, -45, -49, -53,
53 -58, -63, -68, -73, -79, -85, -91, -97, -104, -111,
54 -117, -125, -132, -139, -147, -154, -161, -169, -176, -183,
55 -190, -196, -202, -208, -213, -218, -222, -225, -227, -228,
56 -228, -227, -224, -221, -215, -208, -200, -189, -177, -163,
57 -146, -127, -106, -83, -57, -29, 2, 36, 72, 111,
58 153, 197, 244, 294, 347, 401, 459, 519, 581, 645,
59 711, 779, 848, 919, 991, 1064, 1137, 1210, 1283, 1356,
60 1428, 1498, 1567, 1634, 1698, 1759, 1817, 1870, 1919, 1962,
61 2001, 2032, 2057, 2075, 2085, 2087, 2080, 2063, 2037, 2000,
62 1952, 1893, 1822, 1739, 1644, 1535, 1414, 1280, 1131, 970,
63 794, 605, 402, 185, -45, -288, -545, -814, -1095, -1388,
64 -1692, -2006, -2330, -2663, -3004, -3351, -3705, -4063, -4425, -4788,
65 -5153, -5517, -5879, -6237, -6589, -6935, -7271, -7597, -7910, -8209,
66 -8491, -8755, -8998, -9219, -9416, -9585, -9727, -9838, -9916, -9959,
67 -9966, -9935, -9863, -9750, -9592, -9389, -9139, -8840, -8492, -8092,
68 -7640, -7134, -6574, -5959, -5288, -4561, -3776, -2935, -2037, -1082,
69 -70, 998, 2122, 3300, 4533, 5818, 7154, 8540, 9975, 11455,
70 12980, 14548, 16155, 17799, 19478, 21189, 22929, 24694, 26482, 28289,
71 30112, 31947, 33791, 35640, 37489, 39336, 41176, 43006, 44821, 46617,
72 48390, 50137, 51853, 53534, 55178, 56778, 58333, 59838, 61289, 62684,
73 64019, 65290, 66494, 67629, 68692, 69679, 70590, 71420, 72169, 72835,
74 73415, 73908, 74313, 74630, 74856, 74992, 75038 };
75
prepare_decode_tables()76 void prepare_decode_tables()
77 {
78 #if !defined(REAL_IS_FIXED) || !defined(PRECALC_TABLES)
79 int i,k,kr,divv;
80 real *costab;
81
82 for(i=0;i<5;i++)
83 {
84 kr=0x10>>i; divv=0x40>>i;
85 costab = pnts[i];
86 for(k=0;k<kr;k++)
87 costab[k] = DOUBLE_TO_REAL(1.0 / (2.0 * cos(M_PI * ((double) k * 2.0 + 1.0) / (double) divv)));
88 }
89 #endif
90 }
91
92 #ifdef OPT_MMXORSSE
93 #if !defined(OPT_X86_64) && !defined(OPT_NEON) && !defined(OPT_NEON64) && !defined(OPT_AVX)
94 void make_decode_tables_mmx_asm(long scaleval, float* decwin_mmx, float *decwins);
make_decode_tables_mmx(mpg123_handle * fr)95 void make_decode_tables_mmx(mpg123_handle *fr)
96 {
97 debug("MMX decode tables");
98 /* Take care: The scale should be like before, when we didn't have float output all around. */
99 make_decode_tables_mmx_asm((long)((fr->lastscale < 0 ? fr->p.outscale : fr->lastscale)*SHORT_SCALE), fr->decwin_mmx, fr->decwins);
100 debug("MMX decode tables done");
101 }
102 #else
103
104 /* This mimics round() as defined in C99. We stay C89. */
rounded(double f)105 static int rounded(double f)
106 {
107 return (int)(f>0 ? floor(f+0.5) : ceil(f-0.5));
108 }
109
110 /* x86-64 doesn't use asm version */
make_decode_tables_mmx(mpg123_handle * fr)111 void make_decode_tables_mmx(mpg123_handle *fr)
112 {
113 int i,j,val;
114 int idx = 0;
115 short *ptr = (short *)fr->decwins;
116 /* Scale is always based on 1.0 . */
117 double scaleval = -0.5*(fr->lastscale < 0 ? fr->p.outscale : fr->lastscale);
118 debug1("MMX decode tables with scaleval %g", scaleval);
119 for(i=0,j=0;i<256;i++,j++,idx+=32)
120 {
121 if(idx < 512+16)
122 fr->decwin_mmx[idx+16] = fr->decwin_mmx[idx] = DOUBLE_TO_REAL((double) intwinbase[j] * scaleval);
123
124 if(i % 32 == 31)
125 idx -= 1023;
126 if(i % 64 == 63)
127 scaleval = - scaleval;
128 }
129
130 for( /* i=256 */ ;i<512;i++,j--,idx+=32)
131 {
132 if(idx < 512+16)
133 fr->decwin_mmx[idx+16] = fr->decwin_mmx[idx] = DOUBLE_TO_REAL((double) intwinbase[j] * scaleval);
134
135 if(i % 32 == 31)
136 idx -= 1023;
137 if(i % 64 == 63)
138 scaleval = - scaleval;
139 }
140
141 for(i=0; i<512; i++) {
142 if(i&1) val = rounded(fr->decwin_mmx[i]*0.5);
143 else val = rounded(fr->decwin_mmx[i]*-0.5);
144 if(val > 32767) val = 32767;
145 else if(val < -32768) val = -32768;
146 ptr[i] = val;
147 }
148 for(i=512; i<512+32; i++) {
149 if(i&1) val = rounded(fr->decwin_mmx[i]*0.5);
150 else val = 0;
151 if(val > 32767) val = 32767;
152 else if(val < -32768) val = -32768;
153 ptr[i] = val;
154 }
155 for(i=0; i<512; i++) {
156 val = rounded(fr->decwin_mmx[511-i]*-0.5);
157 if(val > 32767) val = 32767;
158 else if(val < -32768) val = -32768;
159 ptr[512+32+i] = val;
160 }
161 debug("decode tables done");
162 }
163 #endif
164 #endif
165
166 #ifdef REAL_IS_FIXED
167 /* Need saturating multiplication that keeps table values in 32 bit range,
168 with the option to swap sign at will (so -2**31 is out).
169 This code is far from the decoder core and so assembly optimization might
170 be overkill. */
sat_mul32(int32_t a,int32_t b)171 static int32_t sat_mul32(int32_t a, int32_t b)
172 {
173 int64_t prod = (int64_t)a * (int64_t)b;
174 /* TODO: record the clipping? An extra flag? */
175 if(prod > 2147483647L) return 2147483647L;
176 if(prod < -2147483647L) return -2147483647L;
177 return (int32_t)prod;
178 }
179 #endif
180
make_decode_tables(mpg123_handle * fr)181 void make_decode_tables(mpg123_handle *fr)
182 {
183 int i,j;
184 int idx = 0;
185 double scaleval;
186 #ifdef REAL_IS_FIXED
187 real scaleval_long;
188 #endif
189 /* Scale is always based on 1.0 . */
190 scaleval = -0.5*(fr->lastscale < 0 ? fr->p.outscale : fr->lastscale);
191 debug1("decode tables with scaleval %g", scaleval);
192 #ifdef REAL_IS_FIXED
193 scaleval_long = DOUBLE_TO_REAL_15(scaleval);
194 debug1("decode table with fixed scaleval %li", (long)scaleval_long);
195 if(scaleval_long > 28618 || scaleval_long < -28618)
196 {
197 /* TODO: Limit the scaleval itself or limit the multiplication afterwards?
198 The former basically disables significant amplification for fixed-point
199 decoders, but avoids (possibly subtle) distortion. */
200 /* This would limit the amplification instead:
201 scaleval_long = scaleval_long < 0 ? -28618 : 28618; */
202 if(NOQUIET) warning("Desired amplification may introduce distortion.");
203 }
204 #endif
205 for(i=0,j=0;i<256;i++,j++,idx+=32)
206 {
207 if(idx < 512+16)
208 #ifdef REAL_IS_FIXED
209 fr->decwin[idx+16] = fr->decwin[idx] =
210 REAL_SCALE_WINDOW(sat_mul32(intwinbase[j],scaleval_long));
211 #else
212 fr->decwin[idx+16] = fr->decwin[idx] = DOUBLE_TO_REAL((double) intwinbase[j] * scaleval);
213 #endif
214
215 if(i % 32 == 31)
216 idx -= 1023;
217 if(i % 64 == 63)
218 #ifdef REAL_IS_FIXED
219 scaleval_long = - scaleval_long;
220 #else
221 scaleval = - scaleval;
222 #endif
223 }
224
225 for( /* i=256 */ ;i<512;i++,j--,idx+=32)
226 {
227 if(idx < 512+16)
228 #ifdef REAL_IS_FIXED
229 fr->decwin[idx+16] = fr->decwin[idx] =
230 REAL_SCALE_WINDOW(sat_mul32(intwinbase[j],scaleval_long));
231 #else
232 fr->decwin[idx+16] = fr->decwin[idx] = DOUBLE_TO_REAL((double) intwinbase[j] * scaleval);
233 #endif
234
235 if(i % 32 == 31)
236 idx -= 1023;
237 if(i % 64 == 63)
238 #ifdef REAL_IS_FIXED
239 scaleval_long = - scaleval_long;
240 #else
241 scaleval = - scaleval;
242 #endif
243 }
244 #if defined(OPT_X86_64) || defined(OPT_ALTIVEC) || defined(OPT_SSE) || defined(OPT_SSE_VINTAGE) || defined(OPT_ARM) || defined(OPT_NEON) || defined(OPT_NEON64) || defined(OPT_AVX)
245 if( fr->cpu_opts.type == x86_64
246 || fr->cpu_opts.type == altivec
247 || fr->cpu_opts.type == sse
248 || fr->cpu_opts.type == sse_vintage
249 || fr->cpu_opts.type == arm
250 || fr->cpu_opts.type == neon
251 || fr->cpu_opts.type == neon64
252 || fr->cpu_opts.type == avx )
253 { /* for float SSE / AltiVec / ARM decoder */
254 for(i=512; i<512+32; i++)
255 {
256 fr->decwin[i] = (i&1) ? fr->decwin[i] : 0;
257 }
258 for(i=0; i<512; i++)
259 {
260 fr->decwin[512+32+i] = -fr->decwin[511-i];
261 }
262 #if defined(OPT_NEON) || defined(OPT_NEON64)
263 if(fr->cpu_opts.type == neon || fr->cpu_opts.type == neon64)
264 {
265 for(i=0; i<512; i+=2)
266 {
267 fr->decwin[i] = -fr->decwin[i];
268 }
269 }
270 #endif
271 }
272 #endif
273 debug("decode tables done");
274 }
275
276 #ifndef NO_8BIT
make_conv16to8_table(mpg123_handle * fr)277 int make_conv16to8_table(mpg123_handle *fr)
278 {
279 int i;
280 int mode = fr->af.dec_enc;
281
282 /*
283 * ????: 8.0 is right but on SB cards '2.0' is a better value ???
284 */
285 const double mul = 8.0;
286
287 if(!fr->conv16to8_buf){
288 fr->conv16to8_buf = (unsigned char *) malloc(8192);
289 if(!fr->conv16to8_buf) {
290 fr->err = MPG123_ERR_16TO8TABLE;
291 if(NOQUIET) error("Can't allocate 16 to 8 converter table!");
292 return -1;
293 }
294 fr->conv16to8 = fr->conv16to8_buf + 4096;
295 }
296
297 switch(mode)
298 {
299 case MPG123_ENC_ULAW_8:
300 {
301 double m=127.0 / log(256.0);
302 int c1;
303
304 for(i=-4096;i<4096;i++)
305 {
306 /* dunno whether this is a valid transformation rule ?!?!? */
307 if(i < 0)
308 c1 = 127 - (int) (log( 1.0 - 255.0 * (double) i*mul / 32768.0 ) * m);
309 else
310 c1 = 255 - (int) (log( 1.0 + 255.0 * (double) i*mul / 32768.0 ) * m);
311 if(c1 < 0 || c1 > 255)
312 {
313 if(NOQUIET) error2("Converror %d %d",i,c1);
314 return -1;
315 }
316 if(c1 == 0)
317 c1 = 2;
318 fr->conv16to8[i] = (unsigned char) c1;
319 }
320 }
321 break;
322 case MPG123_ENC_SIGNED_8:
323 for(i=-4096;i<4096;i++)
324 fr->conv16to8[i] = i>>5;
325 break;
326 case MPG123_ENC_UNSIGNED_8:
327 for(i=-4096;i<4096;i++)
328 fr->conv16to8[i] = (i>>5)+128;
329 break;
330 case MPG123_ENC_ALAW_8:
331 {
332 /*
333 Let's believe Wikipedia (http://en.wikipedia.org/wiki/G.711) that this
334 is the correct table:
335
336 s0000000wxyza... n000wxyz [0-31] -> [0-15]
337 s0000001wxyza... n001wxyz [32-63] -> [16-31]
338 s000001wxyzab... n010wxyz [64-127] -> [32-47]
339 s00001wxyzabc... n011wxyz [128-255] -> [48-63]
340 s0001wxyzabcd... n100wxyz [256-511] -> [64-79]
341 s001wxyzabcde... n101wxyz [512-1023] -> [80-95]
342 s01wxyzabcdef... n110wxyz [1024-2047] -> [96-111]
343 s1wxyzabcdefg... n111wxyz [2048-4095] -> [112-127]
344
345 Let's extend to -4096, too.
346 Also, bytes are xored with 0x55 for transmission.
347
348 Since it sounds OK, I assume it is fine;-)
349 */
350 for(i=0; i<64; ++i)
351 fr->conv16to8[i] = ((unsigned int)i)>>1;
352 for(i=64; i<128; ++i)
353 fr->conv16to8[i] = ((((unsigned int)i)>>2) & 0xf) | (2<<4);
354 for(i=128; i<256; ++i)
355 fr->conv16to8[i] = ((((unsigned int)i)>>3) & 0xf) | (3<<4);
356 for(i=256; i<512; ++i)
357 fr->conv16to8[i] = ((((unsigned int)i)>>4) & 0xf) | (4<<4);
358 for(i=512; i<1024; ++i)
359 fr->conv16to8[i] = ((((unsigned int)i)>>5) & 0xf) | (5<<4);
360 for(i=1024; i<2048; ++i)
361 fr->conv16to8[i] = ((((unsigned int)i)>>6) & 0xf) | (6<<4);
362 for(i=2048; i<4096; ++i)
363 fr->conv16to8[i] = ((((unsigned int)i)>>7) & 0xf) | (7<<4);
364
365 for(i=-4095; i<0; ++i)
366 fr->conv16to8[i] = fr->conv16to8[-i] | 0x80;
367
368 fr->conv16to8[-4096] = fr->conv16to8[-4095];
369
370 for(i=-4096;i<4096;i++)
371 {
372 /* fr->conv16to8[i] = - i>>5; */
373 /* fprintf(stderr, "table %i %i\n", i<<AUSHIFT, fr->conv16to8[i]); */
374 fr->conv16to8[i] ^= 0x55;
375 }
376 }
377 break;
378 default:
379 fr->err = MPG123_ERR_16TO8TABLE;
380 if(NOQUIET) error("Unknown 8 bit encoding choice.");
381 return -1;
382 break;
383 }
384
385 return 0;
386 }
387 #endif
388
389