xref: /reactos/sdk/lib/3rdparty/libmpg123/layer3.c (revision 02e84521)
1 /*
2 	layer3.c: the layer 3 decoder
3 
4 	copyright 1995-2017 by the mpg123 project - free software under the terms of the LGPL 2.1
5 	see COPYING and AUTHORS files in distribution or http://mpg123.org
6 	initially written by Michael Hipp
7 
8 	Dear visitor:
9 	If you feel you don't understand fully the works of this file, your feeling might be correct.
10 
11 	Optimize-TODO: put short bands into the band-field without the stride of 3 reals
	Length-optimize: unify long and short band code where it is possible
13 
14 	The int-vs-pointer situation has to be cleaned up.
15 */
16 
17 #include "mpg123lib_intern.h"
18 #ifdef USE_NEW_HUFFTABLE
19 #include "newhuffman.h"
20 #else
21 #include "huffman.h"
22 #endif
23 #include "getbits.h"
24 #include "debug.h"
25 
26 
27 
28 /* define CUT_SFB21 if you want to cut-off the frequency above 16kHz */
29 #if 0
30 #define CUT_SFB21
31 #endif
32 
33 #ifdef REAL_IS_FIXED
34 #define NEW_DCT9
35 #include "l3_integer_tables.h"
36 #else
37 /* static one-time calculated tables... or so */
38 static real ispow[8207];
39 static real aa_ca[8],aa_cs[8];
40 static ALIGNED(16) real win[4][36];
41 static ALIGNED(16) real win1[4][36];
42 real COS9[9]; /* dct36_3dnow wants to use that */
43 static real COS6_1,COS6_2;
44 real tfcos36[9]; /* dct36_3dnow wants to use that */
45 static real tfcos12[3];
46 #define NEW_DCT9
47 #ifdef NEW_DCT9
48 static real cos9[3],cos18[3];
49 static real tan1_1[16],tan2_1[16],tan1_2[16],tan2_2[16];
50 static real pow1_1[2][32],pow2_1[2][32],pow1_2[2][32],pow2_2[2][32];
51 #endif
52 #endif
53 
54 /* Decoder state data, living on the stack of do_layer3. */
55 
56 struct gr_info_s
57 {
58 	int scfsi;
59 	unsigned part2_3_length;
60 	unsigned big_values;
61 	unsigned scalefac_compress;
62 	unsigned block_type;
63 	unsigned mixed_block_flag;
64 	unsigned table_select[3];
65 	/* Making those two signed int as workaround for open64/pathscale/sun compilers, and also for consistency, since they're worked on together with other signed variables. */
66 	int maxband[3];
67 	int maxbandl;
68 	unsigned maxb;
69 	unsigned region1start;
70 	unsigned region2start;
71 	unsigned preflag;
72 	unsigned scalefac_scale;
73 	unsigned count1table_select;
74 	real *full_gain[3];
75 	real *pow2gain;
76 };
77 
78 struct III_sideinfo
79 {
80 	unsigned main_data_begin;
81 	unsigned private_bits;
82 	/* Hm, funny... struct inside struct... */
83 	struct { struct gr_info_s gr[2]; } ch[2];
84 };
85 
/*
	Scale factor band layout for one sampling frequency index.
	The Idx arrays hold the start offsets of the bands, closed by a final
	entry of 576; the Diff arrays hold the band widths.
	Offsets of short bands are stored multiplied by 3, their widths are not.
*/
struct bandInfoStruct
{
	unsigned short longIdx[23];
	unsigned char  longDiff[22];
	unsigned short shortIdx[14];
	unsigned char  shortDiff[13];
};

/* Techy details about our friendly MPEG data. Fairly constant over the years;-) */
static const struct bandInfoStruct bandInfo[9] =
{
	/* MPEG 1.0 */
	{
		{   0,  4,  8, 12, 16, 20, 24, 30, 36, 44, 52, 62, 74, 90,110,134,162,196,238,288,342,418,576 },
		{   4,  4,  4,  4,  4,  4,  6,  6,  8,  8, 10, 12, 16, 20, 24, 28, 34, 42, 50, 54, 76,158 },
		{   0, 12, 24, 36, 48, 66, 90,120,156,198,252,318,408,576 },
		{   4,  4,  4,  4,  6,  8, 10, 12, 14, 18, 22, 30, 56 }
	},
	{
		{   0,  4,  8, 12, 16, 20, 24, 30, 36, 42, 50, 60, 72, 88,106,128,156,190,230,276,330,384,576 },
		{   4,  4,  4,  4,  4,  4,  6,  6,  6,  8, 10, 12, 16, 18, 22, 28, 34, 40, 46, 54, 54,192 },
		{   0, 12, 24, 36, 48, 66, 84,114,150,192,240,300,378,576 },
		{   4,  4,  4,  4,  6,  6, 10, 12, 14, 16, 20, 26, 66 }
	},
	{
		{   0,  4,  8, 12, 16, 20, 24, 30, 36, 44, 54, 66, 82,102,126,156,194,240,296,364,448,550,576 },
		{   4,  4,  4,  4,  4,  4,  6,  6,  8, 10, 12, 16, 20, 24, 30, 38, 46, 56, 68, 84,102, 26 },
		{   0, 12, 24, 36, 48, 66, 90,126,174,234,312,414,540,576 },
		{   4,  4,  4,  4,  6,  8, 12, 16, 20, 26, 34, 42, 12 }
	},
	/* MPEG 2.0 */
	{
		{   0,  6, 12, 18, 24, 30, 36, 44, 54, 66, 80, 96,116,140,168,200,238,284,336,396,464,522,576 },
		{   6,  6,  6,  6,  6,  6,  8, 10, 12, 14, 16, 20, 24, 28, 32, 38, 46, 52, 60, 68, 58, 54 },
		{   0, 12, 24, 36, 54, 72, 96,126,168,222,300,396,522,576 },
		{   4,  4,  4,  6,  6,  8, 10, 14, 18, 26, 32, 42, 18 }
	},
	/* Twiddling 3 values here (not just 330->332!) fixed bug 1895025. */
	{
		{   0,  6, 12, 18, 24, 30, 36, 44, 54, 66, 80, 96,114,136,162,194,232,278,332,394,464,540,576 },
		{   6,  6,  6,  6,  6,  6,  8, 10, 12, 14, 16, 18, 22, 26, 32, 38, 46, 54, 62, 70, 76, 36 },
		{   0, 12, 24, 36, 54, 78,108,144,186,240,312,408,540,576 },
		{   4,  4,  4,  6,  8, 10, 12, 14, 18, 24, 32, 44, 12 }
	},
	{
		{   0,  6, 12, 18, 24, 30, 36, 44, 54, 66, 80, 96,116,140,168,200,238,284,336,396,464,522,576 },
		{   6,  6,  6,  6,  6,  6,  8, 10, 12, 14, 16, 20, 24, 28, 32, 38, 46, 52, 60, 68, 58, 54 },
		{   0, 12, 24, 36, 54, 78,108,144,186,240,312,402,522,576 },
		{   4,  4,  4,  6,  8, 10, 12, 14, 18, 24, 30, 40, 18 }
	},
	/* MPEG 2.5 */
	{
		{   0,  6, 12, 18, 24, 30, 36, 44, 54, 66, 80, 96,116,140,168,200,238,284,336,396,464,522,576 },
		{   6,  6,  6,  6,  6,  6,  8, 10, 12, 14, 16, 20, 24, 28, 32, 38, 46, 52, 60, 68, 58, 54 },
		{   0, 12, 24, 36, 54, 78,108,144,186,240,312,402,522,576 },
		{   4,  4,  4,  6,  8, 10, 12, 14, 18, 24, 30, 40, 18 }
	},
	{
		{   0,  6, 12, 18, 24, 30, 36, 44, 54, 66, 80, 96,116,140,168,200,238,284,336,396,464,522,576 },
		{   6,  6,  6,  6,  6,  6,  8, 10, 12, 14, 16, 20, 24, 28, 32, 38, 46, 52, 60, 68, 58, 54 },
		{   0, 12, 24, 36, 54, 78,108,144,186,240,312,402,522,576 },
		{   4,  4,  4,  6,  8, 10, 12, 14, 18, 24, 30, 40, 18 }
	},
	{
		{   0, 12, 24, 36, 48, 60, 72, 88,108,132,160,192,232,280,336,400,476,566,568,570,572,574,576 },
		{  12, 12, 12, 12, 12, 12, 16, 20, 24, 28, 32, 40, 48, 56, 64, 76, 90,  2,  2,  2,  2,  2 },
		{   0, 24, 48, 72,108,156,216,288,372,480,486,492,498,576 },
		{   8,  8,  8, 12, 16, 20, 24, 28, 36,  2,  2,  2, 26 }
	}
};
152 
/*
	Band maps, one row per bandInfo entry, filled by init_layer3().
	map[j][n] points to the start of mapbufN[j], mapend[j][n] behind the last used entry.
*/
static int mapbuf0[9][152]; /* mixed blocks: leading long bands, then short bands 3..12 */
static int mapbuf1[9][156]; /* short blocks: 13 bands times 3 windows, 4 values each */
static int mapbuf2[9][44];  /* long blocks: 22 bands, 2 values each */
static int *map[9][3];
static int *mapend[9][3];

/* MPEG 2.0 slen for 'normal' mode */
static unsigned int n_slen2[512];
/* MPEG 2.0 slen for intensity stereo */
static unsigned int i_slen2[256];
161 
162 /* Some helpers used in init_layer3 */
163 
#ifdef OPT_MMXORSSE
/*
	Gain table entry for the MMX/SSE decoders: 2^(-(i+210)/4),
	multiplied by 16384 unless downsampling is active.
*/
real init_layer3_gainpow2_mmx(mpg123_handle *fr, int i)
{
	const double exponent = -0.25 * (double) (i+210);

	if(fr->p.down_sample)
		return DOUBLE_TO_REAL(pow((double)2.0,exponent));

	return DOUBLE_TO_REAL(16384.0 * pow((double)2.0,exponent));
}
#endif
171 
172 real init_layer3_gainpow2(mpg123_handle *fr, int i)
173 {
174 #if defined(REAL_IS_FIXED) && defined(PRECALC_TABLES)
175 	return gainpow2[i+256];
176 #else
177 	return DOUBLE_TO_REAL_SCALE_LAYER3(pow((double)2.0,-0.25 * (double) (i+210)),i+256);
178 #endif
179 }
180 
181 
182 /* init tables for layer-3 ... specific with the downsampling... */
183 void init_layer3(void)
184 {
185 	int i,j,k,l;
186 
187 #if !defined(REAL_IS_FIXED) || !defined(PRECALC_TABLES)
188 	for(i=0;i<8207;i++)
189 	ispow[i] = DOUBLE_TO_REAL_POW43(pow((double)i,(double)4.0/3.0));
190 
191 	for(i=0;i<8;i++)
192 	{
193 		const double Ci[8] = {-0.6,-0.535,-0.33,-0.185,-0.095,-0.041,-0.0142,-0.0037};
194 		double sq = sqrt(1.0+Ci[i]*Ci[i]);
195 		aa_cs[i] = DOUBLE_TO_REAL(1.0/sq);
196 		aa_ca[i] = DOUBLE_TO_REAL(Ci[i]/sq);
197 	}
198 
199 	for(i=0;i<18;i++)
200 	{
201 		win[0][i]    = win[1][i]    =
202 			DOUBLE_TO_REAL( 0.5*sin(M_PI/72.0 * (double)(2*(i+0) +1)) / cos(M_PI * (double)(2*(i+0) +19) / 72.0) );
203 		win[0][i+18] = win[3][i+18] =
204 			DOUBLE_TO_REAL( 0.5*sin(M_PI/72.0 * (double)(2*(i+18)+1)) / cos(M_PI * (double)(2*(i+18)+19) / 72.0) );
205 	}
206 	for(i=0;i<6;i++)
207 	{
208 		win[1][i+18] = DOUBLE_TO_REAL(0.5 / cos ( M_PI * (double) (2*(i+18)+19) / 72.0 ));
209 		win[3][i+12] = DOUBLE_TO_REAL(0.5 / cos ( M_PI * (double) (2*(i+12)+19) / 72.0 ));
210 		win[1][i+24] = DOUBLE_TO_REAL(0.5 * sin( M_PI / 24.0 * (double) (2*i+13) ) / cos ( M_PI * (double) (2*(i+24)+19) / 72.0 ));
211 		win[1][i+30] = win[3][i] = DOUBLE_TO_REAL(0.0);
212 		win[3][i+6 ] = DOUBLE_TO_REAL(0.5 * sin( M_PI / 24.0 * (double) (2*i+1 ) ) / cos ( M_PI * (double) (2*(i+6 )+19) / 72.0 ));
213 	}
214 
215 	for(i=0;i<9;i++)
216 	COS9[i] = DOUBLE_TO_REAL(cos( M_PI / 18.0 * (double) i));
217 
218 	for(i=0;i<9;i++)
219 	tfcos36[i] = DOUBLE_TO_REAL(0.5 / cos ( M_PI * (double) (i*2+1) / 36.0 ));
220 
221 	for(i=0;i<3;i++)
222 	tfcos12[i] = DOUBLE_TO_REAL(0.5 / cos ( M_PI * (double) (i*2+1) / 12.0 ));
223 
224 	COS6_1 = DOUBLE_TO_REAL(cos( M_PI / 6.0 * (double) 1));
225 	COS6_2 = DOUBLE_TO_REAL(cos( M_PI / 6.0 * (double) 2));
226 
227 #ifdef NEW_DCT9
228 	cos9[0]  = DOUBLE_TO_REAL(cos(1.0*M_PI/9.0));
229 	cos9[1]  = DOUBLE_TO_REAL(cos(5.0*M_PI/9.0));
230 	cos9[2]  = DOUBLE_TO_REAL(cos(7.0*M_PI/9.0));
231 	cos18[0] = DOUBLE_TO_REAL(cos(1.0*M_PI/18.0));
232 	cos18[1] = DOUBLE_TO_REAL(cos(11.0*M_PI/18.0));
233 	cos18[2] = DOUBLE_TO_REAL(cos(13.0*M_PI/18.0));
234 #endif
235 
236 	for(i=0;i<12;i++)
237 	{
238 		win[2][i] = DOUBLE_TO_REAL(0.5 * sin( M_PI / 24.0 * (double) (2*i+1) ) / cos ( M_PI * (double) (2*i+7) / 24.0 ));
239 	}
240 
241 	for(i=0;i<16;i++)
242 	{
243 		double t = tan( (double) i * M_PI / 12.0 );
244 		tan1_1[i] = DOUBLE_TO_REAL_15(t / (1.0+t));
245 		tan2_1[i] = DOUBLE_TO_REAL_15(1.0 / (1.0 + t));
246 		tan1_2[i] = DOUBLE_TO_REAL_15(M_SQRT2 * t / (1.0+t));
247 		tan2_2[i] = DOUBLE_TO_REAL_15(M_SQRT2 / (1.0 + t));
248 	}
249 
250 	for(i=0;i<32;i++)
251 	{
252 		for(j=0;j<2;j++)
253 		{
254 			double base = pow(2.0,-0.25*(j+1.0));
255 			double p1=1.0,p2=1.0;
256 			if(i > 0)
257 			{
258 				if( i & 1 ) p1 = pow(base,(i+1.0)*0.5);
259 				else p2 = pow(base,i*0.5);
260 			}
261 			pow1_1[j][i] = DOUBLE_TO_REAL_15(p1);
262 			pow2_1[j][i] = DOUBLE_TO_REAL_15(p2);
263 			pow1_2[j][i] = DOUBLE_TO_REAL_15(M_SQRT2 * p1);
264 			pow2_2[j][i] = DOUBLE_TO_REAL_15(M_SQRT2 * p2);
265 		}
266 	}
267 #endif
268 
269 	for(j=0;j<4;j++)
270 	{
271 		const int len[4] = { 36,36,12,36 };
272 		for(i=0;i<len[j];i+=2) win1[j][i] = + win[j][i];
273 
274 		for(i=1;i<len[j];i+=2) win1[j][i] = - win[j][i];
275 	}
276 
277 	for(j=0;j<9;j++)
278 	{
279 		const struct bandInfoStruct *bi = &bandInfo[j];
280 		int *mp;
281 		int cb,lwin;
282 		const unsigned char *bdf;
283 		int switch_idx;
284 
285 		mp = map[j][0] = mapbuf0[j];
286 		bdf = bi->longDiff;
287 		switch_idx = (j < 3) ? 8 : 6;
288 		for(i=0,cb = 0; cb < switch_idx ; cb++,i+=*bdf++)
289 		{
290 			*mp++ = (*bdf) >> 1;
291 			*mp++ = i;
292 			*mp++ = 3;
293 			*mp++ = cb;
294 		}
295 		bdf = bi->shortDiff+3;
296 		for(cb=3;cb<13;cb++)
297 		{
298 			int l = (*bdf++) >> 1;
299 			for(lwin=0;lwin<3;lwin++)
300 			{
301 				*mp++ = l;
302 				*mp++ = i + lwin;
303 				*mp++ = lwin;
304 				*mp++ = cb;
305 			}
306 			i += 6*l;
307 		}
308 		mapend[j][0] = mp;
309 
310 		mp = map[j][1] = mapbuf1[j];
311 		bdf = bi->shortDiff+0;
312 		for(i=0,cb=0;cb<13;cb++)
313 		{
314 			int l = (*bdf++) >> 1;
315 			for(lwin=0;lwin<3;lwin++)
316 			{
317 				*mp++ = l;
318 				*mp++ = i + lwin;
319 				*mp++ = lwin;
320 				*mp++ = cb;
321 			}
322 			i += 6*l;
323 		}
324 		mapend[j][1] = mp;
325 
326 		mp = map[j][2] = mapbuf2[j];
327 		bdf = bi->longDiff;
328 		for(cb = 0; cb < 22 ; cb++)
329 		{
330 			*mp++ = (*bdf++) >> 1;
331 			*mp++ = cb;
332 		}
333 		mapend[j][2] = mp;
334 	}
335 
336 	/* Now for some serious loopings! */
337 	for(i=0;i<5;i++)
338 	for(j=0;j<6;j++)
339 	for(k=0;k<6;k++)
340 	{
341 		int n = k + j * 6 + i * 36;
342 		i_slen2[n] = i|(j<<3)|(k<<6)|(3<<12);
343 	}
344 	for(i=0;i<4;i++)
345 	for(j=0;j<4;j++)
346 	for(k=0;k<4;k++)
347 	{
348 		int n = k + j * 4 + i * 16;
349 		i_slen2[n+180] = i|(j<<3)|(k<<6)|(4<<12);
350 	}
351 	for(i=0;i<4;i++)
352 	for(j=0;j<3;j++)
353 	{
354 		int n = j + i * 3;
355 		i_slen2[n+244] = i|(j<<3) | (5<<12);
356 		n_slen2[n+500] = i|(j<<3) | (2<<12) | (1<<15);
357 	}
358 	for(i=0;i<5;i++)
359 	for(j=0;j<5;j++)
360 	for(k=0;k<4;k++)
361 	for(l=0;l<4;l++)
362 	{
363 		int n = l + k * 4 + j * 16 + i * 80;
364 		n_slen2[n] = i|(j<<3)|(k<<6)|(l<<9)|(0<<12);
365 	}
366 	for(i=0;i<5;i++)
367 	for(j=0;j<5;j++)
368 	for(k=0;k<4;k++)
369 	{
370 		int n = k + j * 4 + i * 20;
371 		n_slen2[n+400] = i|(j<<3)|(k<<6)|(1<<12);
372 	}
373 }
374 
375 
376 void init_layer3_stuff(mpg123_handle *fr, real (*gainpow2)(mpg123_handle *fr, int i))
377 {
378 	int i,j;
379 
380 	for(i=-256;i<118+4;i++)	fr->gainpow2[i+256] = gainpow2(fr,i);
381 
382 	for(j=0;j<9;j++)
383 	{
384 		for(i=0;i<23;i++)
385 		{
386 			fr->longLimit[j][i] = (bandInfo[j].longIdx[i] - 1 + 8) / 18 + 1;
387 			if(fr->longLimit[j][i] > (fr->down_sample_sblimit) )
388 			fr->longLimit[j][i] = fr->down_sample_sblimit;
389 		}
390 		for(i=0;i<14;i++)
391 		{
392 			fr->shortLimit[j][i] = (bandInfo[j].shortIdx[i] - 1) / 18 + 1;
393 			if(fr->shortLimit[j][i] > (fr->down_sample_sblimit) )
394 			fr->shortLimit[j][i] = fr->down_sample_sblimit;
395 		}
396 	}
397 }
398 
399 /*
400 	Observe!
	Now come the actual decoding routines.
402 */
403 
404 /* read additional side information (for MPEG 1 and MPEG 2) */
405 static int III_get_side_info(mpg123_handle *fr, struct III_sideinfo *si,int stereo, int ms_stereo,long sfreq,int single)
406 {
407 	int ch, gr;
408 	int powdiff = (single == SINGLE_MIX) ? 4 : 0;
409 
410 	const int tabs[2][5] = { { 2,9,5,3,4 } , { 1,8,1,2,9 } };
411 	const int *tab = tabs[fr->lsf];
412 
413 	si->main_data_begin = getbits(fr, tab[1]);
414 
415 	if(si->main_data_begin > fr->bitreservoir)
416 	{
417 		if(!fr->to_ignore && VERBOSE2) fprintf(stderr, "Note: missing %d bytes in bit reservoir for frame %li\n", (int)(si->main_data_begin - fr->bitreservoir), (long)fr->num);
418 
419 		/*  overwrite main_data_begin for the really available bit reservoir */
420 		backbits(fr, tab[1]);
421 		if(fr->lsf == 0)
422 		{
423 			fr->wordpointer[0] = (unsigned char) (fr->bitreservoir >> 1);
424 			fr->wordpointer[1] = (unsigned char) ((fr->bitreservoir & 1) << 7);
425 		}
426 		else fr->wordpointer[0] = (unsigned char) fr->bitreservoir;
427 
428 		/* zero "side-info" data for a silence-frame
429 		without touching audio data used as bit reservoir for following frame */
430 		memset(fr->wordpointer+2, 0, fr->ssize-2);
431 
432 		/* reread the new bit reservoir offset */
433 		si->main_data_begin = getbits(fr, tab[1]);
434 	}
435 
436 	/* Keep track of the available data bytes for the bit reservoir.
437 	Think: Substract the 2 crc bytes in parser already? */
438 	fr->bitreservoir = fr->bitreservoir + fr->framesize - fr->ssize - (fr->error_protection ? 2 : 0);
439 	/* Limit the reservoir to the max for MPEG 1.0 or 2.x . */
440 	if(fr->bitreservoir > (unsigned int) (fr->lsf == 0 ? 511 : 255))
441 	fr->bitreservoir = (fr->lsf == 0 ? 511 : 255);
442 
443 	/* Now back into less commented territory. It's code. It works. */
444 
445 	if (stereo == 1)
446 	si->private_bits = getbits_fast(fr, tab[2]);
447 	else
448 	si->private_bits = getbits_fast(fr, tab[3]);
449 
450 	if(!fr->lsf) for(ch=0; ch<stereo; ch++)
451 	{
452 		si->ch[ch].gr[0].scfsi = -1;
453 		si->ch[ch].gr[1].scfsi = getbits_fast(fr, 4);
454 	}
455 
456 	for (gr=0; gr<tab[0]; gr++)
457 	for (ch=0; ch<stereo; ch++)
458 	{
459 		register struct gr_info_s *gr_info = &(si->ch[ch].gr[gr]);
460 
461 		gr_info->part2_3_length = getbits(fr, 12);
462 		gr_info->big_values = getbits(fr, 9);
463 		if(gr_info->big_values > 288)
464 		{
465 			if(NOQUIET) error("big_values too large!");
466 			gr_info->big_values = 288;
467 		}
468 		gr_info->pow2gain = fr->gainpow2+256 - getbits_fast(fr, 8) + powdiff;
469 		if(ms_stereo) gr_info->pow2gain += 2;
470 		gr_info->scalefac_compress = getbits(fr, tab[4]);
471 		if(gr_info->part2_3_length == 0)
472 		{
473 			if(gr_info->scalefac_compress > 0)
474 				debug1( "scalefac_compress _should_ be zero instead of %i"
475 				,	gr_info->scalefac_compress );
476 			gr_info->scalefac_compress = 0;
477 		}
478 
479 		if(get1bit(fr))
480 		{ /* window switch flag  */
481 			int i;
482 			gr_info->block_type       = getbits_fast(fr, 2);
483 			gr_info->mixed_block_flag = get1bit(fr);
484 			gr_info->table_select[0]  = getbits_fast(fr, 5);
485 			gr_info->table_select[1]  = getbits_fast(fr, 5);
486 			/*
487 				table_select[2] not needed, because there is no region2,
488 				but to satisfy some verification tools we set it either.
489 			*/
490 			gr_info->table_select[2] = 0;
491 			for(i=0;i<3;i++)
492 			gr_info->full_gain[i] = gr_info->pow2gain + (getbits_fast(fr, 3)<<3);
493 
494 			if(gr_info->block_type == 0)
495 			{
496 				if(NOQUIET) error("Blocktype == 0 and window-switching == 1 not allowed.");
497 				return 1;
498 			}
499 
500 			/* region_count/start parameters are implicit in this case. */
501 			if( (!fr->lsf || (gr_info->block_type == 2)) && !fr->mpeg25)
502 			{
503 				gr_info->region1start = 36>>1;
504 				gr_info->region2start = 576>>1;
505 			}
506 			else
507 			{
508 				if(fr->mpeg25)
509 				{
510 					int r0c,r1c;
511 					if((gr_info->block_type == 2) && (!gr_info->mixed_block_flag) ) r0c = 5;
512 					else r0c = 7;
513 
514 					/* r0c+1+r1c+1 == 22, always. */
515 					r1c = 20 - r0c;
516 					gr_info->region1start = bandInfo[sfreq].longIdx[r0c+1] >> 1 ;
517 					gr_info->region2start = bandInfo[sfreq].longIdx[r0c+1+r1c+1] >> 1;
518 				}
519 				else
520 				{
521 					gr_info->region1start = 54>>1;
522 					gr_info->region2start = 576>>1;
523 				}
524 			}
525 		}
526 		else
527 		{
528 			int i,r0c,r1c;
529 			for (i=0; i<3; i++)
530 			gr_info->table_select[i] = getbits_fast(fr, 5);
531 
532 			r0c = getbits_fast(fr, 4); /* 0 .. 15 */
533 			r1c = getbits_fast(fr, 3); /* 0 .. 7 */
534 			gr_info->region1start = bandInfo[sfreq].longIdx[r0c+1] >> 1 ;
535 
536 			/* max(r0c+r1c+2) = 15+7+2 = 24 */
537 			if(r0c+1+r1c+1 > 22) gr_info->region2start = 576>>1;
538 			else gr_info->region2start = bandInfo[sfreq].longIdx[r0c+1+r1c+1] >> 1;
539 
540 			gr_info->block_type = 0;
541 			gr_info->mixed_block_flag = 0;
542 		}
543 		if(!fr->lsf) gr_info->preflag = get1bit(fr);
544 
545 		gr_info->scalefac_scale = get1bit(fr);
546 		gr_info->count1table_select = get1bit(fr);
547 	}
548 	return 0;
549 }
550 
551 
552 /* read scalefactors */
553 static int III_get_scale_factors_1(mpg123_handle *fr, int *scf,struct gr_info_s *gr_info,int ch,int gr)
554 {
555 	const unsigned char slen[2][16] =
556 	{
557 		{0, 0, 0, 0, 3, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4},
558 		{0, 1, 2, 3, 0, 1, 2, 3, 1, 2, 3, 1, 2, 3, 2, 3}
559 	};
560 	int numbits;
561 	int num0 = slen[0][gr_info->scalefac_compress];
562 	int num1 = slen[1][gr_info->scalefac_compress];
563 
564 	if(gr_info->part2_3_length == 0)
565 	{
566 		int i;
567 		for(i=0;i<39;i++)
568 			*scf++ = 0;
569 		return 0;
570 	}
571 
572 	if(gr_info->block_type == 2)
573 	{
574 		int i=18;
575 		numbits = (num0 + num1) * 18;
576 
577 		if(gr_info->mixed_block_flag)
578 		{
579 			for (i=8;i;i--)
580 			*scf++ = getbits_fast(fr, num0);
581 
582 			i = 9;
583 			numbits -= num0; /* num0 * 17 + num1 * 18 */
584 		}
585 
586 		for(;i;i--) *scf++ = getbits_fast(fr, num0);
587 
588 		for(i = 18; i; i--) *scf++ = getbits_fast(fr, num1);
589 
590 		*scf++ = 0; *scf++ = 0; *scf++ = 0; /* short[13][0..2] = 0 */
591 	}
592 	else
593 	{
594 		int i;
595 		int scfsi = gr_info->scfsi;
596 
597 		if(scfsi < 0)
598 		{ /* scfsi < 0 => granule == 0 */
599 			for(i=11;i;i--) *scf++ = getbits_fast(fr, num0);
600 
601 			for(i=10;i;i--) *scf++ = getbits_fast(fr, num1);
602 
603 			numbits = (num0 + num1) * 10 + num0;
604 			*scf++ = 0;
605 		}
606 		else
607 		{
608 			numbits = 0;
609 			if(!(scfsi & 0x8))
610 			{
611 				for (i=0;i<6;i++) *scf++ = getbits_fast(fr, num0);
612 
613 				numbits += num0 * 6;
614 			}
615 			else scf += 6;
616 
617 			if(!(scfsi & 0x4))
618 			{
619 				for (i=0;i<5;i++) *scf++ = getbits_fast(fr, num0);
620 
621 				numbits += num0 * 5;
622 			}
623 			else scf += 5;
624 
625 			if(!(scfsi & 0x2))
626 			{
627 				for(i=0;i<5;i++) *scf++ = getbits_fast(fr, num1);
628 
629 				numbits += num1 * 5;
630 			}
631 			else scf += 5;
632 
633 			if(!(scfsi & 0x1))
634 			{
635 				for (i=0;i<5;i++) *scf++ = getbits_fast(fr, num1);
636 
637 				numbits += num1 * 5;
638 			}
639 			else scf += 5;
640 
641 			*scf++ = 0;  /* no l[21] in original sources */
642 		}
643 	}
644 	return numbits;
645 }
646 
647 
648 static int III_get_scale_factors_2(mpg123_handle *fr, int *scf,struct gr_info_s *gr_info,int i_stereo)
649 {
650 	const unsigned char *pnt;
651 	int i,j,n=0,numbits=0;
652 	unsigned int slen;
653 
654 	const unsigned char stab[3][6][4] =
655 	{
656 		{
657 			{ 6, 5, 5,5 } , { 6, 5, 7,3 } , { 11,10,0,0},
658 			{ 7, 7, 7,0 } , { 6, 6, 6,3 } , {  8, 8,5,0}
659 		},
660 		{
661 			{ 9, 9, 9,9 } , { 9, 9,12,6 } , { 18,18,0,0},
662 			{12,12,12,0 } , {12, 9, 9,6 } , { 15,12,9,0}
663 		},
664 		{
665 			{ 6, 9, 9,9 } , { 6, 9,12,6 } , { 15,18,0,0},
666 			{ 6,15,12,0 } , { 6,12, 9,6 } , {  6,18,9,0}
667 		}
668 	};
669 
670 	if(i_stereo) /* i_stereo AND second channel -> do_layer3() checks this */
671 	slen = i_slen2[gr_info->scalefac_compress>>1];
672 	else
673 	slen = n_slen2[gr_info->scalefac_compress];
674 
675 	gr_info->preflag = (slen>>15) & 0x1;
676 
677 	n = 0;
678 	if( gr_info->block_type == 2 )
679 	{
680 		n++;
681 		if(gr_info->mixed_block_flag) n++;
682 	}
683 
684 	pnt = stab[n][(slen>>12)&0x7];
685 
686 	if(gr_info->part2_3_length == 0)
687 	{
688 		int i;
689 		for(i=0;i<39;i++)
690 			*scf++ = 0;
691 		return 0;
692 	}
693 
694 	for(i=0;i<4;i++)
695 	{
696 		int num = slen & 0x7;
697 		slen >>= 3;
698 		if(num)
699 		{
700 			for(j=0;j<(int)(pnt[i]);j++) *scf++ = getbits_fast(fr, num);
701 
702 			numbits += pnt[i] * num;
703 		}
704 		else
705 		for(j=0;j<(int)(pnt[i]);j++) *scf++ = 0;
706 	}
707 
708 	n = (n << 1) + 1;
709 	for(i=0;i<n;i++) *scf++ = 0;
710 
711 	return numbits;
712 }
713 
/*
	Per-band additions to the scalefactors of long blocks, selected by preflag:
	row 0 adds nothing, row 1 is the actual table.
	Read-only data, hence const.
*/
static const unsigned char pretab_choice[2][22] =
{
	{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
	{0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,2,2,3,3,3,2,0}
};
719 
720 /*
721 	Dequantize samples
722 	...includes Huffman decoding
723 */
724 
/*
	Types and helpers for the bit mask used during Huffman decoding.
	24 is enough because tab13 has max. a 19 bit huffvector.
	The old code played games with shifting signed integers around in not quite
	legal ways. Also, it used long where just 32 bits are required. This could
	be good or bad on 64 bit architectures ... anyway, making clear that
	32 bits suffice is a benefit.
*/
#if 0
/* To reconstruct old code, use this: */
#define MASK_STYPE long
#define MASK_UTYPE unsigned long
#define MASK_TYPE MASK_STYPE
#define MSB_MASK (mask < 0)
#else
/* This should be more proper: */
#define MASK_STYPE int32_t
#define MASK_UTYPE uint32_t
#define MASK_TYPE  MASK_UTYPE
/* Complicated way of checking for msb value. This used to be (mask < 0). */
#define MSB_MASK ((MASK_UTYPE)mask & ((MASK_UTYPE)1 << (sizeof(MASK_TYPE)*8-1)))
#endif

/* Bit position of the topmost byte of the mask. */
#define BITSHIFT ((sizeof(MASK_TYPE)-1)*8)

/*
	Pull bytes into the mask while less than BITSHIFT bits are in it.
	Works on the variables mask, num, part2remain and fr of the calling scope.
*/
#define REFRESH_MASK \
	while(num < BITSHIFT) { \
		mask |= ((MASK_UTYPE)getbyte(fr)) << (BITSHIFT-num); \
		num += 8; \
		part2remain -= 8; }
750 
751 static int III_dequantize_sample(mpg123_handle *fr, real xr[SBLIMIT][SSLIMIT],int *scf, struct gr_info_s *gr_info,int sfreq,int part2bits)
752 {
753 	int shift = 1 + gr_info->scalefac_scale;
754 	real *xrpnt = (real *) xr;
755 	int l[3],l3;
756 	int part2remain = gr_info->part2_3_length - part2bits;
757 	int *me;
758 #ifdef REAL_IS_FIXED
759 	int gainpow2_scale_idx = 378;
760 #endif
761 
762 	/* Assumption: If there is some part2_3_length at all, there should be
763 	   enough of it to work with properly. In case of zero length we silently
764 	   zero things. */
765 	if(gr_info->part2_3_length > 0)
766 	{
767 
768 	/* mhipp tree has this split up a bit... */
769 	int num=getbitoffset(fr);
770 	MASK_TYPE mask;
771 	/* We must split this, because for num==0 the shift is undefined if you do it in one step. */
772 	mask  = ((MASK_UTYPE) getbits(fr, num))<<BITSHIFT;
773 	mask <<= 8-num;
774 	part2remain -= num;
775 
776 	{
777 		int bv       = gr_info->big_values;
778 		int region1  = gr_info->region1start;
779 		int region2  = gr_info->region2start;
780 		l3 = ((576>>1)-bv)>>1;
781 
782 		/* we may lose the 'odd' bit here !! check this later again */
783 		if(bv <= region1)
784 		{
785 			l[0] = bv;
786 			l[1] = 0;
787 			l[2] = 0;
788 		}
789 		else
790 		{
791 			l[0] = region1;
792 			if(bv <= region2)
793 			{
794 				l[1] = bv - l[0];
795 				l[2] = 0;
796 			}
797 			else
798 			{
799 				l[1] = region2 - l[0];
800 				l[2] = bv - region2;
801 			}
802 		}
803 	}
804 
805 #define CHECK_XRPNT if(xrpnt >= &xr[SBLIMIT][0]) \
806 { \
807 	if(NOQUIET) \
808 		error2("attempted xrpnt overflow (%p !< %p)", (void*) xrpnt, (void*) &xr[SBLIMIT][0]); \
809 	return 1; \
810 }
811 
812 	if(gr_info->block_type == 2)
813 	{
814 		/* decoding with short or mixed mode BandIndex table */
815 		int i,max[4];
816 		int step=0,lwin=3,cb=0;
817 		register real v = 0.0;
818 		register int *m,mc;
819 
820 		if(gr_info->mixed_block_flag)
821 		{
822 			max[3] = -1;
823 			max[0] = max[1] = max[2] = 2;
824 			m = map[sfreq][0];
825 			me = mapend[sfreq][0];
826 		}
827 		else
828 		{
829 			max[0] = max[1] = max[2] = max[3] = -1;
830 			/* max[3] not really needed in this case */
831 			m = map[sfreq][1];
832 			me = mapend[sfreq][1];
833 		}
834 
835 		mc = 0;
836 		for(i=0;i<2;i++)
837 		{
838 			int lp = l[i];
839 			const struct newhuff *h = ht+gr_info->table_select[i];
840 			for(;lp;lp--,mc--)
841 			{
842 				register MASK_STYPE x,y;
843 				if( (!mc) )
844 				{
845 					mc    = *m++;
846 					xrpnt = ((real *) xr) + (*m++);
847 					lwin  = *m++;
848 					cb    = *m++;
849 					if(lwin == 3)
850 					{
851 #ifdef REAL_IS_FIXED
852 						gainpow2_scale_idx = (int)(gr_info->pow2gain + (*scf << shift) - fr->gainpow2);
853 #endif
854 						v = gr_info->pow2gain[(*scf++) << shift];
855 						step = 1;
856 					}
857 					else
858 					{
859 #ifdef REAL_IS_FIXED
860 						gainpow2_scale_idx = (int)(gr_info->full_gain[lwin] + (*scf << shift) - fr->gainpow2);
861 #endif
862 						v = gr_info->full_gain[lwin][(*scf++) << shift];
863 						step = 3;
864 					}
865 				}
866 				{
867 					const short *val = h->table;
868 					REFRESH_MASK;
869 #ifdef USE_NEW_HUFFTABLE
870 					while((y=val[(MASK_UTYPE)mask>>(BITSHIFT+4)])<0)
871 					{
872 						val -= y;
873 						num -= 4;
874 						mask <<= 4;
875 					}
876 					num -= (y >> 8);
877 					mask <<= (y >> 8);
878 					x = (y >> 4) & 0xf;
879 					y &= 0xf;
880 #else
881 					while((y=*val++)<0)
882 					{
883 						if (MSB_MASK) val -= y;
884 
885 						num--;
886 						mask <<= 1;
887 					}
888 					x = y >> 4;
889 					y &= 0xf;
890 #endif
891 				}
892 				CHECK_XRPNT;
893 				if(x == 15 && h->linbits)
894 				{
895 					max[lwin] = cb;
896 					REFRESH_MASK;
897 					x += ((MASK_UTYPE) mask) >> (BITSHIFT+8-h->linbits);
898 					num -= h->linbits+1;
899 					mask <<= h->linbits;
900 					if(MSB_MASK) *xrpnt = REAL_MUL_SCALE_LAYER3(-ispow[x], v, gainpow2_scale_idx);
901 					else         *xrpnt = REAL_MUL_SCALE_LAYER3( ispow[x], v, gainpow2_scale_idx);
902 
903 					mask <<= 1;
904 				}
905 				else if(x)
906 				{
907 					max[lwin] = cb;
908 					if(MSB_MASK) *xrpnt = REAL_MUL_SCALE_LAYER3(-ispow[x], v, gainpow2_scale_idx);
909 					else         *xrpnt = REAL_MUL_SCALE_LAYER3( ispow[x], v, gainpow2_scale_idx);
910 
911 					num--;
912 					mask <<= 1;
913 				}
914 				else *xrpnt = DOUBLE_TO_REAL(0.0);
915 
916 				xrpnt += step;
917 				CHECK_XRPNT;
918 				if(y == 15 && h->linbits)
919 				{
920 					max[lwin] = cb;
921 					REFRESH_MASK;
922 					y += ((MASK_UTYPE) mask) >> (BITSHIFT+8-h->linbits);
923 					num -= h->linbits+1;
924 					mask <<= h->linbits;
925 					if(MSB_MASK) *xrpnt = REAL_MUL_SCALE_LAYER3(-ispow[y], v, gainpow2_scale_idx);
926 					else         *xrpnt = REAL_MUL_SCALE_LAYER3( ispow[y], v, gainpow2_scale_idx);
927 
928 					mask <<= 1;
929 				}
930 				else if(y)
931 				{
932 					max[lwin] = cb;
933 					if(MSB_MASK) *xrpnt = REAL_MUL_SCALE_LAYER3(-ispow[y], v, gainpow2_scale_idx);
934 					else         *xrpnt = REAL_MUL_SCALE_LAYER3( ispow[y], v, gainpow2_scale_idx);
935 
936 					num--;
937 					mask <<= 1;
938 				}
939 				else *xrpnt = DOUBLE_TO_REAL(0.0);
940 
941 				xrpnt += step;
942 			}
943 		}
944 
945 		for(;l3 && (part2remain+num > 0);l3--)
946 		{
947 			const struct newhuff* h;
948 			const short* val;
949 			register short a;
950 
951 			h = htc+gr_info->count1table_select;
952 			val = h->table;
953 
954 			REFRESH_MASK;
955 			while((a=*val++)<0)
956 			{
957 				if(MSB_MASK) val -= a;
958 
959 				num--;
960 				mask <<= 1;
961 			}
962 			if(part2remain+num <= 0)
963 			{
964 				num -= part2remain+num;
965 				break;
966 			}
967 
968 			for(i=0;i<4;i++)
969 			{
970 				if(!(i & 1))
971 				{
972 					if(!mc)
973 					{
974 						mc = *m++;
975 						xrpnt = ((real *) xr) + (*m++);
976 						lwin = *m++;
977 						cb = *m++;
978 						if(lwin == 3)
979 						{
980 #ifdef REAL_IS_FIXED
981 							gainpow2_scale_idx = (int)(gr_info->pow2gain + (*scf << shift) - fr->gainpow2);
982 #endif
983 							v = gr_info->pow2gain[(*scf++) << shift];
984 							step = 1;
985 						}
986 						else
987 						{
988 #ifdef REAL_IS_FIXED
989 							gainpow2_scale_idx = (int)(gr_info->full_gain[lwin] + (*scf << shift) - fr->gainpow2);
990 #endif
991 							v = gr_info->full_gain[lwin][(*scf++) << shift];
992 							step = 3;
993 						}
994 					}
995 					mc--;
996 				}
997 				CHECK_XRPNT;
998 				if( (a & (0x8>>i)) )
999 				{
1000 					max[lwin] = cb;
1001 					if(part2remain+num <= 0)
1002 					break;
1003 
1004 					if(MSB_MASK) *xrpnt = -REAL_SCALE_LAYER3(v, gainpow2_scale_idx);
1005 					else         *xrpnt =  REAL_SCALE_LAYER3(v, gainpow2_scale_idx);
1006 
1007 					num--;
1008 					mask <<= 1;
1009 				}
1010 				else *xrpnt = DOUBLE_TO_REAL(0.0);
1011 
1012 				xrpnt += step;
1013 			}
1014 		}
1015 
1016 		if(lwin < 3)
1017 		{ /* short band? */
1018 			while(1)
1019 			{
1020 				for(;mc > 0;mc--)
1021 				{
1022 					CHECK_XRPNT;
1023 					*xrpnt = DOUBLE_TO_REAL(0.0); xrpnt += 3; /* short band -> step=3 */
1024 					*xrpnt = DOUBLE_TO_REAL(0.0); xrpnt += 3;
1025 				}
1026 				if(m >= me)
1027 				break;
1028 
1029 				mc    = *m++;
1030 				xrpnt = ((real *) xr) + *m++;
1031 				if(*m++ == 0)
1032 				break; /* optimize: field will be set to zero at the end of the function */
1033 
1034 				m++; /* cb */
1035 			}
1036 		}
1037 
1038 		gr_info->maxband[0] = max[0]+1;
1039 		gr_info->maxband[1] = max[1]+1;
1040 		gr_info->maxband[2] = max[2]+1;
1041 		gr_info->maxbandl   = max[3]+1;
1042 
1043 		{
1044 			int rmax = max[0] > max[1] ? max[0] : max[1];
1045 			rmax = (rmax > max[2] ? rmax : max[2]) + 1;
1046 			gr_info->maxb = rmax ? fr->shortLimit[sfreq][rmax] : fr->longLimit[sfreq][max[3]+1];
1047 		}
1048 
1049 	}
1050 	else
1051 	{
1052 		/* decoding with 'long' BandIndex table (block_type != 2) */
1053 		const unsigned char *pretab = pretab_choice[gr_info->preflag];
1054 		int i,max = -1;
1055 		int cb = 0;
1056 		int *m = map[sfreq][2];
1057 		register real v = 0.0;
1058 		int mc = 0;
1059 
1060 		/* long hash table values */
1061 		for(i=0;i<3;i++)
1062 		{
1063 			int lp = l[i];
1064 			const struct newhuff *h = ht+gr_info->table_select[i];
1065 
1066 			for(;lp;lp--,mc--)
1067 			{
1068 				MASK_STYPE x,y;
1069 				if(!mc)
1070 				{
1071 					mc = *m++;
1072 					cb = *m++;
1073 #ifdef CUT_SFB21
1074 					if(cb == 21)
1075 						v = 0.0;
1076 					else
1077 #endif
1078 					{
1079 #ifdef REAL_IS_FIXED
1080 						gainpow2_scale_idx = (int)(gr_info->pow2gain + (*scf << shift) - fr->gainpow2);
1081 #endif
1082 						v = gr_info->pow2gain[(*(scf++) + (*pretab++)) << shift];
1083 					}
1084 				}
1085 				{
1086 					const short *val = h->table;
1087 					REFRESH_MASK;
1088 #ifdef USE_NEW_HUFFTABLE
1089 					while((y=val[(MASK_UTYPE)mask>>(BITSHIFT+4)])<0)
1090 					{
1091 						val -= y;
1092 						num -= 4;
1093 						mask <<= 4;
1094 					}
1095 					num -= (y >> 8);
1096 					mask <<= (y >> 8);
1097 					x = (y >> 4) & 0xf;
1098 					y &= 0xf;
1099 #else
1100 					while((y=*val++)<0)
1101 					{
1102 						if (MSB_MASK) val -= y;
1103 
1104 						num--;
1105 						mask <<= 1;
1106 					}
1107 					x = y >> 4;
1108 					y &= 0xf;
1109 #endif
1110 				}
1111 
1112 				CHECK_XRPNT;
1113 				if(x == 15 && h->linbits)
1114 				{
1115 					max = cb;
1116 					REFRESH_MASK;
1117 					x += ((MASK_UTYPE) mask) >> (BITSHIFT+8-h->linbits);
1118 					num -= h->linbits+1;
1119 					mask <<= h->linbits;
1120 					if(MSB_MASK) *xrpnt++ = REAL_MUL_SCALE_LAYER3(-ispow[x], v, gainpow2_scale_idx);
1121 					else         *xrpnt++ = REAL_MUL_SCALE_LAYER3( ispow[x], v, gainpow2_scale_idx);
1122 
1123 					mask <<= 1;
1124 				}
1125 				else if(x)
1126 				{
1127 					max = cb;
1128 					if(MSB_MASK) *xrpnt++ = REAL_MUL_SCALE_LAYER3(-ispow[x], v, gainpow2_scale_idx);
1129 					else         *xrpnt++ = REAL_MUL_SCALE_LAYER3( ispow[x], v, gainpow2_scale_idx);
1130 					num--;
1131 
1132 					mask <<= 1;
1133 				}
1134 				else *xrpnt++ = DOUBLE_TO_REAL(0.0);
1135 
1136 				CHECK_XRPNT;
1137 				if(y == 15 && h->linbits)
1138 				{
1139 					max = cb;
1140 					REFRESH_MASK;
1141 					y += ((MASK_UTYPE) mask) >> (BITSHIFT+8-h->linbits);
1142 					num -= h->linbits+1;
1143 					mask <<= h->linbits;
1144 					if(MSB_MASK) *xrpnt++ = REAL_MUL_SCALE_LAYER3(-ispow[y], v, gainpow2_scale_idx);
1145 					else         *xrpnt++ = REAL_MUL_SCALE_LAYER3( ispow[y], v, gainpow2_scale_idx);
1146 
1147 					mask <<= 1;
1148 				}
1149 				else if(y)
1150 				{
1151 					max = cb;
1152 					if(MSB_MASK) *xrpnt++ = REAL_MUL_SCALE_LAYER3(-ispow[y], v, gainpow2_scale_idx);
1153 					else         *xrpnt++ = REAL_MUL_SCALE_LAYER3( ispow[y], v, gainpow2_scale_idx);
1154 
1155 					num--;
1156 					mask <<= 1;
1157 				}
1158 				else *xrpnt++ = DOUBLE_TO_REAL(0.0);
1159 			}
1160 		}
1161 
1162 		/* short (count1table) values */
1163 		for(;l3 && (part2remain+num > 0);l3--)
1164 		{
1165 			const struct newhuff *h = htc+gr_info->count1table_select;
1166 			const short *val = h->table;
1167 			register short a;
1168 
1169 			REFRESH_MASK;
1170 			while((a=*val++)<0)
1171 			{
1172 				if (MSB_MASK) val -= a;
1173 
1174 				num--;
1175 				mask <<= 1;
1176 			}
1177 			if(part2remain+num <= 0)
1178 			{
1179 				num -= part2remain+num;
1180 				break;
1181 			}
1182 
1183 			for(i=0;i<4;i++)
1184 			{
1185 				if(!(i & 1))
1186 				{
1187 					if(!mc)
1188 					{
1189 						mc = *m++;
1190 						cb = *m++;
1191 #ifdef CUT_SFB21
1192 						if(cb == 21)
1193 							v = 0.0;
1194 						else
1195 #endif
1196 						{
1197 #ifdef REAL_IS_FIXED
1198 							gainpow2_scale_idx = (int)(gr_info->pow2gain + (*scf << shift) - fr->gainpow2);
1199 #endif
1200 							v = gr_info->pow2gain[((*scf++) + (*pretab++)) << shift];
1201 						}
1202 					}
1203 					mc--;
1204 				}
1205 				CHECK_XRPNT;
1206 				if( (a & (0x8>>i)) )
1207 				{
1208 					max = cb;
1209 					if(part2remain+num <= 0)
1210 					break;
1211 
1212 					if(MSB_MASK) *xrpnt++ = -REAL_SCALE_LAYER3(v, gainpow2_scale_idx);
1213 					else         *xrpnt++ =  REAL_SCALE_LAYER3(v, gainpow2_scale_idx);
1214 
1215 					num--;
1216 					mask <<= 1;
1217 				}
1218 				else *xrpnt++ = DOUBLE_TO_REAL(0.0);
1219 			}
1220 		}
1221 
1222 		gr_info->maxbandl = max+1;
1223 		gr_info->maxb = fr->longLimit[sfreq][gr_info->maxbandl];
1224 	}
1225 
1226 	part2remain += num;
1227 	backbits(fr, num);
1228 	num = 0;
1229 	}
1230 	else
1231 	{
1232 		part2remain = 0;
1233 		/* Not entirely sure what good values are, must be > 0. */
1234 		gr_info->maxband[0] =
1235 		gr_info->maxband[1] =
1236 		gr_info->maxband[2] =
1237 		gr_info->maxbandl   = 1; /* sfb=maxband[lwin]*3 + lwin - mixed_block_flag must be >= 0 */
1238 		gr_info->maxb       = 1;
1239 	}
1240 
1241 	while(xrpnt < &xr[SBLIMIT][0])
1242 	*xrpnt++ = DOUBLE_TO_REAL(0.0);
1243 
1244 	while( part2remain > 16 )
1245 	{
1246 		skipbits(fr, 16); /* Dismiss stuffing Bits */
1247 		part2remain -= 16;
1248 	}
1249 	if(part2remain > 0) skipbits(fr, part2remain);
1250 	else if(part2remain < 0)
1251 	{
1252 		debug1("Can't rewind stream by %d bits!",-part2remain);
1253 		return 1; /* -> error */
1254 	}
1255 	return 0;
1256 }
1257 
1258 
1259 /* calculate real channel values for Joint-I-Stereo-mode */
1260 static void III_i_stereo(real xr_buf[2][SBLIMIT][SSLIMIT],int *scalefac, struct gr_info_s *gr_info,int sfreq,int ms_stereo,int lsf)
1261 {
1262 	real (*xr)[SBLIMIT*SSLIMIT] = (real (*)[SBLIMIT*SSLIMIT] ) xr_buf;
1263 	const struct bandInfoStruct *bi = &bandInfo[sfreq];
1264 
1265 	const real *tab1,*tab2;
1266 
1267 #if 1
1268 	int tab;
1269 /* TODO: optimize as static */
1270 	const real *tabs[3][2][2] =
1271 	{
1272 		{ { tan1_1,tan2_1 }       , { tan1_2,tan2_2 } },
1273 		{ { pow1_1[0],pow2_1[0] } , { pow1_2[0],pow2_2[0] } },
1274 		{ { pow1_1[1],pow2_1[1] } , { pow1_2[1],pow2_2[1] } }
1275 	};
1276 
1277 	tab = lsf + (gr_info->scalefac_compress & lsf);
1278 	tab1 = tabs[tab][ms_stereo][0];
1279 	tab2 = tabs[tab][ms_stereo][1];
1280 #else
1281 	if(lsf)
1282 	{
1283 		int p = gr_info->scalefac_compress & 0x1;
1284 		if(ms_stereo)
1285 		{
1286 			tab1 = pow1_2[p];
1287 			tab2 = pow2_2[p];
1288 		}
1289 		else
1290 		{
1291 			tab1 = pow1_1[p];
1292 			tab2 = pow2_1[p];
1293 		}
1294 	}
1295 	else
1296 	{
1297 		if(ms_stereo)
1298 		{
1299 			tab1 = tan1_2;
1300 			tab2 = tan2_2;
1301 		}
1302 		else
1303 		{
1304 			tab1 = tan1_1;
1305 			tab2 = tan2_1;
1306 		}
1307 	}
1308 #endif
1309 
1310 	if(gr_info->block_type == 2)
1311 	{
1312 		int lwin,do_l = 0;
1313 		if( gr_info->mixed_block_flag ) do_l = 1;
1314 
1315 		for(lwin=0;lwin<3;lwin++)
1316 		{ /* process each window */
1317 			/* get first band with zero values */
1318 			int is_p,sb,idx,sfb = gr_info->maxband[lwin];  /* sfb is minimal 3 for mixed mode */
1319 			if(sfb > 3) do_l = 0;
1320 
1321 			for(;sfb<12;sfb++)
1322 			{
1323 				is_p = scalefac[sfb*3+lwin-gr_info->mixed_block_flag]; /* scale: 0-15 */
1324 				if(is_p != 7)
1325 				{
1326 					real t1,t2;
1327 					sb  = bi->shortDiff[sfb];
1328 					idx = bi->shortIdx[sfb] + lwin;
1329 					t1  = tab1[is_p]; t2 = tab2[is_p];
1330 					for (; sb > 0; sb--,idx+=3)
1331 					{
1332 						real v = xr[0][idx];
1333 						xr[0][idx] = REAL_MUL_15(v, t1);
1334 						xr[1][idx] = REAL_MUL_15(v, t2);
1335 					}
1336 				}
1337 			}
1338 
1339 #if 1
1340 /* in the original: copy 10 to 11 , here: copy 11 to 12
1341 maybe still wrong??? (copy 12 to 13?) */
1342 			is_p = scalefac[11*3+lwin-gr_info->mixed_block_flag]; /* scale: 0-15 */
1343 			sb   = bi->shortDiff[12];
1344 			idx  = bi->shortIdx[12] + lwin;
1345 #else
1346 			is_p = scalefac[10*3+lwin-gr_info->mixed_block_flag]; /* scale: 0-15 */
1347 			sb   = bi->shortDiff[11];
1348 			idx  = bi->shortIdx[11] + lwin;
1349 #endif
1350 			if(is_p != 7)
1351 			{
1352 				real t1,t2;
1353 				t1 = tab1[is_p]; t2 = tab2[is_p];
1354 				for( ; sb > 0; sb--,idx+=3 )
1355 				{
1356 					real v = xr[0][idx];
1357 					xr[0][idx] = REAL_MUL_15(v, t1);
1358 					xr[1][idx] = REAL_MUL_15(v, t2);
1359 				}
1360 			}
1361 		} /* end for(lwin; .. ; . ) */
1362 
1363 		/* also check l-part, if ALL bands in the three windows are 'empty' and mode = mixed_mode */
1364 		if(do_l)
1365 		{
1366 			int sfb = gr_info->maxbandl;
1367 			int idx;
1368 			if(sfb > 21) return; /* similarity fix related to CVE-2006-1655 */
1369 
1370 			idx = bi->longIdx[sfb];
1371 			for( ; sfb<8; sfb++ )
1372 			{
1373 				int sb = bi->longDiff[sfb];
1374 				int is_p = scalefac[sfb]; /* scale: 0-15 */
1375 				if(is_p != 7)
1376 				{
1377 					real t1,t2;
1378 					t1 = tab1[is_p]; t2 = tab2[is_p];
1379 					for( ; sb > 0; sb--,idx++)
1380 					{
1381 						real v = xr[0][idx];
1382 						xr[0][idx] = REAL_MUL_15(v, t1);
1383 						xr[1][idx] = REAL_MUL_15(v, t2);
1384 					}
1385 				}
1386 				else idx += sb;
1387 			}
1388 		}
1389 	}
1390 	else
1391 	{ /* ((gr_info->block_type != 2)) */
1392 		int sfb = gr_info->maxbandl;
1393 		int is_p,idx;
1394 		if(sfb > 21) return; /* tightened fix for CVE-2006-1655 */
1395 
1396 		idx = bi->longIdx[sfb];
1397 		for ( ; sfb<21; sfb++)
1398 		{
1399 			int sb = bi->longDiff[sfb];
1400 			is_p = scalefac[sfb]; /* scale: 0-15 */
1401 			if(is_p != 7)
1402 			{
1403 				real t1,t2;
1404 				t1 = tab1[is_p]; t2 = tab2[is_p];
1405 				for( ; sb > 0; sb--,idx++)
1406 				{
1407 					 real v = xr[0][idx];
1408 					 xr[0][idx] = REAL_MUL_15(v, t1);
1409 					 xr[1][idx] = REAL_MUL_15(v, t2);
1410 				}
1411 			}
1412 			else idx += sb;
1413 		}
1414 
1415 		is_p = scalefac[20];
1416 		if(is_p != 7)
1417 		{  /* copy l-band 20 to l-band 21 */
1418 			int sb;
1419 			real t1 = tab1[is_p],t2 = tab2[is_p];
1420 
1421 			for( sb = bi->longDiff[21]; sb > 0; sb--,idx++ )
1422 			{
1423 				real v = xr[0][idx];
1424 				xr[0][idx] = REAL_MUL_15(v, t1);
1425 				xr[1][idx] = REAL_MUL_15(v, t2);
1426 			}
1427 		}
1428 	}
1429 }
1430 
1431 
1432 static void III_antialias(real xr[SBLIMIT][SSLIMIT],struct gr_info_s *gr_info)
1433 {
1434 	int sblim;
1435 
1436 	if(gr_info->block_type == 2)
1437 	{
1438 			if(!gr_info->mixed_block_flag) return;
1439 
1440 			sblim = 1;
1441 	}
1442 	else sblim = gr_info->maxb-1;
1443 
1444 	/* 31 alias-reduction operations between each pair of sub-bands */
1445 	/* with 8 butterflies between each pair                         */
1446 
1447 	{
1448 		int sb;
1449 		real *xr1=(real *) xr[1];
1450 
1451 		for(sb=sblim; sb; sb--,xr1+=10)
1452 		{
1453 			int ss;
1454 			real *cs=aa_cs,*ca=aa_ca;
1455 			real *xr2 = xr1;
1456 
1457 			for(ss=7;ss>=0;ss--)
1458 			{ /* upper and lower butterfly inputs */
1459 				register real bu = *--xr2,bd = *xr1;
1460 				*xr2   = REAL_MUL(bu, *cs) - REAL_MUL(bd, *ca);
1461 				*xr1++ = REAL_MUL(bd, *cs++) + REAL_MUL(bu, *ca++);
1462 			}
1463 		}
1464 	}
1465 }
1466 
1467 /*
1468 	This is an optimized DCT from Jeff Tsay's maplay 1.2+ package.
1469 	Saved one multiplication by doing the 'twiddle factor' stuff
1470 	together with the window mul. (MH)
1471 
1472 	This uses Byeong Gi Lee's Fast Cosine Transform algorithm, but the
1473 	9 point IDCT needs to be reduced further. Unfortunately, I don't
1474 	know how to do that, because 9 is not an even number. - Jeff.
1475 
1476 	Original Message:
1477 
1478 	9 Point Inverse Discrete Cosine Transform
1479 
1480 	This piece of code is Copyright 1997 Mikko Tommila and is freely usable
1481 	by anybody. The algorithm itself is of course in the public domain.
1482 
1483 	Again derived heuristically from the 9-point WFTA.
1484 
1485 	The algorithm is optimized (?) for speed, not for small rounding errors or
1486 	good readability.
1487 
1488 	36 additions, 11 multiplications
1489 
1490 	Again this is very likely sub-optimal.
1491 
1492 	The code is optimized to use a minimum number of temporary variables,
1493 	so it should compile quite well even on 8-register Intel x86 processors.
1494 	This makes the code quite obfuscated and very difficult to understand.
1495 
1496 	References:
1497 	[1] S. Winograd: "On Computing the Discrete Fourier Transform",
1498 	    Mathematics of Computation, Volume 32, Number 141, January 1978,
1499 	    Pages 175-199
1500 */
1501 
1502 /* Calculation of the inverse MDCT
1503    used to be static without 3dnow - does that really matter? */
1504 void dct36(real *inbuf,real *o1,real *o2,real *wintab,real *tsbuf)
1505 {
1506 #ifdef NEW_DCT9
1507 	real tmp[18];
1508 #endif
1509 
1510 	{
1511 		register real *in = inbuf;
1512 
1513 		in[17]+=in[16]; in[16]+=in[15]; in[15]+=in[14];
1514 		in[14]+=in[13]; in[13]+=in[12]; in[12]+=in[11];
1515 		in[11]+=in[10]; in[10]+=in[9];  in[9] +=in[8];
1516 		in[8] +=in[7];  in[7] +=in[6];  in[6] +=in[5];
1517 		in[5] +=in[4];  in[4] +=in[3];  in[3] +=in[2];
1518 		in[2] +=in[1];  in[1] +=in[0];
1519 
1520 		in[17]+=in[15]; in[15]+=in[13]; in[13]+=in[11]; in[11]+=in[9];
1521 		in[9] +=in[7];  in[7] +=in[5];  in[5] +=in[3];  in[3] +=in[1];
1522 
1523 
1524 #ifdef NEW_DCT9
1525 #if 1
1526 		{
1527 			real t3;
1528 			{
1529 				real t0, t1, t2;
1530 
1531 				t0 = REAL_MUL(COS6_2, (in[8] + in[16] - in[4]));
1532 				t1 = REAL_MUL(COS6_2, in[12]);
1533 
1534 				t3 = in[0];
1535 				t2 = t3 - t1 - t1;
1536 				tmp[1] = tmp[7] = t2 - t0;
1537 				tmp[4]          = t2 + t0 + t0;
1538 				t3 += t1;
1539 
1540 				t2 = REAL_MUL(COS6_1, (in[10] + in[14] - in[2]));
1541 				tmp[1] -= t2;
1542 				tmp[7] += t2;
1543 			}
1544 			{
1545 				real t0, t1, t2;
1546 
1547 				t0 = REAL_MUL(cos9[0], (in[4] + in[8] ));
1548 				t1 = REAL_MUL(cos9[1], (in[8] - in[16]));
1549 				t2 = REAL_MUL(cos9[2], (in[4] + in[16]));
1550 
1551 				tmp[2] = tmp[6] = t3 - t0      - t2;
1552 				tmp[0] = tmp[8] = t3 + t0 + t1;
1553 				tmp[3] = tmp[5] = t3      - t1 + t2;
1554 			}
1555 		}
1556 		{
1557 			real t1, t2, t3;
1558 
1559 			t1 = REAL_MUL(cos18[0], (in[2]  + in[10]));
1560 			t2 = REAL_MUL(cos18[1], (in[10] - in[14]));
1561 			t3 = REAL_MUL(COS6_1,    in[6]);
1562 
1563 			{
1564 				real t0 = t1 + t2 + t3;
1565 				tmp[0] += t0;
1566 				tmp[8] -= t0;
1567 			}
1568 
1569 			t2 -= t3;
1570 			t1 -= t3;
1571 
1572 			t3 = REAL_MUL(cos18[2], (in[2] + in[14]));
1573 
1574 			t1 += t3;
1575 			tmp[3] += t1;
1576 			tmp[5] -= t1;
1577 
1578 			t2 -= t3;
1579 			tmp[2] += t2;
1580 			tmp[6] -= t2;
1581 		}
1582 
1583 #else
1584 		{
1585 			real t0, t1, t2, t3, t4, t5, t6, t7;
1586 
1587 			t1 = REAL_MUL(COS6_2, in[12]);
1588 			t2 = REAL_MUL(COS6_2, (in[8] + in[16] - in[4]));
1589 
1590 			t3 = in[0] + t1;
1591 			t4 = in[0] - t1 - t1;
1592 			t5     = t4 - t2;
1593 			tmp[4] = t4 + t2 + t2;
1594 
1595 			t0 = REAL_MUL(cos9[0], (in[4] + in[8]));
1596 			t1 = REAL_MUL(cos9[1], (in[8] - in[16]));
1597 
1598 			t2 = REAL_MUL(cos9[2], (in[4] + in[16]));
1599 
1600 			t6 = t3 - t0 - t2;
1601 			t0 += t3 + t1;
1602 			t3 += t2 - t1;
1603 
1604 			t2 = REAL_MUL(cos18[0], (in[2]  + in[10]));
1605 			t4 = REAL_MUL(cos18[1], (in[10] - in[14]));
1606 			t7 = REAL_MUL(COS6_1, in[6]);
1607 
1608 			t1 = t2 + t4 + t7;
1609 			tmp[0] = t0 + t1;
1610 			tmp[8] = t0 - t1;
1611 			t1 = REAL_MUL(cos18[2], (in[2] + in[14]));
1612 			t2 += t1 - t7;
1613 
1614 			tmp[3] = t3 + t2;
1615 			t0 = REAL_MUL(COS6_1, (in[10] + in[14] - in[2]));
1616 			tmp[5] = t3 - t2;
1617 
1618 			t4 -= t1 + t7;
1619 
1620 			tmp[1] = t5 - t0;
1621 			tmp[7] = t5 + t0;
1622 			tmp[2] = t6 + t4;
1623 			tmp[6] = t6 - t4;
1624 		}
1625 #endif
1626 
1627 		{
1628 			real t0, t1, t2, t3, t4, t5, t6, t7;
1629 
1630 			t1 = REAL_MUL(COS6_2, in[13]);
1631 			t2 = REAL_MUL(COS6_2, (in[9] + in[17] - in[5]));
1632 
1633 			t3 = in[1] + t1;
1634 			t4 = in[1] - t1 - t1;
1635 			t5 = t4 - t2;
1636 
1637 			t0 = REAL_MUL(cos9[0], (in[5] + in[9]));
1638 			t1 = REAL_MUL(cos9[1], (in[9] - in[17]));
1639 
1640 			tmp[13] = REAL_MUL((t4 + t2 + t2), tfcos36[17-13]);
1641 			t2 = REAL_MUL(cos9[2], (in[5] + in[17]));
1642 
1643 			t6 = t3 - t0 - t2;
1644 			t0 += t3 + t1;
1645 			t3 += t2 - t1;
1646 
1647 			t2 = REAL_MUL(cos18[0], (in[3]  + in[11]));
1648 			t4 = REAL_MUL(cos18[1], (in[11] - in[15]));
1649 			t7 = REAL_MUL(COS6_1, in[7]);
1650 
1651 			t1 = t2 + t4 + t7;
1652 			tmp[17] = REAL_MUL((t0 + t1), tfcos36[17-17]);
1653 			tmp[9]  = REAL_MUL((t0 - t1), tfcos36[17-9]);
1654 			t1 = REAL_MUL(cos18[2], (in[3] + in[15]));
1655 			t2 += t1 - t7;
1656 
1657 			tmp[14] = REAL_MUL((t3 + t2), tfcos36[17-14]);
1658 			t0 = REAL_MUL(COS6_1, (in[11] + in[15] - in[3]));
1659 			tmp[12] = REAL_MUL((t3 - t2), tfcos36[17-12]);
1660 
1661 			t4 -= t1 + t7;
1662 
1663 			tmp[16] = REAL_MUL((t5 - t0), tfcos36[17-16]);
1664 			tmp[10] = REAL_MUL((t5 + t0), tfcos36[17-10]);
1665 			tmp[15] = REAL_MUL((t6 + t4), tfcos36[17-15]);
1666 			tmp[11] = REAL_MUL((t6 - t4), tfcos36[17-11]);
1667 		}
1668 
1669 #define MACRO(v) { \
1670 		real tmpval; \
1671 		tmpval = tmp[(v)] + tmp[17-(v)]; \
1672 		out2[9+(v)] = REAL_MUL(tmpval, w[27+(v)]); \
1673 		out2[8-(v)] = REAL_MUL(tmpval, w[26-(v)]); \
1674 		tmpval = tmp[(v)] - tmp[17-(v)]; \
1675 		ts[SBLIMIT*(8-(v))] = out1[8-(v)] + REAL_MUL(tmpval, w[8-(v)]); \
1676 		ts[SBLIMIT*(9+(v))] = out1[9+(v)] + REAL_MUL(tmpval, w[9+(v)]); }
1677 
1678 		{
1679 			register real *out2 = o2;
1680 			register real *w = wintab;
1681 			register real *out1 = o1;
1682 			register real *ts = tsbuf;
1683 
1684 			MACRO(0);
1685 			MACRO(1);
1686 			MACRO(2);
1687 			MACRO(3);
1688 			MACRO(4);
1689 			MACRO(5);
1690 			MACRO(6);
1691 			MACRO(7);
1692 			MACRO(8);
1693 		}
1694 
1695 #else
1696 
1697 		{
1698 
1699 #define MACRO0(v) { \
1700 	real tmp; \
1701 	out2[9+(v)] = REAL_MUL((tmp = sum0 + sum1), w[27+(v)]); \
1702 	out2[8-(v)] = REAL_MUL(tmp, w[26-(v)]);   } \
1703 	sum0 -= sum1; \
1704 	ts[SBLIMIT*(8-(v))] = out1[8-(v)] + REAL_MUL(sum0, w[8-(v)]); \
1705 	ts[SBLIMIT*(9+(v))] = out1[9+(v)] + REAL_MUL(sum0, w[9+(v)]);
1706 #define MACRO1(v) { \
1707 	real sum0,sum1; \
1708 	sum0 = tmp1a + tmp2a; \
1709 	sum1 = REAL_MUL((tmp1b + tmp2b), tfcos36[(v)]); \
1710 	MACRO0(v); }
1711 #define MACRO2(v) { \
1712 	real sum0,sum1; \
1713 	sum0 = tmp2a - tmp1a; \
1714 	sum1 = REAL_MUL((tmp2b - tmp1b), tfcos36[(v)]); \
1715 	MACRO0(v); }
1716 
1717 			register const real *c = COS9;
1718 			register real *out2 = o2;
1719 			register real *w = wintab;
1720 			register real *out1 = o1;
1721 			register real *ts = tsbuf;
1722 
1723 			real ta33,ta66,tb33,tb66;
1724 
1725 			ta33 = REAL_MUL(in[2*3+0], c[3]);
1726 			ta66 = REAL_MUL(in[2*6+0], c[6]);
1727 			tb33 = REAL_MUL(in[2*3+1], c[3]);
1728 			tb66 = REAL_MUL(in[2*6+1], c[6]);
1729 
1730 			{
1731 				real tmp1a,tmp2a,tmp1b,tmp2b;
1732 				tmp1a = REAL_MUL(in[2*1+0], c[1]) + ta33 + REAL_MUL(in[2*5+0], c[5]) + REAL_MUL(in[2*7+0], c[7]);
1733 				tmp1b = REAL_MUL(in[2*1+1], c[1]) + tb33 + REAL_MUL(in[2*5+1], c[5]) + REAL_MUL(in[2*7+1], c[7]);
1734 				tmp2a = REAL_MUL(in[2*2+0], c[2]) + REAL_MUL(in[2*4+0], c[4]) + ta66 + REAL_MUL(in[2*8+0], c[8]);
1735 				tmp2b = REAL_MUL(in[2*2+1], c[2]) + REAL_MUL(in[2*4+1], c[4]) + tb66 + REAL_MUL(in[2*8+1], c[8]);
1736 
1737 				MACRO1(0);
1738 				MACRO2(8);
1739 			}
1740 
1741 			{
1742 				real tmp1a,tmp2a,tmp1b,tmp2b;
1743 				tmp1a = REAL_MUL(( in[2*1+0] - in[2*5+0] - in[2*7+0] ), c[3]);
1744 				tmp1b = REAL_MUL(( in[2*1+1] - in[2*5+1] - in[2*7+1] ), c[3]);
1745 				tmp2a = REAL_MUL(( in[2*2+0] - in[2*4+0] - in[2*8+0] ), c[6]) - in[2*6+0] + in[2*0+0];
1746 				tmp2b = REAL_MUL(( in[2*2+1] - in[2*4+1] - in[2*8+1] ), c[6]) - in[2*6+1] + in[2*0+1];
1747 
1748 				MACRO1(1);
1749 				MACRO2(7);
1750 			}
1751 
1752 			{
1753 				real tmp1a,tmp2a,tmp1b,tmp2b;
1754 				tmp1a =   REAL_MUL(in[2*1+0], c[5]) - ta33 - REAL_MUL(in[2*5+0], c[7]) + REAL_MUL(in[2*7+0], c[1]);
1755 				tmp1b =   REAL_MUL(in[2*1+1], c[5]) - tb33 - REAL_MUL(in[2*5+1], c[7]) + REAL_MUL(in[2*7+1], c[1]);
1756 				tmp2a = - REAL_MUL(in[2*2+0], c[8]) - REAL_MUL(in[2*4+0], c[2]) + ta66 + REAL_MUL(in[2*8+0], c[4]);
1757 				tmp2b = - REAL_MUL(in[2*2+1], c[8]) - REAL_MUL(in[2*4+1], c[2]) + tb66 + REAL_MUL(in[2*8+1], c[4]);
1758 
1759 				MACRO1(2);
1760 				MACRO2(6);
1761 			}
1762 
1763 			{
1764 				real tmp1a,tmp2a,tmp1b,tmp2b;
1765 				tmp1a =   REAL_MUL(in[2*1+0], c[7]) - ta33 + REAL_MUL(in[2*5+0], c[1]) - REAL_MUL(in[2*7+0], c[5]);
1766 				tmp1b =   REAL_MUL(in[2*1+1], c[7]) - tb33 + REAL_MUL(in[2*5+1], c[1]) - REAL_MUL(in[2*7+1], c[5]);
1767 				tmp2a = - REAL_MUL(in[2*2+0], c[4]) + REAL_MUL(in[2*4+0], c[8]) + ta66 - REAL_MUL(in[2*8+0], c[2]);
1768 				tmp2b = - REAL_MUL(in[2*2+1], c[4]) + REAL_MUL(in[2*4+1], c[8]) + tb66 - REAL_MUL(in[2*8+1], c[2]);
1769 
1770 				MACRO1(3);
1771 				MACRO2(5);
1772 			}
1773 
1774 			{
1775 				real sum0,sum1;
1776 				sum0 =  in[2*0+0] - in[2*2+0] + in[2*4+0] - in[2*6+0] + in[2*8+0];
1777 				sum1 = REAL_MUL((in[2*0+1] - in[2*2+1] + in[2*4+1] - in[2*6+1] + in[2*8+1] ), tfcos36[4]);
1778 				MACRO0(4);
1779 			}
1780 		}
1781 #endif
1782 
1783 	}
1784 }
1785 
1786 
1787 /* new DCT12 */
1788 static void dct12(real *in,real *rawout1,real *rawout2,register real *wi,register real *ts)
1789 {
1790 #define DCT12_PART1 \
1791 	in5 = in[5*3];  \
1792 	in5 += (in4 = in[4*3]); \
1793 	in4 += (in3 = in[3*3]); \
1794 	in3 += (in2 = in[2*3]); \
1795 	in2 += (in1 = in[1*3]); \
1796 	in1 += (in0 = in[0*3]); \
1797 	\
1798 	in5 += in3; in3 += in1; \
1799 	\
1800 	in2 = REAL_MUL(in2, COS6_1); \
1801 	in3 = REAL_MUL(in3, COS6_1);
1802 
1803 #define DCT12_PART2 \
1804 	in0 += REAL_MUL(in4, COS6_2); \
1805 	\
1806 	in4 = in0 + in2; \
1807 	in0 -= in2;      \
1808 	\
1809 	in1 += REAL_MUL(in5, COS6_2); \
1810 	\
1811 	in5 = REAL_MUL((in1 + in3), tfcos12[0]); \
1812 	in1 = REAL_MUL((in1 - in3), tfcos12[2]); \
1813 	\
1814 	in3 = in4 + in5; \
1815 	in4 -= in5;      \
1816 	\
1817 	in2 = in0 + in1; \
1818 	in0 -= in1;
1819 
1820 	{
1821 		real in0,in1,in2,in3,in4,in5;
1822 		register real *out1 = rawout1;
1823 		ts[SBLIMIT*0] = out1[0]; ts[SBLIMIT*1] = out1[1]; ts[SBLIMIT*2] = out1[2];
1824 		ts[SBLIMIT*3] = out1[3]; ts[SBLIMIT*4] = out1[4]; ts[SBLIMIT*5] = out1[5];
1825 
1826 		DCT12_PART1
1827 
1828 		{
1829 			real tmp0,tmp1 = (in0 - in4);
1830 			{
1831 				real tmp2 = REAL_MUL((in1 - in5), tfcos12[1]);
1832 				tmp0 = tmp1 + tmp2;
1833 				tmp1 -= tmp2;
1834 			}
1835 			ts[(17-1)*SBLIMIT] = out1[17-1] + REAL_MUL(tmp0, wi[11-1]);
1836 			ts[(12+1)*SBLIMIT] = out1[12+1] + REAL_MUL(tmp0, wi[6+1]);
1837 			ts[(6 +1)*SBLIMIT] = out1[6 +1] + REAL_MUL(tmp1, wi[1]);
1838 			ts[(11-1)*SBLIMIT] = out1[11-1] + REAL_MUL(tmp1, wi[5-1]);
1839 		}
1840 
1841 		DCT12_PART2
1842 
1843 		ts[(17-0)*SBLIMIT] = out1[17-0] + REAL_MUL(in2, wi[11-0]);
1844 		ts[(12+0)*SBLIMIT] = out1[12+0] + REAL_MUL(in2, wi[6+0]);
1845 		ts[(12+2)*SBLIMIT] = out1[12+2] + REAL_MUL(in3, wi[6+2]);
1846 		ts[(17-2)*SBLIMIT] = out1[17-2] + REAL_MUL(in3, wi[11-2]);
1847 
1848 		ts[(6 +0)*SBLIMIT]  = out1[6+0] + REAL_MUL(in0, wi[0]);
1849 		ts[(11-0)*SBLIMIT] = out1[11-0] + REAL_MUL(in0, wi[5-0]);
1850 		ts[(6 +2)*SBLIMIT]  = out1[6+2] + REAL_MUL(in4, wi[2]);
1851 		ts[(11-2)*SBLIMIT] = out1[11-2] + REAL_MUL(in4, wi[5-2]);
1852 	}
1853 
1854 	in++;
1855 
1856 	{
1857 		real in0,in1,in2,in3,in4,in5;
1858 		register real *out2 = rawout2;
1859 
1860 		DCT12_PART1
1861 
1862 		{
1863 			real tmp0,tmp1 = (in0 - in4);
1864 			{
1865 				real tmp2 = REAL_MUL((in1 - in5), tfcos12[1]);
1866 				tmp0 = tmp1 + tmp2;
1867 				tmp1 -= tmp2;
1868 			}
1869 			out2[5-1] = REAL_MUL(tmp0, wi[11-1]);
1870 			out2[0+1] = REAL_MUL(tmp0, wi[6+1]);
1871 			ts[(12+1)*SBLIMIT] += REAL_MUL(tmp1, wi[1]);
1872 			ts[(17-1)*SBLIMIT] += REAL_MUL(tmp1, wi[5-1]);
1873 		}
1874 
1875 		DCT12_PART2
1876 
1877 		out2[5-0] = REAL_MUL(in2, wi[11-0]);
1878 		out2[0+0] = REAL_MUL(in2, wi[6+0]);
1879 		out2[0+2] = REAL_MUL(in3, wi[6+2]);
1880 		out2[5-2] = REAL_MUL(in3, wi[11-2]);
1881 
1882 		ts[(12+0)*SBLIMIT] += REAL_MUL(in0, wi[0]);
1883 		ts[(17-0)*SBLIMIT] += REAL_MUL(in0, wi[5-0]);
1884 		ts[(12+2)*SBLIMIT] += REAL_MUL(in4, wi[2]);
1885 		ts[(17-2)*SBLIMIT] += REAL_MUL(in4, wi[5-2]);
1886 	}
1887 
1888 	in++;
1889 
1890 	{
1891 		real in0,in1,in2,in3,in4,in5;
1892 		register real *out2 = rawout2;
1893 		out2[12]=out2[13]=out2[14]=out2[15]=out2[16]=out2[17]=0.0;
1894 
1895 		DCT12_PART1
1896 
1897 		{
1898 			real tmp0,tmp1 = (in0 - in4);
1899 			{
1900 				real tmp2 = REAL_MUL((in1 - in5), tfcos12[1]);
1901 				tmp0 = tmp1 + tmp2;
1902 				tmp1 -= tmp2;
1903 			}
1904 			out2[11-1] = REAL_MUL(tmp0, wi[11-1]);
1905 			out2[6 +1] = REAL_MUL(tmp0, wi[6+1]);
1906 			out2[0+1] += REAL_MUL(tmp1, wi[1]);
1907 			out2[5-1] += REAL_MUL(tmp1, wi[5-1]);
1908 		}
1909 
1910 		DCT12_PART2
1911 
1912 		out2[11-0] = REAL_MUL(in2, wi[11-0]);
1913 		out2[6 +0] = REAL_MUL(in2, wi[6+0]);
1914 		out2[6 +2] = REAL_MUL(in3, wi[6+2]);
1915 		out2[11-2] = REAL_MUL(in3, wi[11-2]);
1916 
1917 		out2[0+0] += REAL_MUL(in0, wi[0]);
1918 		out2[5-0] += REAL_MUL(in0, wi[5-0]);
1919 		out2[0+2] += REAL_MUL(in4, wi[2]);
1920 		out2[5-2] += REAL_MUL(in4, wi[5-2]);
1921 	}
1922 }
1923 
1924 
1925 static void III_hybrid(real fsIn[SBLIMIT][SSLIMIT], real tsOut[SSLIMIT][SBLIMIT], int ch,struct gr_info_s *gr_info, mpg123_handle *fr)
1926 {
1927 	real (*block)[2][SBLIMIT*SSLIMIT] = fr->hybrid_block;
1928 	int *blc = fr->hybrid_blc;
1929 
1930 	real *tspnt = (real *) tsOut;
1931 	real *rawout1,*rawout2;
1932 	int bt = 0;
1933 	size_t sb = 0;
1934 
1935 	{
1936 		int b = blc[ch];
1937 		rawout1=block[b][ch];
1938 		b=-b+1;
1939 		rawout2=block[b][ch];
1940 		blc[ch] = b;
1941 	}
1942 
1943 	if(gr_info->mixed_block_flag)
1944 	{
1945 		sb = 2;
1946 		opt_dct36(fr)(fsIn[0],rawout1,rawout2,win[0],tspnt);
1947 		opt_dct36(fr)(fsIn[1],rawout1+18,rawout2+18,win1[0],tspnt+1);
1948 		rawout1 += 36; rawout2 += 36; tspnt += 2;
1949 	}
1950 
1951 	bt = gr_info->block_type;
1952 	if(bt == 2)
1953 	{
1954 		for(; sb<gr_info->maxb; sb+=2,tspnt+=2,rawout1+=36,rawout2+=36)
1955 		{
1956 			dct12(fsIn[sb]  ,rawout1   ,rawout2   ,win[2] ,tspnt);
1957 			dct12(fsIn[sb+1],rawout1+18,rawout2+18,win1[2],tspnt+1);
1958 		}
1959 	}
1960 	else
1961 	{
1962 		for(; sb<gr_info->maxb; sb+=2,tspnt+=2,rawout1+=36,rawout2+=36)
1963 		{
1964 			opt_dct36(fr)(fsIn[sb],rawout1,rawout2,win[bt],tspnt);
1965 			opt_dct36(fr)(fsIn[sb+1],rawout1+18,rawout2+18,win1[bt],tspnt+1);
1966 		}
1967 	}
1968 
1969 	for(;sb<SBLIMIT;sb++,tspnt++)
1970 	{
1971 		int i;
1972 		for(i=0;i<SSLIMIT;i++)
1973 		{
1974 			tspnt[i*SBLIMIT] = *rawout1++;
1975 			*rawout2++ = DOUBLE_TO_REAL(0.0);
1976 		}
1977 	}
1978 }
1979 
1980 
1981 /* And at the end... the main layer3 handler */
1982 int do_layer3(mpg123_handle *fr)
1983 {
1984 	int gr, ch, ss,clip=0;
1985 	int scalefacs[2][39]; /* max 39 for short[13][3] mode, mixed: 38, long: 22 */
1986 	struct III_sideinfo sideinfo;
1987 	int stereo = fr->stereo;
1988 	int single = fr->single;
1989 	int ms_stereo,i_stereo;
1990 	int sfreq = fr->sampling_frequency;
1991 	int stereo1,granules;
1992 
1993 	if(stereo == 1)
1994 	{ /* stream is mono */
1995 		stereo1 = 1;
1996 		single = SINGLE_LEFT;
1997 	}
1998 	else if(single != SINGLE_STEREO) /* stream is stereo, but force to mono */
1999 	stereo1 = 1;
2000 	else
2001 	stereo1 = 2;
2002 
2003 	if(fr->mode == MPG_MD_JOINT_STEREO)
2004 	{
2005 		ms_stereo = (fr->mode_ext & 0x2)>>1;
2006 		i_stereo  = fr->mode_ext & 0x1;
2007 	}
2008 	else ms_stereo = i_stereo = 0;
2009 
2010 	granules = fr->lsf ? 1 : 2;
2011 
2012 	/* quick hack to keep the music playing */
2013 	/* after having seen this nasty test file... */
2014 	if(III_get_side_info(fr, &sideinfo,stereo,ms_stereo,sfreq,single))
2015 	{
2016 		if(NOQUIET) error("bad frame - unable to get valid sideinfo");
2017 		return clip;
2018 	}
2019 
2020 	set_pointer(fr,sideinfo.main_data_begin);
2021 
2022 	for(gr=0;gr<granules;gr++)
2023 	{
2024 		/*  hybridIn[2][SBLIMIT][SSLIMIT] */
2025 		real (*hybridIn)[SBLIMIT][SSLIMIT] = fr->layer3.hybrid_in;
2026 		/*  hybridOut[2][SSLIMIT][SBLIMIT] */
2027 		real (*hybridOut)[SSLIMIT][SBLIMIT] = fr->layer3.hybrid_out;
2028 
2029 		{
2030 			struct gr_info_s *gr_info = &(sideinfo.ch[0].gr[gr]);
2031 			long part2bits;
2032 			if(fr->lsf)
2033 			part2bits = III_get_scale_factors_2(fr, scalefacs[0],gr_info,0);
2034 			else
2035 			part2bits = III_get_scale_factors_1(fr, scalefacs[0],gr_info,0,gr);
2036 
2037 			if(III_dequantize_sample(fr, hybridIn[0], scalefacs[0],gr_info,sfreq,part2bits))
2038 			{
2039 				if(VERBOSE2) error("dequantization failed!");
2040 				return clip;
2041 			}
2042 		}
2043 
2044 		if(stereo == 2)
2045 		{
2046 			struct gr_info_s *gr_info = &(sideinfo.ch[1].gr[gr]);
2047 			long part2bits;
2048 			if(fr->lsf)
2049 			part2bits = III_get_scale_factors_2(fr, scalefacs[1],gr_info,i_stereo);
2050 			else
2051 			part2bits = III_get_scale_factors_1(fr, scalefacs[1],gr_info,1,gr);
2052 
2053 			if(III_dequantize_sample(fr, hybridIn[1],scalefacs[1],gr_info,sfreq,part2bits))
2054 			{
2055 				if(VERBOSE2) error("dequantization failed!");
2056 				return clip;
2057 			}
2058 
2059 			if(ms_stereo)
2060 			{
2061 				int i;
2062 				unsigned int maxb = sideinfo.ch[0].gr[gr].maxb;
2063 				if(sideinfo.ch[1].gr[gr].maxb > maxb) maxb = sideinfo.ch[1].gr[gr].maxb;
2064 
2065 				for(i=0;i<SSLIMIT*(int)maxb;i++)
2066 				{
2067 					real tmp0 = ((real *)hybridIn[0])[i];
2068 					real tmp1 = ((real *)hybridIn[1])[i];
2069 					((real *)hybridIn[0])[i] = tmp0 + tmp1;
2070 					((real *)hybridIn[1])[i] = tmp0 - tmp1;
2071 				}
2072 			}
2073 
2074 			if(i_stereo) III_i_stereo(hybridIn,scalefacs[1],gr_info,sfreq,ms_stereo,fr->lsf);
2075 
2076 			if(ms_stereo || i_stereo || (single == SINGLE_MIX) )
2077 			{
2078 				if(gr_info->maxb > sideinfo.ch[0].gr[gr].maxb)
2079 				sideinfo.ch[0].gr[gr].maxb = gr_info->maxb;
2080 				else
2081 				gr_info->maxb = sideinfo.ch[0].gr[gr].maxb;
2082 			}
2083 
2084 			switch(single)
2085 			{
2086 				case SINGLE_MIX:
2087 				{
2088 					register int i;
2089 					register real *in0 = (real *) hybridIn[0],*in1 = (real *) hybridIn[1];
2090 					for(i=0;i<SSLIMIT*(int)gr_info->maxb;i++,in0++)
2091 					*in0 = (*in0 + *in1++); /* *0.5 done by pow-scale */
2092 				}
2093 				break;
2094 				case SINGLE_RIGHT:
2095 				{
2096 					register int i;
2097 					register real *in0 = (real *) hybridIn[0],*in1 = (real *) hybridIn[1];
2098 					for(i=0;i<SSLIMIT*(int)gr_info->maxb;i++)
2099 					*in0++ = *in1++;
2100 				}
2101 				break;
2102 			}
2103 		}
2104 
2105 		for(ch=0;ch<stereo1;ch++)
2106 		{
2107 			struct gr_info_s *gr_info = &(sideinfo.ch[ch].gr[gr]);
2108 			III_antialias(hybridIn[ch],gr_info);
2109 			III_hybrid(hybridIn[ch], hybridOut[ch], ch,gr_info, fr);
2110 		}
2111 
2112 #ifdef OPT_I486
2113 		if(single != SINGLE_STEREO || fr->af.encoding != MPG123_ENC_SIGNED_16 || fr->down_sample != 0)
2114 		{
2115 #endif
2116 		for(ss=0;ss<SSLIMIT;ss++)
2117 		{
2118 			if(single != SINGLE_STEREO)
2119 			clip += (fr->synth_mono)(hybridOut[0][ss], fr);
2120 			else
2121 			clip += (fr->synth_stereo)(hybridOut[0][ss], hybridOut[1][ss], fr);
2122 
2123 		}
2124 #ifdef OPT_I486
2125 		} else
2126 		{
2127 			/* Only stereo, 16 bits benefit from the 486 optimization. */
2128 			ss=0;
2129 			while(ss < SSLIMIT)
2130 			{
2131 				int n;
2132 				n=(fr->buffer.size - fr->buffer.fill) / (2*2*32);
2133 				if(n > (SSLIMIT-ss)) n=SSLIMIT-ss;
2134 
2135 				/* Clip counting makes no sense with this function. */
2136 				absynth_1to1_i486(hybridOut[0][ss], 0, fr, n);
2137 				absynth_1to1_i486(hybridOut[1][ss], 1, fr, n);
2138 				ss+=n;
2139 				fr->buffer.fill+=(2*2*32)*n;
2140 			}
2141 		}
2142 #endif
2143 	}
2144 
2145 	return clip;
2146 }
2147