xref: /reactos/sdk/lib/3rdparty/libmpg123/optimize.c (revision 40462c92)
1 /*
2 	optimize: get a grip on the different optimizations
3 
4 	copyright 2006-9 by the mpg123 project - free software under the terms of the LGPL 2.1
5 	see COPYING and AUTHORS files in distribution or http://mpg123.org
6 	initially written by Thomas Orgis, inspired by 3DNow stuff in mpg123.[hc]
7 
	This file contains the synth function tables and the functions that choose an optimization variant; the choice is based on runtime CPU flags only when OPT_MULTI is in effect.
9 */
10 
11 #define I_AM_OPTIMIZE
12 #include "mpg123lib_intern.h" /* includes optimize.h */
13 #include "debug.h"
14 
#if defined(OPT_MULTI) && (defined(OPT_X86) || defined(OPT_X86_64) || defined(OPT_NEON) || defined(OPT_NEON64))
/* Multi-decoder build with CPU probing: cpu_flags is filled by
   getcpuflags() in check_decoders() and queried via the cpu_*() tests. */
#include "getcpuflags.h"
static struct cpuflags cpu_flags;
#else
/* No runtime probing in this build. The decoder selection code below is
   shared with the multi build, so every CPU feature test is replaced by a
   constant "yes" and cpu_flags becomes a placeholder token that the tests
   never evaluate. */
#define cpu_flags nothing
#define cpu_i586(flags)     1
#define cpu_fpu(flags)      1
#define cpu_mmx(flags)      1
#define cpu_3dnow(flags)    1
#define cpu_3dnowext(flags) 1
#define cpu_sse(flags)      1
#define cpu_sse2(flags)     1
#define cpu_sse3(flags)     1
#define cpu_avx(flags)      1
#define cpu_neon(flags)     1
#endif
33 
/* Helper macros that assemble one row of a synth table: one initializer
   per output format, in the order 16 bit, 8 bit, float, 32 bit.
   Formats that are disabled at build time vanish from the row. */

/* 8 bit entry (with trailing comma) unless NO_8BIT is set. */
#ifdef NO_8BIT
#define IF8(fn)
#else
#define IF8(fn) fn,
#endif

/* Float entry (with trailing comma); removed by NO_SYNTH32 or NO_REAL. */
#if defined(NO_SYNTH32) || defined(NO_REAL)
#define IFREAL(fn)
#else
#define IFREAL(fn) fn,
#endif

/* 32 bit entry, last in the row and therefore without a comma;
   removed by NO_SYNTH32 or NO_32BIT. */
#if defined(NO_SYNTH32) || defined(NO_32BIT)
#define IF32(fn)
#else
#define IF32(fn) fn
#endif

/* The complete brace-enclosed row. With NO_16BIT the 16 bit argument is dropped. */
#ifdef NO_16BIT
#define OUT_SYNTHS(s16, s8, sreal, s32) { IF8(s8) IFREAL(sreal) IF32(s32) }
#else
#define OUT_SYNTHS(s16, s8, sreal, s32) { s16, IF8(s8) IFREAL(sreal) IF32(s32) }
#endif
68 
69 /* The call of left and right plain synth, wrapped.
70    This may be replaced by a direct stereo optimized synth. */
71 static int synth_stereo_wrap(real *bandPtr_l, real *bandPtr_r, mpg123_handle *fr)
72 {
73 	int clip;
74 	clip  = (fr->synth)(bandPtr_l, 0, fr, 0);
75 	clip += (fr->synth)(bandPtr_r, 1, fr, 1);
76 	return clip;
77 }
78 
79 static const struct synth_s synth_base =
80 {
81 	{ /* plain */
82 		 OUT_SYNTHS(synth_1to1, synth_1to1_8bit, synth_1to1_real, synth_1to1_s32)
83 #		ifndef NO_DOWNSAMPLE
84 		,OUT_SYNTHS(synth_2to1, synth_2to1_8bit, synth_2to1_real, synth_2to1_s32)
85 		,OUT_SYNTHS(synth_4to1, synth_4to1_8bit, synth_4to1_real, synth_4to1_s32)
86 #		endif
87 #		ifndef NO_NTOM
88 		,OUT_SYNTHS(synth_ntom, synth_ntom_8bit, synth_ntom_real, synth_ntom_s32)
89 #		endif
90 	},
91 	{ /* stereo, by default only wrappers over plain synth */
92 		 OUT_SYNTHS(synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap)
93 #		ifndef NO_DOWNSAMPLE
94 		,OUT_SYNTHS(synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap)
95 		,OUT_SYNTHS(synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap)
96 #		endif
97 #		ifndef NO_NTOM
98 		,OUT_SYNTHS(synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap)
99 #		endif
100 	},
101 	{ /* mono2stereo */
102 		 OUT_SYNTHS(synth_1to1_m2s, synth_1to1_8bit_m2s, synth_1to1_real_m2s, synth_1to1_s32_m2s)
103 #		ifndef NO_DOWNSAMPLE
104 		,OUT_SYNTHS(synth_2to1_m2s, synth_2to1_8bit_m2s, synth_2to1_real_m2s, synth_2to1_s32_m2s)
105 		,OUT_SYNTHS(synth_4to1_m2s, synth_4to1_8bit_m2s, synth_4to1_real_m2s, synth_4to1_s32_m2s)
106 #		endif
107 #		ifndef NO_NTOM
108 		,OUT_SYNTHS(synth_ntom_m2s, synth_ntom_8bit_m2s, synth_ntom_real_m2s, synth_ntom_s32_m2s)
109 #		endif
110 	},
111 	{ /* mono*/
112 		 OUT_SYNTHS(synth_1to1_mono, synth_1to1_8bit_mono, synth_1to1_real_mono, synth_1to1_s32_mono)
113 #		ifndef NO_DOWNSAMPLE
114 		,OUT_SYNTHS(synth_2to1_mono, synth_2to1_8bit_mono, synth_2to1_real_mono, synth_2to1_s32_mono)
115 		,OUT_SYNTHS(synth_4to1_mono, synth_4to1_8bit_mono, synth_4to1_real_mono, synth_4to1_s32_mono)
116 #		endif
117 #		ifndef NO_NTOM
118 		,OUT_SYNTHS(synth_ntom_mono, synth_ntom_8bit_mono, synth_ntom_real_mono, synth_ntom_s32_mono)
119 #endif
120 	}
121 };
122 
#ifdef OPT_X86
/* Plain synths for i386. frame_cpu_opt() uses these to replace every plain
   slot that still holds the generic routine once an x86 decoder is chosen.
   The ntom row has no i386 variant and repeats the generic functions. */
const func_synth plain_i386[r_limit][f_limit] =
{
	OUT_SYNTHS(synth_1to1_i386, synth_1to1_8bit_i386, synth_1to1_real_i386, synth_1to1_s32_i386),
#ifndef NO_DOWNSAMPLE
	OUT_SYNTHS(synth_2to1_i386, synth_2to1_8bit_i386, synth_2to1_real_i386, synth_2to1_s32_i386),
	OUT_SYNTHS(synth_4to1_i386, synth_4to1_8bit_i386, synth_4to1_real_i386, synth_4to1_s32_i386),
#endif
#ifndef NO_NTOM
	OUT_SYNTHS(synth_ntom, synth_ntom_8bit, synth_ntom_real, synth_ntom_s32),
#endif
};
#endif
137 
138 
139 enum optdec defdec(void){ return defopt; }
140 
141 enum optcla decclass(const enum optdec type)
142 {
143 	return
144 	(
145 		   type == mmx
146 		|| type == sse
147 		|| type == sse_vintage
148 		|| type == dreidnowext
149 		|| type == dreidnowext_vintage
150 		|| type == x86_64
151 		|| type == neon
152 		|| type == neon64
153 		|| type == avx
154 	) ? mmxsse : normal;
155 }
156 
157 static int find_synth(func_synth synth,  const func_synth synths[r_limit][f_limit])
158 {
159 	enum synth_resample ri;
160 	enum synth_format   fi;
161 	for(ri=0; ri<r_limit; ++ri)
162 	for(fi=0; fi<f_limit; ++fi)
163 	if(synth == synths[ri][fi])
164 	return TRUE;
165 
166 	return FALSE;
167 }
168 
169 
#if defined(OPT_SSE) || defined(OPT_SSE_VINTAGE)
/* Called once an SSE synth has been recognized; tells current SSE from
   vintage SSE. In a multi build with OPT_SSE the handle's dct36 routine
   decides (dct36_sse means current SSE). Without OPT_MULTI only one of
   OPT_SSE and OPT_SSE_VINTAGE is active, so the answer is fixed at
   build time. */
static enum optdec sse_or_vintage(mpg123_handle *fr)
{
#if defined(OPT_SSE) && defined(OPT_MULTI)
	return (fr->cpu_opts.the_dct36 == dct36_sse) ? sse : sse_vintage;
#elif defined(OPT_SSE)
	return sse;
#else
	return sse_vintage;
#endif
}
#endif
187 
188 /* Determine what kind of decoder is actually active
189    This depends on runtime choices which may cause fallback to i386 or generic code. */
190 static int find_dectype(mpg123_handle *fr)
191 {
192 	enum optdec type = nodec;
193 	/* Direct and indirect usage, 1to1 stereo decoding.
194 	   Concentrating on the plain stereo synth should be fine, mono stuff is derived. */
195 	func_synth basic_synth = fr->synth;
196 #ifndef NO_8BIT
197 #ifndef NO_16BIT
198 	if(basic_synth == synth_1to1_8bit_wrap)
199 	basic_synth = fr->synths.plain[r_1to1][f_16]; /* That is what's really below the surface. */
200 #endif
201 #endif
202 
203 	if(FALSE) ; /* Just to initialize the else if ladder. */
204 #ifndef NO_16BIT
205 #if defined(OPT_3DNOWEXT) || defined(OPT_3DNOWEXT_VINTAGE)
206 	else if(basic_synth == synth_1to1_3dnowext)
207 	{
208 		type = dreidnowext;
209 #		ifdef OPT_3DNOWEXT_VINTAGE
210 #		ifdef OPT_MULTI
211 		if(fr->cpu_opts.the_dct36 == dct36_3dnowext)
212 #		endif
213 		type = dreidnowext_vintage;
214 #		endif
215 	}
216 #endif
217 #if defined(OPT_SSE) || defined(OPT_SSE_VINTAGE)
218 	else if(basic_synth == synth_1to1_sse)
219 	{
220 		type = sse_or_vintage(fr);
221 	}
222 #endif
223 #if defined(OPT_3DNOW) || defined(OPT_3DNOW_VINTAGE)
224 	else if(basic_synth == synth_1to1_3dnow)
225 	{
226 		type = dreidnow;
227 #		ifdef OPT_3DNOW_VINTAGE
228 #		ifdef OPT_MULTI
229 		if(fr->cpu_opts.the_dct36 == dct36_3dnow)
230 #		endif
231 		type = dreidnow_vintage;
232 #		endif
233 	}
234 #endif
235 #ifdef OPT_MMX
236 	else if(basic_synth == synth_1to1_mmx) type = mmx;
237 #endif
238 #ifdef OPT_I586_DITHER
239 	else if(basic_synth == synth_1to1_i586_dither) type = ifuenf_dither;
240 #endif
241 #ifdef OPT_I586
242 	else if(basic_synth == synth_1to1_i586) type = ifuenf;
243 #endif
244 #ifdef OPT_ALTIVEC
245 	else if(basic_synth == synth_1to1_altivec) type = altivec;
246 #endif
247 #ifdef OPT_X86_64
248 	else if(basic_synth == synth_1to1_x86_64) type = x86_64;
249 #endif
250 #ifdef OPT_AVX
251 	else if(basic_synth == synth_1to1_avx) type = avx;
252 #endif
253 #ifdef OPT_ARM
254 	else if(basic_synth == synth_1to1_arm) type = arm;
255 #endif
256 #ifdef OPT_NEON
257 	else if(basic_synth == synth_1to1_neon) type = neon;
258 #endif
259 #ifdef OPT_NEON64
260 	else if(basic_synth == synth_1to1_neon64) type = neon64;
261 #endif
262 #ifdef OPT_GENERIC_DITHER
263 	else if(basic_synth == synth_1to1_dither) type = generic_dither;
264 #endif
265 #ifdef OPT_DITHER /* either i586 or generic! */
266 #ifndef NO_DOWNSAMPLE
267 	else if
268 	(
269 		   basic_synth == synth_2to1_dither
270 		|| basic_synth == synth_4to1_dither
271 	) type = generic_dither;
272 #endif
273 #endif
274 #endif /* 16bit */
275 
276 #ifndef NO_SYNTH32
277 
278 #ifndef NO_REAL
279 #if defined(OPT_SSE) || defined(OPT_SSE_VINTAGE)
280 	else if(basic_synth == synth_1to1_real_sse)
281 	{
282 		type = sse_or_vintage(fr);
283 	}
284 #endif
285 #ifdef OPT_X86_64
286 	else if(basic_synth == synth_1to1_real_x86_64) type = x86_64;
287 #endif
288 #ifdef OPT_AVX
289 	else if(basic_synth == synth_1to1_real_avx) type = avx;
290 #endif
291 #ifdef OPT_ALTIVEC
292 	else if(basic_synth == synth_1to1_real_altivec) type = altivec;
293 #endif
294 #ifdef OPT_NEON
295 	else if(basic_synth == synth_1to1_real_neon) type = neon;
296 #endif
297 #ifdef OPT_NEON64
298 	else if(basic_synth == synth_1to1_real_neon64) type = neon64;
299 #endif
300 
301 #endif /* real */
302 
303 #ifndef NO_32BIT
304 #if defined(OPT_SSE) || defined(OPT_SSE_VINTAGE)
305 	else if(basic_synth == synth_1to1_s32_sse)
306 	{
307 		type = sse_or_vintage(fr);
308 	}
309 #endif
310 #ifdef OPT_X86_64
311 	else if(basic_synth == synth_1to1_s32_x86_64) type = x86_64;
312 #endif
313 #ifdef OPT_AVX
314 	else if(basic_synth == synth_1to1_s32_avx) type = avx;
315 #endif
316 #ifdef OPT_ALTIVEC
317 	else if(basic_synth == synth_1to1_s32_altivec) type = altivec;
318 #endif
319 #ifdef OPT_NEON
320 	else if(basic_synth == synth_1to1_s32_neon) type = neon;
321 #endif
322 #ifdef OPT_NEON64
323 	else if(basic_synth == synth_1to1_s32_neon64) type = neon64;
324 #endif
325 #endif /* 32bit */
326 
327 #endif /* any 32 bit synth */
328 
329 #ifdef OPT_X86
330 	else if(find_synth(basic_synth, plain_i386))
331 	type = idrei;
332 #endif
333 
334 	else if(find_synth(basic_synth, synth_base.plain))
335 	type = generic;
336 
337 
338 
339 #ifdef OPT_I486
340 	/* i486 is special ... the specific code is in use for 16bit 1to1 stereo
341 	   otherwise we have i386 active... but still, the distinction doesn't matter*/
342 	type = ivier;
343 #endif
344 
345 	if(type != nodec)
346 	{
347 		fr->cpu_opts.type = type;
348 		fr->cpu_opts.class = decclass(type);
349 
350 		debug3("determined active decoder type %i (%s) of class %i", type, decname[type], fr->cpu_opts.class);
351 		return MPG123_OK;
352 	}
353 	else
354 	{
355 		if(NOQUIET) error("Unable to determine active decoder type -- this is SERIOUS b0rkage!");
356 
357 		fr->err = MPG123_BAD_DECODER_SETUP;
358 		return MPG123_ERR;
359 	}
360 }
361 
362 /* set synth functions for current frame, optimizations handled by opt_* macros */
363 int set_synth_functions(mpg123_handle *fr)
364 {
365 	enum synth_resample resample = r_none;
366 	enum synth_format basic_format = f_none; /* Default is always 16bit, or whatever. */
367 
368 	/* Select the basic output format, different from 16bit: 8bit, real. */
369 	if(FALSE){}
370 #ifndef NO_16BIT
371 	else if(fr->af.dec_enc & MPG123_ENC_16)
372 	basic_format = f_16;
373 #endif
374 #ifndef NO_8BIT
375 	else if(fr->af.dec_enc & MPG123_ENC_8)
376 	basic_format = f_8;
377 #endif
378 #ifndef NO_REAL
379 	else if(fr->af.dec_enc & MPG123_ENC_FLOAT)
380 	basic_format = f_real;
381 #endif
382 #ifndef NO_32BIT
383 	/* 24 bit integer means decoding to 32 bit first. */
384 	else if(fr->af.dec_enc & MPG123_ENC_32 || fr->af.dec_enc & MPG123_ENC_24)
385 	basic_format = f_32;
386 #endif
387 
388 	/* Make sure the chosen format is compiled into this lib. */
389 	if(basic_format == f_none)
390 	{
391 		if(NOQUIET) error("set_synth_functions: This output format is disabled in this build!");
392 
393 		return -1;
394 	}
395 
396 	/* Be explicit about downsampling variant. */
397 	switch(fr->down_sample)
398 	{
399 		case 0: resample = r_1to1; break;
400 #ifndef NO_DOWNSAMPLE
401 		case 1: resample = r_2to1; break;
402 		case 2: resample = r_4to1; break;
403 #endif
404 #ifndef NO_NTOM
405 		case 3: resample = r_ntom; break;
406 #endif
407 	}
408 
409 	if(resample == r_none)
410 	{
411 		if(NOQUIET) error("set_synth_functions: This resampling mode is not supported in this build!");
412 
413 		return -1;
414 	}
415 
416 	debug2("selecting synth: resample=%i format=%i", resample, basic_format);
417 	/* Finally selecting the synth functions for stereo / mono. */
418 	fr->synth = fr->synths.plain[resample][basic_format];
419 	fr->synth_stereo = fr->synths.stereo[resample][basic_format];
420 	fr->synth_mono = fr->af.channels==2
421 		? fr->synths.mono2stereo[resample][basic_format] /* Mono MPEG file decoded to stereo. */
422 		: fr->synths.mono[resample][basic_format];       /* Mono MPEG file decoded to mono. */
423 
424 	if(find_dectype(fr) != MPG123_OK) /* Actually determine the currently active decoder breed. */
425 	{
426 		fr->err = MPG123_BAD_DECODER_SETUP;
427 		return MPG123_ERR;
428 	}
429 
430 	if(frame_buffers(fr) != 0)
431 	{
432 		fr->err = MPG123_NO_BUFFERS;
433 		if(NOQUIET) error("Failed to set up decoder buffers!");
434 
435 		return MPG123_ERR;
436 	}
437 
438 #ifndef NO_8BIT
439 	if(basic_format == f_8)
440 	{
441 		if(make_conv16to8_table(fr) != 0)
442 		{
443 			if(NOQUIET) error("Failed to set up conv16to8 table!");
444 			/* it's a bit more work to get proper error propagation up */
445 			return -1;
446 		}
447 	}
448 #endif
449 
450 #ifdef OPT_MMXORSSE
451 	/* Special treatment for MMX, SSE and 3DNowExt stuff.
452 	   The real-decoding SSE for x86-64 uses normal tables! */
453 	if(fr->cpu_opts.class == mmxsse
454 #	ifndef NO_REAL
455 	   && basic_format != f_real
456 #	endif
457 #	ifndef NO_32BIT
458 	   && basic_format != f_32
459 #	endif
460 #	ifdef ACCURATE_ROUNDING
461 	   && fr->cpu_opts.type != sse
462 	   && fr->cpu_opts.type != sse_vintage
463 	   && fr->cpu_opts.type != x86_64
464 	   && fr->cpu_opts.type != neon
465 	   && fr->cpu_opts.type != neon64
466 	   && fr->cpu_opts.type != avx
467 #	endif
468 	  )
469 	{
470 #ifndef NO_LAYER3
471 		init_layer3_stuff(fr, init_layer3_gainpow2_mmx);
472 #endif
473 #ifndef NO_LAYER12
474 		init_layer12_stuff(fr, init_layer12_table_mmx);
475 #endif
476 		fr->make_decode_tables = make_decode_tables_mmx;
477 	}
478 	else
479 #endif
480 	{
481 #ifndef NO_LAYER3
482 		init_layer3_stuff(fr, init_layer3_gainpow2);
483 #endif
484 #ifndef NO_LAYER12
485 		init_layer12_stuff(fr, init_layer12_table);
486 #endif
487 		fr->make_decode_tables = make_decode_tables;
488 	}
489 
490 	/* We allocated the table buffers just now, so (re)create the tables. */
491 	fr->make_decode_tables(fr);
492 
493 	return 0;
494 }
495 
496 int frame_cpu_opt(mpg123_handle *fr, const char* cpu)
497 {
498 	const char* chosen = ""; /* the chosen decoder opt as string */
499 	enum optdec want_dec = nodec;
500 	int done = 0;
501 	int auto_choose = 0;
502 #ifdef OPT_DITHER
503 	int dithered = FALSE; /* If some dithered decoder is chosen. */
504 #endif
505 
506 	want_dec = dectype(cpu);
507 	auto_choose = want_dec == autodec;
508 	/* Fill whole array of synth functions with generic code first. */
509 	fr->synths = synth_base;
510 
511 #ifndef OPT_MULTI
512 	{
513 		if(!auto_choose && want_dec != defopt)
514 		{
515 			if(NOQUIET) error2("you wanted decoder type %i, I only have %i", want_dec, defopt);
516 		}
517 		auto_choose = TRUE; /* There will be only one choice anyway. */
518 	}
519 #endif
520 
521 	fr->cpu_opts.type = nodec;
522 #ifdef OPT_MULTI
523 #ifndef NO_LAYER3
524 #if (defined OPT_3DNOW_VINTAGE || defined OPT_3DNOWEXT_VINTAGE || defined OPT_SSE || defined OPT_X86_64 || defined OPT_AVX || defined OPT_NEON || defined OPT_NEON64)
525 	fr->cpu_opts.the_dct36 = dct36;
526 #endif
527 #endif
528 #endif
529 	/* covers any i386+ cpu; they actually differ only in the synth_1to1 function, mostly... */
530 #ifdef OPT_X86
531 	if(cpu_i586(cpu_flags))
532 	{
533 #		ifdef OPT_MULTI
534 		debug2("standard flags: 0x%08x\textended flags: 0x%08x", cpu_flags.std, cpu_flags.ext);
535 #		endif
536 #		ifdef OPT_SSE
537 		if(   !done && (auto_choose || want_dec == sse)
538 		   && cpu_sse(cpu_flags) && cpu_mmx(cpu_flags) )
539 		{
540 			chosen = dn_sse;
541 			fr->cpu_opts.type = sse;
542 #ifdef OPT_MULTI
543 #			ifndef NO_LAYER3
544 			/* if(cpu_fast_sse(cpu_flags)) */ fr->cpu_opts.the_dct36 = dct36_sse;
545 #			endif
546 #endif
547 #			ifndef NO_16BIT
548 			fr->synths.plain[r_1to1][f_16] = synth_1to1_sse;
549 #			ifdef ACCURATE_ROUNDING
550 			fr->synths.stereo[r_1to1][f_16] = synth_1to1_stereo_sse;
551 #			endif
552 #			endif
553 #			ifndef NO_REAL
554 			fr->synths.plain[r_1to1][f_real] = synth_1to1_real_sse;
555 			fr->synths.stereo[r_1to1][f_real] = synth_1to1_real_stereo_sse;
556 #			endif
557 #			ifndef NO_32BIT
558 			fr->synths.plain[r_1to1][f_32] = synth_1to1_s32_sse;
559 			fr->synths.stereo[r_1to1][f_32] = synth_1to1_s32_stereo_sse;
560 #			endif
561 			done = 1;
562 		}
563 #		endif
564 #		ifdef OPT_SSE_VINTAGE
565 		if(   !done && (auto_choose || want_dec == sse_vintage)
566 		   && cpu_sse(cpu_flags) && cpu_mmx(cpu_flags) )
567 		{
568 			chosen = dn_sse_vintage;
569 			fr->cpu_opts.type = sse_vintage;
570 #			ifndef NO_16BIT
571 			fr->synths.plain[r_1to1][f_16] = synth_1to1_sse;
572 #			ifdef ACCURATE_ROUNDING
573 			fr->synths.stereo[r_1to1][f_16] = synth_1to1_stereo_sse;
574 #			endif
575 #			endif
576 #			ifndef NO_REAL
577 			fr->synths.plain[r_1to1][f_real] = synth_1to1_real_sse;
578 			fr->synths.stereo[r_1to1][f_real] = synth_1to1_real_stereo_sse;
579 #			endif
580 #			ifndef NO_32BIT
581 			fr->synths.plain[r_1to1][f_32] = synth_1to1_s32_sse;
582 			fr->synths.stereo[r_1to1][f_32] = synth_1to1_s32_stereo_sse;
583 #			endif
584 			done = 1;
585 		}
586 #		endif
587 #		ifdef OPT_3DNOWEXT
588 		if(   !done && (auto_choose || want_dec == dreidnowext)
589 		   && cpu_3dnow(cpu_flags)
590 		   && cpu_3dnowext(cpu_flags)
591 		   && cpu_mmx(cpu_flags) )
592 		{
593 			chosen = dn_dreidnowext;
594 			fr->cpu_opts.type = dreidnowext;
595 #			ifndef NO_16BIT
596 			fr->synths.plain[r_1to1][f_16] = synth_1to1_3dnowext;
597 #			endif
598 			done = 1;
599 		}
600 #		endif
601 #		ifdef OPT_3DNOWEXT_VINTAGE
602 		if(   !done && (auto_choose || want_dec == dreidnowext_vintage)
603 		   && cpu_3dnow(cpu_flags)
604 		   && cpu_3dnowext(cpu_flags)
605 		   && cpu_mmx(cpu_flags) )
606 		{
607 			chosen = dn_dreidnowext_vintage;
608 			fr->cpu_opts.type = dreidnowext_vintage;
609 #ifdef OPT_MULTI
610 #			ifndef NO_LAYER3
611 			fr->cpu_opts.the_dct36 = dct36_3dnowext;
612 #			endif
613 #endif
614 #			ifndef NO_16BIT
615 			fr->synths.plain[r_1to1][f_16] = synth_1to1_3dnowext;
616 #			endif
617 			done = 1;
618 		}
619 #		endif
620 #		ifdef OPT_3DNOW
621 		if(    !done && (auto_choose || want_dec == dreidnow)
622 		    && cpu_3dnow(cpu_flags) && cpu_mmx(cpu_flags) )
623 		{
624 			chosen = dn_dreidnow;
625 			fr->cpu_opts.type = dreidnow;
626 #			ifndef NO_16BIT
627 			fr->synths.plain[r_1to1][f_16] = synth_1to1_3dnow;
628 #			endif
629 			done = 1;
630 		}
631 #		endif
632 #		ifdef OPT_3DNOW_VINTAGE
633 		if(    !done && (auto_choose || want_dec == dreidnow_vintage)
634 		    && cpu_3dnow(cpu_flags) && cpu_mmx(cpu_flags) )
635 		{
636 			chosen = dn_dreidnow_vintage;
637 			fr->cpu_opts.type = dreidnow_vintage;
638 #ifdef OPT_MULTI
639 #			ifndef NO_LAYER3
640 			fr->cpu_opts.the_dct36 = dct36_3dnow;
641 #			endif
642 #endif
643 #			ifndef NO_16BIT
644 			fr->synths.plain[r_1to1][f_16] = synth_1to1_3dnow;
645 #			endif
646 			done = 1;
647 		}
648 #		endif
649 		#ifdef OPT_MMX
650 		if(   !done && (auto_choose || want_dec == mmx)
651 		   && cpu_mmx(cpu_flags) )
652 		{
653 			chosen = dn_mmx;
654 			fr->cpu_opts.type = mmx;
655 #			ifndef NO_16BIT
656 			fr->synths.plain[r_1to1][f_16] = synth_1to1_mmx;
657 #			endif
658 			done = 1;
659 		}
660 		#endif
661 		#ifdef OPT_I586
662 		if(!done && (auto_choose || want_dec == ifuenf))
663 		{
664 			chosen = "i586/pentium";
665 			fr->cpu_opts.type = ifuenf;
666 #			ifndef NO_16BIT
667 			fr->synths.plain[r_1to1][f_16] = synth_1to1_i586;
668 #			endif
669 			done = 1;
670 		}
671 		#endif
672 		#ifdef OPT_I586_DITHER
673 		if(!done && (auto_choose || want_dec == ifuenf_dither))
674 		{
675 			chosen = "dithered i586/pentium";
676 			fr->cpu_opts.type = ifuenf_dither;
677 			dithered = TRUE;
678 #			ifndef NO_16BIT
679 			fr->synths.plain[r_1to1][f_16] = synth_1to1_i586_dither;
680 #			ifndef NO_DOWNSAMPLE
681 			fr->synths.plain[r_2to1][f_16] = synth_2to1_dither;
682 			fr->synths.plain[r_4to1][f_16] = synth_4to1_dither;
683 #			endif
684 #			endif
685 			done = 1;
686 		}
687 		#endif
688 	}
689 	#ifdef OPT_I486
690 	/* That won't cooperate in multi opt mode - forcing i486 in layer3.c
691 	   But still... here it is... maybe for real use in future. */
692 	if(!done && (auto_choose || want_dec == ivier))
693 	{
694 		chosen = dn_ivier;
695 		fr->cpu_opts.type = ivier;
696 		done = 1;
697 	}
698 	#endif
699 	#ifdef OPT_I386
700 	if(!done && (auto_choose || want_dec == idrei))
701 	{
702 		chosen = dn_idrei;
703 		fr->cpu_opts.type = idrei;
704 		done = 1;
705 	}
706 	#endif
707 
708 	if(done)
709 	{
710 		/*
711 			We have chosen some x86 decoder... fillup some i386 stuff.
712 			There is an open question about using dithered synth_1to1 for 8bit wrappers.
713 			For quality it won't make sense, but wrapped i586_dither wrapped may still be faster...
714 		*/
715 		enum synth_resample ri;
716 		enum synth_format   fi;
717 #		ifndef NO_8BIT
718 #		ifndef NO_16BIT /* possibility to use a 16->8 wrapper... */
719 		if(fr->synths.plain[r_1to1][f_16] != synth_base.plain[r_1to1][f_16])
720 		{
721 			fr->synths.plain[r_1to1][f_8] = synth_1to1_8bit_wrap;
722 			fr->synths.mono[r_1to1][f_8] = synth_1to1_8bit_wrap_mono;
723 			fr->synths.mono2stereo[r_1to1][f_8] = synth_1to1_8bit_wrap_m2s;
724 		}
725 #		endif
726 #		endif
727 		for(ri=0; ri<r_limit; ++ri)
728 		for(fi=0; fi<f_limit; ++fi)
729 		{
730 			if(fr->synths.plain[ri][fi] == synth_base.plain[ri][fi])
731 			fr->synths.plain[ri][fi] = plain_i386[ri][fi];
732 		}
733 	}
734 
735 #endif /* OPT_X86 */
736 
737 #ifdef OPT_AVX
738 	if(!done && (auto_choose || want_dec == avx) && cpu_avx(cpu_flags))
739 	{
740 		chosen = "x86-64 (AVX)";
741 		fr->cpu_opts.type = avx;
742 #ifdef OPT_MULTI
743 #		ifndef NO_LAYER3
744 		fr->cpu_opts.the_dct36 = dct36_avx;
745 #		endif
746 #endif
747 #		ifndef NO_16BIT
748 		fr->synths.plain[r_1to1][f_16] = synth_1to1_avx;
749 		fr->synths.stereo[r_1to1][f_16] = synth_1to1_stereo_avx;
750 #		endif
751 #		ifndef NO_REAL
752 		fr->synths.plain[r_1to1][f_real] = synth_1to1_real_avx;
753 		fr->synths.stereo[r_1to1][f_real] = synth_1to1_fltst_avx;
754 #		endif
755 #		ifndef NO_32BIT
756 		fr->synths.plain[r_1to1][f_32] = synth_1to1_s32_avx;
757 		fr->synths.stereo[r_1to1][f_32] = synth_1to1_s32_stereo_avx;
758 #		endif
759 		done = 1;
760 	}
761 #endif
762 
763 #ifdef OPT_X86_64
764 	if(!done && (auto_choose || want_dec == x86_64))
765 	{
766 		chosen = "x86-64 (SSE)";
767 		fr->cpu_opts.type = x86_64;
768 #ifdef OPT_MULTI
769 #		ifndef NO_LAYER3
770 		fr->cpu_opts.the_dct36 = dct36_x86_64;
771 #		endif
772 #endif
773 #		ifndef NO_16BIT
774 		fr->synths.plain[r_1to1][f_16] = synth_1to1_x86_64;
775 		fr->synths.stereo[r_1to1][f_16] = synth_1to1_stereo_x86_64;
776 #		endif
777 #		ifndef NO_REAL
778 		fr->synths.plain[r_1to1][f_real] = synth_1to1_real_x86_64;
779 		fr->synths.stereo[r_1to1][f_real] = synth_1to1_real_stereo_x86_64;
780 #		endif
781 #		ifndef NO_32BIT
782 		fr->synths.plain[r_1to1][f_32] = synth_1to1_s32_x86_64;
783 		fr->synths.stereo[r_1to1][f_32] = synth_1to1_s32_stereo_x86_64;
784 #		endif
785 		done = 1;
786 	}
787 #endif
788 
789 #	ifdef OPT_ALTIVEC
790 	if(!done && (auto_choose || want_dec == altivec))
791 	{
792 		chosen = dn_altivec;
793 		fr->cpu_opts.type = altivec;
794 #		ifndef NO_16BIT
795 		fr->synths.plain[r_1to1][f_16] = synth_1to1_altivec;
796 		fr->synths.stereo[r_1to1][f_16] = synth_1to1_stereo_altivec;
797 #		endif
798 #		ifndef NO_REAL
799 		fr->synths.plain[r_1to1][f_real] = synth_1to1_real_altivec;
800 		fr->synths.stereo[r_1to1][f_real] = synth_1to1_fltst_altivec;
801 #		endif
802 #		ifndef NO_32BIT
803 		fr->synths.plain[r_1to1][f_32] = synth_1to1_s32_altivec;
804 		fr->synths.stereo[r_1to1][f_32] = synth_1to1_s32_stereo_altivec;
805 #		endif
806 		done = 1;
807 	}
808 #	endif
809 
810 #	ifdef OPT_NEON
811 	if(!done && (auto_choose || want_dec == neon) && cpu_neon(cpu_flags))
812 	{
813 		chosen = dn_neon;
814 		fr->cpu_opts.type = neon;
815 #ifdef OPT_MULTI
816 #		ifndef NO_LAYER3
817 		fr->cpu_opts.the_dct36 = dct36_neon;
818 #		endif
819 #endif
820 #		ifndef NO_16BIT
821 		fr->synths.plain[r_1to1][f_16] = synth_1to1_neon;
822 		fr->synths.stereo[r_1to1][f_16] = synth_1to1_stereo_neon;
823 #		endif
824 #		ifndef NO_REAL
825 		fr->synths.plain[r_1to1][f_real] = synth_1to1_real_neon;
826 		fr->synths.stereo[r_1to1][f_real] = synth_1to1_real_stereo_neon;
827 #		endif
828 #		ifndef NO_32BIT
829 		fr->synths.plain[r_1to1][f_32] = synth_1to1_s32_neon;
830 		fr->synths.stereo[r_1to1][f_32] = synth_1to1_s32_stereo_neon;
831 #		endif
832 		done = 1;
833 	}
834 #	endif
835 
836 #	ifdef OPT_ARM
837 	if(!done && (auto_choose || want_dec == arm))
838 	{
839 		chosen = dn_arm;
840 		fr->cpu_opts.type = arm;
841 #		ifndef NO_16BIT
842 		fr->synths.plain[r_1to1][f_16] = synth_1to1_arm;
843 #		endif
844 		done = 1;
845 	}
846 #	endif
847 
848 #	ifdef OPT_NEON64
849 	if(!done && (auto_choose || want_dec == neon64) && cpu_neon(cpu_flags))
850 	{
851 		chosen = dn_neon64;
852 		fr->cpu_opts.type = neon64;
853 #ifdef OPT_MULTI
854 #		ifndef NO_LAYER3
855 		fr->cpu_opts.the_dct36 = dct36_neon64;
856 #		endif
857 #endif
858 #		ifndef NO_16BIT
859 		fr->synths.plain[r_1to1][f_16] = synth_1to1_neon64;
860 		fr->synths.stereo[r_1to1][f_16] = synth_1to1_stereo_neon64;
861 #		endif
862 #		ifndef NO_REAL
863 		fr->synths.plain[r_1to1][f_real] = synth_1to1_real_neon64;
864 		fr->synths.stereo[r_1to1][f_real] = synth_1to1_fltst_neon64;
865 #		endif
866 #		ifndef NO_32BIT
867 		fr->synths.plain[r_1to1][f_32] = synth_1to1_s32_neon64;
868 		fr->synths.stereo[r_1to1][f_32] = synth_1to1_s32st_neon64;
869 #		endif
870 		done = 1;
871 	}
872 #	endif
873 
874 #	ifdef OPT_GENERIC
875 	if(!done && (auto_choose || want_dec == generic))
876 	{
877 		chosen = dn_generic;
878 		fr->cpu_opts.type = generic;
879 		done = 1;
880 	}
881 #	endif
882 
883 #ifdef OPT_GENERIC_DITHER
884 	if(!done && (auto_choose || want_dec == generic_dither))
885 	{
886 		chosen = "dithered generic";
887 		fr->cpu_opts.type = generic_dither;
888 		dithered = TRUE;
889 #		ifndef NO_16BIT
890 		fr->synths.plain[r_1to1][f_16] = synth_1to1_dither;
891 #		ifndef NO_DOWNSAMPLE
892 		fr->synths.plain[r_2to1][f_16] = synth_2to1_dither;
893 		fr->synths.plain[r_4to1][f_16] = synth_4to1_dither;
894 #		endif
895 #		endif
896 		done = 1;
897 	}
898 #endif
899 
900 	fr->cpu_opts.class = decclass(fr->cpu_opts.type);
901 
902 #	ifndef NO_8BIT
903 #	ifndef NO_16BIT /* possibility to use a 16->8 wrapper... */
904 	/* Last chance to use some optimized routine via generic wrappers (for 8bit). */
905 	if(     fr->cpu_opts.type != ifuenf_dither
906 	     && fr->cpu_opts.type != generic_dither
907 	     && fr->synths.plain[r_1to1][f_16] != synth_base.plain[r_1to1][f_16] )
908 	{
909 		fr->synths.plain[r_1to1][f_8] = synth_1to1_8bit_wrap;
910 		fr->synths.mono[r_1to1][f_8] = synth_1to1_8bit_wrap_mono;
911 		fr->synths.mono2stereo[r_1to1][f_8] = synth_1to1_8bit_wrap_m2s;
912 	}
913 #	endif
914 #	endif
915 
916 #ifdef OPT_DITHER
917 	if(done && dithered)
918 	{
919 		/* run-time dither noise table generation */
920 		if(!frame_dither_init(fr))
921 		{
922 			if(NOQUIET) error("Dither noise setup failed!");
923 			return 0;
924 		}
925 	}
926 #endif
927 
928 	if(done)
929 	{
930 		if(VERBOSE) fprintf(stderr, "Decoder: %s\n", chosen);
931 		return 1;
932 	}
933 	else
934 	{
935 		if(NOQUIET) error("Could not set optimization!");
936 		return 0;
937 	}
938 }
939 
940 enum optdec dectype(const char* decoder)
941 {
942 	enum optdec dt;
943 	if(   (decoder == NULL)
944 	   || (decoder[0] == 0) )
945 	return autodec;
946 
947 	for(dt=autodec; dt<nodec; ++dt)
948 	if(!strcasecmp(decoder, decname[dt])) return dt;
949 
950 	return nodec; /* If we found nothing... */
951 }
952 
#ifdef OPT_MULTI

/* Storage for the list of decoders usable on the running CPU.
   It starts out all NULL with one slot reserved per decoder option plus the
   terminating NULL; check_decoders() writes the names from the front. */
static const char *mpg123_supported_decoder_list[] =
{
#ifdef OPT_SSE
	NULL,
#endif
#ifdef OPT_SSE_VINTAGE
	NULL,
#endif
#ifdef OPT_3DNOWEXT
	NULL,
#endif
#ifdef OPT_3DNOWEXT_VINTAGE
	NULL,
#endif
#ifdef OPT_3DNOW
	NULL,
#endif
#ifdef OPT_3DNOW_VINTAGE
	NULL,
#endif
#ifdef OPT_MMX
	NULL,
#endif
#ifdef OPT_I586
	NULL,
#endif
#ifdef OPT_I586_DITHER
	NULL,
#endif
#ifdef OPT_I486
	NULL,
#endif
#ifdef OPT_I386
	NULL,
#endif
#ifdef OPT_ALTIVEC
	NULL,
#endif
#ifdef OPT_AVX
	NULL,
#endif
#ifdef OPT_X86_64
	NULL,
#endif
#ifdef OPT_ARM
	NULL,
#endif
#ifdef OPT_NEON
	NULL,
#endif
#ifdef OPT_NEON64
	NULL,
#endif
#ifdef OPT_GENERIC_FLOAT
	NULL,
#endif
#ifdef OPT_GENERIC
	NULL,
#endif
#ifdef OPT_GENERIC_DITHER
	NULL,
#endif
	NULL /* terminator */
};
#endif /* OPT_MULTI */
1021 
/* NULL-terminated list with the names of all decoders compiled into this
   build. Returned by mpg123_decoders() and, in builds without OPT_MULTI,
   also by mpg123_supported_decoders(). */
static const char *mpg123_decoder_list[] =
{
#ifdef OPT_SSE
	dn_sse,
#endif
#ifdef OPT_SSE_VINTAGE
	dn_sse_vintage,
#endif
#ifdef OPT_3DNOWEXT
	dn_dreidnowext,
#endif
#ifdef OPT_3DNOWEXT_VINTAGE
	dn_dreidnowext_vintage,
#endif
#ifdef OPT_3DNOW
	dn_dreidnow,
#endif
#ifdef OPT_3DNOW_VINTAGE
	dn_dreidnow_vintage,
#endif
#ifdef OPT_MMX
	dn_mmx,
#endif
#ifdef OPT_I586
	dn_ifuenf,
#endif
#ifdef OPT_I586_DITHER
	dn_ifuenf_dither,
#endif
#ifdef OPT_I486
	dn_ivier,
#endif
#ifdef OPT_I386
	dn_idrei,
#endif
#ifdef OPT_ALTIVEC
	dn_altivec,
#endif
#ifdef OPT_AVX
	dn_avx,
#endif
#ifdef OPT_X86_64
	dn_x86_64,
#endif
#ifdef OPT_ARM
	dn_arm,
#endif
#ifdef OPT_NEON
	dn_neon,
#endif
#ifdef OPT_NEON64
	dn_neon64,
#endif
#ifdef OPT_GENERIC
	dn_generic,
#endif
#ifdef OPT_GENERIC_DITHER
	dn_generic_dither,
#endif
	NULL /* terminator */
};
1083 
/* Probe the CPU and fill mpg123_supported_decoder_list with the names of
   the decoders that can be used on it. Does nothing in builds without
   OPT_MULTI, where only the full decoder list (one entry) is used.

   Each x86 decoder is listed under the same CPU feature conditions that
   frame_cpu_opt() requires before it accepts that decoder, so the list
   never names a decoder that frame_cpu_opt() would refuse. */
void check_decoders(void)
{
#ifndef OPT_MULTI
	/* In non-multi mode, only the full list (one entry) is used. */
	return;
#else
	/* Next free slot; names are appended in order of discovery. */
	const char **d = mpg123_supported_decoder_list;
#if (defined OPT_X86) || (defined OPT_X86_64) || (defined OPT_NEON) || (defined OPT_NEON64)
	getcpuflags(&cpu_flags);
#endif
#ifdef OPT_X86
	if(cpu_i586(cpu_flags))
	{
		/* not yet: if(cpu_sse2(cpu_flags)) printf(" SSE2");
		if(cpu_sse3(cpu_flags)) printf(" SSE3"); */
#ifdef OPT_SSE
		if(cpu_sse(cpu_flags) && cpu_mmx(cpu_flags)) *(d++) = dn_sse;
#endif
#ifdef OPT_SSE_VINTAGE
		if(cpu_sse(cpu_flags) && cpu_mmx(cpu_flags)) *(d++) = dn_sse_vintage;
#endif
#ifdef OPT_3DNOWEXT
		if(   cpu_3dnow(cpu_flags)
		   && cpu_3dnowext(cpu_flags)
		   && cpu_mmx(cpu_flags) ) *(d++) = dn_dreidnowext;
#endif
#ifdef OPT_3DNOWEXT_VINTAGE
		if(   cpu_3dnow(cpu_flags)
		   && cpu_3dnowext(cpu_flags)
		   && cpu_mmx(cpu_flags) ) *(d++) = dn_dreidnowext_vintage;
#endif
#ifdef OPT_3DNOW
		if(cpu_3dnow(cpu_flags) && cpu_mmx(cpu_flags)) *(d++) = dn_dreidnow;
#endif
#ifdef OPT_3DNOW_VINTAGE
		if(cpu_3dnow(cpu_flags) && cpu_mmx(cpu_flags)) *(d++) = dn_dreidnow_vintage;
#endif
#ifdef OPT_MMX
		if(cpu_mmx(cpu_flags)) *(d++) = dn_mmx;
#endif
#ifdef OPT_I586
		*(d++) = dn_ifuenf;
#endif
#ifdef OPT_I586_DITHER
		*(d++) = dn_ifuenf_dither;
#endif
	}
#endif
/* just assume that the i486 build is run on an i486 cpu... */
#ifdef OPT_I486
	*(d++) = dn_ivier;
#endif
#ifdef OPT_ALTIVEC
	*(d++) = dn_altivec;
#endif
/* every supported x86 can do i386, any cpu can do generic */
#ifdef OPT_I386
	*(d++) = dn_idrei;
#endif
#ifdef OPT_AVX
	if(cpu_avx(cpu_flags)) *(d++) = dn_avx;
#endif
#ifdef OPT_X86_64
	*(d++) = dn_x86_64;
#endif
#ifdef OPT_ARM
	*(d++) = dn_arm;
#endif
#ifdef OPT_NEON
	if(cpu_neon(cpu_flags)) *(d++) = dn_neon;
#endif
#ifdef OPT_NEON64
	if(cpu_neon(cpu_flags)) *(d++) = dn_neon64;
#endif
#ifdef OPT_GENERIC
	*(d++) = dn_generic;
#endif
#ifdef OPT_GENERIC_DITHER
	*(d++) = dn_generic_dither;
#endif
#endif /* ndef OPT_MULTI */
}
1162 
1163 const char* attribute_align_arg mpg123_current_decoder(mpg123_handle *mh)
1164 {
1165 	if(mh == NULL) return NULL;
1166 
1167 	return decname[mh->cpu_opts.type];
1168 }
1169 
1170 const char attribute_align_arg **mpg123_decoders(void){ return mpg123_decoder_list; }
1171 const char attribute_align_arg **mpg123_supported_decoders(void)
1172 {
1173 #ifdef OPT_MULTI
1174 	return mpg123_supported_decoder_list;
1175 #else
1176 	return mpg123_decoder_list;
1177 #endif
1178 }
1179