1 /*
2 optimize: get a grip on the different optimizations
3
4 copyright 2006-9 by the mpg123 project - free software under the terms of the LGPL 2.1
5 see COPYING and AUTHORS files in distribution or http://mpg123.org
6 initially written by Thomas Orgis, inspired by 3DNow stuff in mpg123.[hc]
7
8 Currently, this file contains the struct and function to choose an optimization variant and works only when OPT_MULTI is in effect.
9 */
10
11 #define I_AM_OPTIMIZE
12 #include "mpg123lib_intern.h" /* includes optimize.h */
13 #include "debug.h"
14
#if defined(OPT_MULTI) && (defined(OPT_X86) || defined(OPT_X86_64) || defined(OPT_NEON) || defined(OPT_NEON64))
/* Multi-decoder build for x86, x86-64 or NEON: the real CPU flag queries
   come from getcpuflags.h and work on this file-local flag storage
   (filled by getcpuflags() in check_decoders()). */
#include "getcpuflags.h"
static struct cpuflags cpu_flags;
#else
/* Any other build: the decoder selection code below stays the same, but every
   cpu_*() query is a compile-time "yes", so nothing depends on the CPU at
   runtime. The macro argument is never expanded, which is why cpu_flags may
   be an undeclared placeholder name. */
#define cpu_flags nothing
#define cpu_i586(s)     1
#define cpu_fpu(s)      1
#define cpu_mmx(s)      1
#define cpu_3dnow(s)    1
#define cpu_3dnowext(s) 1
#define cpu_sse(s)      1
#define cpu_sse2(s)     1
#define cpu_sse3(s)     1
#define cpu_avx(s)      1
#define cpu_neon(s)     1
#endif
33
/* Macros that build one row of a synth function table.
   Each IFxx(synth) expands to its argument when the matching output format
   is compiled in and to nothing when it is disabled. */

/* 8 bit output; always followed by a comma. */
#ifdef NO_8BIT
#define IF8(synth)
#else
#define IF8(synth) synth,
#endif

/* Floating point output; disabled by NO_REAL or by NO_SYNTH32. */
#if !defined(NO_SYNTH32) && !defined(NO_REAL)
#define IFREAL(synth) synth,
#else
#define IFREAL(synth)
#endif

/* 32 bit integer output; disabled by NO_32BIT or by NO_SYNTH32.
   This is the last entry of a row, hence no comma. */
#if !defined(NO_SYNTH32) && !defined(NO_32BIT)
#define IF32(synth) synth
#else
#define IF32(synth)
#endif

/* A complete row in the order 16 bit, 8 bit, real, 32 bit.
   The 16 bit entry vanishes with NO_16BIT. */
#ifdef NO_16BIT
#define OUT_SYNTHS(synth_16, synth_8, synth_real, synth_32) { IF8(synth_8) IFREAL(synth_real) IF32(synth_32) }
#else
#define OUT_SYNTHS(synth_16, synth_8, synth_real, synth_32) { synth_16, IF8(synth_8) IFREAL(synth_real) IF32(synth_32) }
#endif
68
69 /* The call of left and right plain synth, wrapped.
70 This may be replaced by a direct stereo optimized synth. */
synth_stereo_wrap(real * bandPtr_l,real * bandPtr_r,mpg123_handle * fr)71 static int synth_stereo_wrap(real *bandPtr_l, real *bandPtr_r, mpg123_handle *fr)
72 {
73 int clip;
74 clip = (fr->synth)(bandPtr_l, 0, fr, 0);
75 clip += (fr->synth)(bandPtr_r, 1, fr, 1);
76 return clip;
77 }
78
79 static const struct synth_s synth_base =
80 {
81 { /* plain */
82 OUT_SYNTHS(synth_1to1, synth_1to1_8bit, synth_1to1_real, synth_1to1_s32)
83 # ifndef NO_DOWNSAMPLE
84 ,OUT_SYNTHS(synth_2to1, synth_2to1_8bit, synth_2to1_real, synth_2to1_s32)
85 ,OUT_SYNTHS(synth_4to1, synth_4to1_8bit, synth_4to1_real, synth_4to1_s32)
86 # endif
87 # ifndef NO_NTOM
88 ,OUT_SYNTHS(synth_ntom, synth_ntom_8bit, synth_ntom_real, synth_ntom_s32)
89 # endif
90 },
91 { /* stereo, by default only wrappers over plain synth */
92 OUT_SYNTHS(synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap)
93 # ifndef NO_DOWNSAMPLE
94 ,OUT_SYNTHS(synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap)
95 ,OUT_SYNTHS(synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap)
96 # endif
97 # ifndef NO_NTOM
98 ,OUT_SYNTHS(synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap)
99 # endif
100 },
101 { /* mono2stereo */
102 OUT_SYNTHS(synth_1to1_m2s, synth_1to1_8bit_m2s, synth_1to1_real_m2s, synth_1to1_s32_m2s)
103 # ifndef NO_DOWNSAMPLE
104 ,OUT_SYNTHS(synth_2to1_m2s, synth_2to1_8bit_m2s, synth_2to1_real_m2s, synth_2to1_s32_m2s)
105 ,OUT_SYNTHS(synth_4to1_m2s, synth_4to1_8bit_m2s, synth_4to1_real_m2s, synth_4to1_s32_m2s)
106 # endif
107 # ifndef NO_NTOM
108 ,OUT_SYNTHS(synth_ntom_m2s, synth_ntom_8bit_m2s, synth_ntom_real_m2s, synth_ntom_s32_m2s)
109 # endif
110 },
111 { /* mono*/
112 OUT_SYNTHS(synth_1to1_mono, synth_1to1_8bit_mono, synth_1to1_real_mono, synth_1to1_s32_mono)
113 # ifndef NO_DOWNSAMPLE
114 ,OUT_SYNTHS(synth_2to1_mono, synth_2to1_8bit_mono, synth_2to1_real_mono, synth_2to1_s32_mono)
115 ,OUT_SYNTHS(synth_4to1_mono, synth_4to1_8bit_mono, synth_4to1_real_mono, synth_4to1_s32_mono)
116 # endif
117 # ifndef NO_NTOM
118 ,OUT_SYNTHS(synth_ntom_mono, synth_ntom_8bit_mono, synth_ntom_real_mono, synth_ntom_s32_mono)
119 #endif
120 }
121 };
122
#ifdef OPT_X86
/* Plain synths for i386, same row layout as synth_base.plain.
   frame_cpu_opt() puts these in place of the generic plain synths once an x86
   decoder got chosen. The ntom row lists the generic ntom synths. */
const func_synth plain_i386[r_limit][f_limit] =
{
	OUT_SYNTHS(synth_1to1_i386, synth_1to1_8bit_i386, synth_1to1_real_i386, synth_1to1_s32_i386),
#ifndef NO_DOWNSAMPLE
	OUT_SYNTHS(synth_2to1_i386, synth_2to1_8bit_i386, synth_2to1_real_i386, synth_2to1_s32_i386),
	OUT_SYNTHS(synth_4to1_i386, synth_4to1_8bit_i386, synth_4to1_real_i386, synth_4to1_s32_i386),
#endif
#ifndef NO_NTOM
	OUT_SYNTHS(synth_ntom, synth_ntom_8bit, synth_ntom_real, synth_ntom_s32),
#endif
};
#endif
137
138
defdec(void)139 enum optdec defdec(void){ return defopt; }
140
decclass(const enum optdec type)141 enum optcla decclass(const enum optdec type)
142 {
143 return
144 (
145 type == mmx
146 || type == sse
147 || type == sse_vintage
148 || type == dreidnowext
149 || type == dreidnowext_vintage
150 || type == x86_64
151 || type == neon
152 || type == neon64
153 || type == avx
154 ) ? mmxsse : normal;
155 }
156
/* Look for a synth function pointer in a full synth table.
   Returns TRUE when any of the r_limit x f_limit entries equals synth,
   FALSE otherwise. */
static int find_synth(func_synth synth, const func_synth synths[r_limit][f_limit])
{
	int row;
	int col;

	for(row = 0; row < r_limit; ++row)
	{
		for(col = 0; col < f_limit; ++col)
		{
			if(synths[row][col] == synth)
			{
				return TRUE;
			}
		}
	}

	return FALSE;
}
168
169
#if defined(OPT_SSE) || defined(OPT_SSE_VINTAGE)
/* The SSE synth is shared by the current and the vintage SSE decoder;
   this tells the two apart once the synth is known to be the SSE one.
   With OPT_MULTI and OPT_SSE the active dct36 decides: dct36_sse means sse,
   anything else means sse_vintage. In a build without OPT_MULTI, OPT_SSE
   alone decides. */
static enum optdec sse_or_vintage(mpg123_handle *fr)
{
#if defined(OPT_SSE) && defined(OPT_MULTI)
	return (fr->cpu_opts.the_dct36 == dct36_sse) ? sse : sse_vintage;
#elif defined(OPT_SSE)
	(void)fr; /* The handle is not consulted in this configuration. */
	return sse;
#else
	(void)fr; /* The handle is not consulted in this configuration. */
	return sse_vintage;
#endif
}
#endif
187
188 /* Determine what kind of decoder is actually active
189 This depends on runtime choices which may cause fallback to i386 or generic code. */
find_dectype(mpg123_handle * fr)190 static int find_dectype(mpg123_handle *fr)
191 {
192 enum optdec type = nodec;
193 /* Direct and indirect usage, 1to1 stereo decoding.
194 Concentrating on the plain stereo synth should be fine, mono stuff is derived. */
195 func_synth basic_synth = fr->synth;
196 #ifndef NO_8BIT
197 #ifndef NO_16BIT
198 if(basic_synth == synth_1to1_8bit_wrap)
199 basic_synth = fr->synths.plain[r_1to1][f_16]; /* That is what's really below the surface. */
200 #endif
201 #endif
202
203 if(FALSE) ; /* Just to initialize the else if ladder. */
204 #ifndef NO_16BIT
205 #if defined(OPT_3DNOWEXT) || defined(OPT_3DNOWEXT_VINTAGE)
206 else if(basic_synth == synth_1to1_3dnowext)
207 {
208 type = dreidnowext;
209 # ifdef OPT_3DNOWEXT_VINTAGE
210 # ifdef OPT_MULTI
211 if(fr->cpu_opts.the_dct36 == dct36_3dnowext)
212 # endif
213 type = dreidnowext_vintage;
214 # endif
215 }
216 #endif
217 #if defined(OPT_SSE) || defined(OPT_SSE_VINTAGE)
218 else if(basic_synth == synth_1to1_sse)
219 {
220 type = sse_or_vintage(fr);
221 }
222 #endif
223 #if defined(OPT_3DNOW) || defined(OPT_3DNOW_VINTAGE)
224 else if(basic_synth == synth_1to1_3dnow)
225 {
226 type = dreidnow;
227 # ifdef OPT_3DNOW_VINTAGE
228 # ifdef OPT_MULTI
229 if(fr->cpu_opts.the_dct36 == dct36_3dnow)
230 # endif
231 type = dreidnow_vintage;
232 # endif
233 }
234 #endif
235 #ifdef OPT_MMX
236 else if(basic_synth == synth_1to1_mmx) type = mmx;
237 #endif
238 #ifdef OPT_I586_DITHER
239 else if(basic_synth == synth_1to1_i586_dither) type = ifuenf_dither;
240 #endif
241 #ifdef OPT_I586
242 else if(basic_synth == synth_1to1_i586) type = ifuenf;
243 #endif
244 #ifdef OPT_ALTIVEC
245 else if(basic_synth == synth_1to1_altivec) type = altivec;
246 #endif
247 #ifdef OPT_X86_64
248 else if(basic_synth == synth_1to1_x86_64) type = x86_64;
249 #endif
250 #ifdef OPT_AVX
251 else if(basic_synth == synth_1to1_avx) type = avx;
252 #endif
253 #ifdef OPT_ARM
254 else if(basic_synth == synth_1to1_arm) type = arm;
255 #endif
256 #ifdef OPT_NEON
257 else if(basic_synth == synth_1to1_neon) type = neon;
258 #endif
259 #ifdef OPT_NEON64
260 else if(basic_synth == synth_1to1_neon64) type = neon64;
261 #endif
262 #ifdef OPT_GENERIC_DITHER
263 else if(basic_synth == synth_1to1_dither) type = generic_dither;
264 #endif
265 #ifdef OPT_DITHER /* either i586 or generic! */
266 #ifndef NO_DOWNSAMPLE
267 else if
268 (
269 basic_synth == synth_2to1_dither
270 || basic_synth == synth_4to1_dither
271 ) type = generic_dither;
272 #endif
273 #endif
274 #endif /* 16bit */
275
276 #ifndef NO_SYNTH32
277
278 #ifndef NO_REAL
279 #if defined(OPT_SSE) || defined(OPT_SSE_VINTAGE)
280 else if(basic_synth == synth_1to1_real_sse)
281 {
282 type = sse_or_vintage(fr);
283 }
284 #endif
285 #ifdef OPT_X86_64
286 else if(basic_synth == synth_1to1_real_x86_64) type = x86_64;
287 #endif
288 #ifdef OPT_AVX
289 else if(basic_synth == synth_1to1_real_avx) type = avx;
290 #endif
291 #ifdef OPT_ALTIVEC
292 else if(basic_synth == synth_1to1_real_altivec) type = altivec;
293 #endif
294 #ifdef OPT_NEON
295 else if(basic_synth == synth_1to1_real_neon) type = neon;
296 #endif
297 #ifdef OPT_NEON64
298 else if(basic_synth == synth_1to1_real_neon64) type = neon64;
299 #endif
300
301 #endif /* real */
302
303 #ifndef NO_32BIT
304 #if defined(OPT_SSE) || defined(OPT_SSE_VINTAGE)
305 else if(basic_synth == synth_1to1_s32_sse)
306 {
307 type = sse_or_vintage(fr);
308 }
309 #endif
310 #ifdef OPT_X86_64
311 else if(basic_synth == synth_1to1_s32_x86_64) type = x86_64;
312 #endif
313 #ifdef OPT_AVX
314 else if(basic_synth == synth_1to1_s32_avx) type = avx;
315 #endif
316 #ifdef OPT_ALTIVEC
317 else if(basic_synth == synth_1to1_s32_altivec) type = altivec;
318 #endif
319 #ifdef OPT_NEON
320 else if(basic_synth == synth_1to1_s32_neon) type = neon;
321 #endif
322 #ifdef OPT_NEON64
323 else if(basic_synth == synth_1to1_s32_neon64) type = neon64;
324 #endif
325 #endif /* 32bit */
326
327 #endif /* any 32 bit synth */
328
329 #ifdef OPT_X86
330 else if(find_synth(basic_synth, plain_i386))
331 type = idrei;
332 #endif
333
334 else if(find_synth(basic_synth, synth_base.plain))
335 type = generic;
336
337
338
339 #ifdef OPT_I486
340 /* i486 is special ... the specific code is in use for 16bit 1to1 stereo
341 otherwise we have i386 active... but still, the distinction doesn't matter*/
342 type = ivier;
343 #endif
344
345 if(type != nodec)
346 {
347 fr->cpu_opts.type = type;
348 fr->cpu_opts.class = decclass(type);
349
350 debug3("determined active decoder type %i (%s) of class %i", type, decname[type], fr->cpu_opts.class);
351 return MPG123_OK;
352 }
353 else
354 {
355 if(NOQUIET) error("Unable to determine active decoder type -- this is SERIOUS b0rkage!");
356
357 fr->err = MPG123_BAD_DECODER_SETUP;
358 return MPG123_ERR;
359 }
360 }
361
362 /* set synth functions for current frame, optimizations handled by opt_* macros */
set_synth_functions(mpg123_handle * fr)363 int set_synth_functions(mpg123_handle *fr)
364 {
365 enum synth_resample resample = r_none;
366 enum synth_format basic_format = f_none; /* Default is always 16bit, or whatever. */
367
368 /* Select the basic output format, different from 16bit: 8bit, real. */
369 if(FALSE){}
370 #ifndef NO_16BIT
371 else if(fr->af.dec_enc & MPG123_ENC_16)
372 basic_format = f_16;
373 #endif
374 #ifndef NO_8BIT
375 else if(fr->af.dec_enc & MPG123_ENC_8)
376 basic_format = f_8;
377 #endif
378 #ifndef NO_REAL
379 else if(fr->af.dec_enc & MPG123_ENC_FLOAT)
380 basic_format = f_real;
381 #endif
382 #ifndef NO_32BIT
383 /* 24 bit integer means decoding to 32 bit first. */
384 else if(fr->af.dec_enc & MPG123_ENC_32 || fr->af.dec_enc & MPG123_ENC_24)
385 basic_format = f_32;
386 #endif
387
388 /* Make sure the chosen format is compiled into this lib. */
389 if(basic_format == f_none)
390 {
391 if(NOQUIET) error("set_synth_functions: This output format is disabled in this build!");
392
393 return -1;
394 }
395
396 /* Be explicit about downsampling variant. */
397 switch(fr->down_sample)
398 {
399 case 0: resample = r_1to1; break;
400 #ifndef NO_DOWNSAMPLE
401 case 1: resample = r_2to1; break;
402 case 2: resample = r_4to1; break;
403 #endif
404 #ifndef NO_NTOM
405 case 3: resample = r_ntom; break;
406 #endif
407 }
408
409 if(resample == r_none)
410 {
411 if(NOQUIET) error("set_synth_functions: This resampling mode is not supported in this build!");
412
413 return -1;
414 }
415
416 debug2("selecting synth: resample=%i format=%i", resample, basic_format);
417 /* Finally selecting the synth functions for stereo / mono. */
418 fr->synth = fr->synths.plain[resample][basic_format];
419 fr->synth_stereo = fr->synths.stereo[resample][basic_format];
420 fr->synth_mono = fr->af.channels==2
421 ? fr->synths.mono2stereo[resample][basic_format] /* Mono MPEG file decoded to stereo. */
422 : fr->synths.mono[resample][basic_format]; /* Mono MPEG file decoded to mono. */
423
424 if(find_dectype(fr) != MPG123_OK) /* Actually determine the currently active decoder breed. */
425 {
426 fr->err = MPG123_BAD_DECODER_SETUP;
427 return MPG123_ERR;
428 }
429
430 if(frame_buffers(fr) != 0)
431 {
432 fr->err = MPG123_NO_BUFFERS;
433 if(NOQUIET) error("Failed to set up decoder buffers!");
434
435 return MPG123_ERR;
436 }
437
438 #ifndef NO_8BIT
439 if(basic_format == f_8)
440 {
441 if(make_conv16to8_table(fr) != 0)
442 {
443 if(NOQUIET) error("Failed to set up conv16to8 table!");
444 /* it's a bit more work to get proper error propagation up */
445 return -1;
446 }
447 }
448 #endif
449
450 #ifdef OPT_MMXORSSE
451 /* Special treatment for MMX, SSE and 3DNowExt stuff.
452 The real-decoding SSE for x86-64 uses normal tables! */
453 if(fr->cpu_opts.class == mmxsse
454 # ifndef NO_REAL
455 && basic_format != f_real
456 # endif
457 # ifndef NO_32BIT
458 && basic_format != f_32
459 # endif
460 # ifdef ACCURATE_ROUNDING
461 && fr->cpu_opts.type != sse
462 && fr->cpu_opts.type != sse_vintage
463 && fr->cpu_opts.type != x86_64
464 && fr->cpu_opts.type != neon
465 && fr->cpu_opts.type != neon64
466 && fr->cpu_opts.type != avx
467 # endif
468 )
469 {
470 #ifndef NO_LAYER3
471 init_layer3_stuff(fr, init_layer3_gainpow2_mmx);
472 #endif
473 #ifndef NO_LAYER12
474 init_layer12_stuff(fr, init_layer12_table_mmx);
475 #endif
476 fr->make_decode_tables = make_decode_tables_mmx;
477 }
478 else
479 #endif
480 {
481 #ifndef NO_LAYER3
482 init_layer3_stuff(fr, init_layer3_gainpow2);
483 #endif
484 #ifndef NO_LAYER12
485 init_layer12_stuff(fr, init_layer12_table);
486 #endif
487 fr->make_decode_tables = make_decode_tables;
488 }
489
490 /* We allocated the table buffers just now, so (re)create the tables. */
491 fr->make_decode_tables(fr);
492
493 return 0;
494 }
495
/* Choose a decoder variant for the handle and set up fr->synths for it.
   cpu is the decoder name as understood by dectype(); NULL or an empty string
   asks for automatic choice. The candidates are tried in a fixed order and the
   first one that is both acceptable (automatic choice or matching name) and
   usable on this CPU wins. fr->synths starts as a copy of synth_base, the
   winner overrides single entries.
   Returns 1 when a decoder got chosen, 0 when none could be chosen or the
   dither noise setup failed. */
int frame_cpu_opt(mpg123_handle *fr, const char* cpu)
{
	const char* chosen = ""; /* Name of the chosen decoder, for the verbose message. */
	const enum optdec wanted = dectype(cpu);
	int pick_auto = (wanted == autodec);
	int done = 0;
#ifdef OPT_DITHER
	int dithered = FALSE; /* Becomes TRUE when a dithering decoder is chosen. */
#endif

	/* Fill the whole set of synth functions with generic code first. */
	fr->synths = synth_base;

#ifndef OPT_MULTI
	/* Single-decoder build: complain about a differing wish, then take what is there. */
	if(!pick_auto && wanted != defopt)
	{
		if(NOQUIET) error2("you wanted decoder type %i, I only have %i", wanted, defopt);
	}
	pick_auto = TRUE;
#endif

	fr->cpu_opts.type = nodec;
#if defined(OPT_MULTI) && !defined(NO_LAYER3)
#if (defined OPT_3DNOW_VINTAGE || defined OPT_3DNOWEXT_VINTAGE || defined OPT_SSE || defined OPT_X86_64 || defined OPT_AVX || defined OPT_NEON || defined OPT_NEON64)
	/* Plain dct36 unless a decoder below installs its own. */
	fr->cpu_opts.the_dct36 = dct36;
#endif
#endif

#ifdef OPT_X86
	/* Covers any i386+ CPU; the variants differ mostly in the synth_1to1 function. */
	if(cpu_i586(cpu_flags))
	{
#ifdef OPT_MULTI
		debug2("standard flags: 0x%08x\textended flags: 0x%08x", cpu_flags.std, cpu_flags.ext);
#endif

#ifdef OPT_SSE
		if(!done && (pick_auto || wanted == sse)
		   && cpu_sse(cpu_flags) && cpu_mmx(cpu_flags))
		{
			chosen = dn_sse;
			fr->cpu_opts.type = sse;
#if defined(OPT_MULTI) && !defined(NO_LAYER3)
			fr->cpu_opts.the_dct36 = dct36_sse;
#endif
#ifndef NO_16BIT
			fr->synths.plain[r_1to1][f_16] = synth_1to1_sse;
#ifdef ACCURATE_ROUNDING
			fr->synths.stereo[r_1to1][f_16] = synth_1to1_stereo_sse;
#endif
#endif
#ifndef NO_REAL
			fr->synths.plain[r_1to1][f_real] = synth_1to1_real_sse;
			fr->synths.stereo[r_1to1][f_real] = synth_1to1_real_stereo_sse;
#endif
#ifndef NO_32BIT
			fr->synths.plain[r_1to1][f_32] = synth_1to1_s32_sse;
			fr->synths.stereo[r_1to1][f_32] = synth_1to1_s32_stereo_sse;
#endif
			done = 1;
		}
#endif /* OPT_SSE */

#ifdef OPT_SSE_VINTAGE
		/* Same synths as sse, but dct36 is left alone. */
		if(!done && (pick_auto || wanted == sse_vintage)
		   && cpu_sse(cpu_flags) && cpu_mmx(cpu_flags))
		{
			chosen = dn_sse_vintage;
			fr->cpu_opts.type = sse_vintage;
#ifndef NO_16BIT
			fr->synths.plain[r_1to1][f_16] = synth_1to1_sse;
#ifdef ACCURATE_ROUNDING
			fr->synths.stereo[r_1to1][f_16] = synth_1to1_stereo_sse;
#endif
#endif
#ifndef NO_REAL
			fr->synths.plain[r_1to1][f_real] = synth_1to1_real_sse;
			fr->synths.stereo[r_1to1][f_real] = synth_1to1_real_stereo_sse;
#endif
#ifndef NO_32BIT
			fr->synths.plain[r_1to1][f_32] = synth_1to1_s32_sse;
			fr->synths.stereo[r_1to1][f_32] = synth_1to1_s32_stereo_sse;
#endif
			done = 1;
		}
#endif /* OPT_SSE_VINTAGE */

#ifdef OPT_3DNOWEXT
		if(!done && (pick_auto || wanted == dreidnowext)
		   && cpu_3dnow(cpu_flags)
		   && cpu_3dnowext(cpu_flags)
		   && cpu_mmx(cpu_flags))
		{
			chosen = dn_dreidnowext;
			fr->cpu_opts.type = dreidnowext;
#ifndef NO_16BIT
			fr->synths.plain[r_1to1][f_16] = synth_1to1_3dnowext;
#endif
			done = 1;
		}
#endif /* OPT_3DNOWEXT */

#ifdef OPT_3DNOWEXT_VINTAGE
		if(!done && (pick_auto || wanted == dreidnowext_vintage)
		   && cpu_3dnow(cpu_flags)
		   && cpu_3dnowext(cpu_flags)
		   && cpu_mmx(cpu_flags))
		{
			chosen = dn_dreidnowext_vintage;
			fr->cpu_opts.type = dreidnowext_vintage;
#if defined(OPT_MULTI) && !defined(NO_LAYER3)
			fr->cpu_opts.the_dct36 = dct36_3dnowext;
#endif
#ifndef NO_16BIT
			fr->synths.plain[r_1to1][f_16] = synth_1to1_3dnowext;
#endif
			done = 1;
		}
#endif /* OPT_3DNOWEXT_VINTAGE */

#ifdef OPT_3DNOW
		if(!done && (pick_auto || wanted == dreidnow)
		   && cpu_3dnow(cpu_flags) && cpu_mmx(cpu_flags))
		{
			chosen = dn_dreidnow;
			fr->cpu_opts.type = dreidnow;
#ifndef NO_16BIT
			fr->synths.plain[r_1to1][f_16] = synth_1to1_3dnow;
#endif
			done = 1;
		}
#endif /* OPT_3DNOW */

#ifdef OPT_3DNOW_VINTAGE
		if(!done && (pick_auto || wanted == dreidnow_vintage)
		   && cpu_3dnow(cpu_flags) && cpu_mmx(cpu_flags))
		{
			chosen = dn_dreidnow_vintage;
			fr->cpu_opts.type = dreidnow_vintage;
#if defined(OPT_MULTI) && !defined(NO_LAYER3)
			fr->cpu_opts.the_dct36 = dct36_3dnow;
#endif
#ifndef NO_16BIT
			fr->synths.plain[r_1to1][f_16] = synth_1to1_3dnow;
#endif
			done = 1;
		}
#endif /* OPT_3DNOW_VINTAGE */

#ifdef OPT_MMX
		if(!done && (pick_auto || wanted == mmx)
		   && cpu_mmx(cpu_flags))
		{
			chosen = dn_mmx;
			fr->cpu_opts.type = mmx;
#ifndef NO_16BIT
			fr->synths.plain[r_1to1][f_16] = synth_1to1_mmx;
#endif
			done = 1;
		}
#endif /* OPT_MMX */

#ifdef OPT_I586
		if(!done && (pick_auto || wanted == ifuenf))
		{
			chosen = "i586/pentium";
			fr->cpu_opts.type = ifuenf;
#ifndef NO_16BIT
			fr->synths.plain[r_1to1][f_16] = synth_1to1_i586;
#endif
			done = 1;
		}
#endif /* OPT_I586 */

#ifdef OPT_I586_DITHER
		if(!done && (pick_auto || wanted == ifuenf_dither))
		{
			chosen = "dithered i586/pentium";
			fr->cpu_opts.type = ifuenf_dither;
			dithered = TRUE;
#ifndef NO_16BIT
			fr->synths.plain[r_1to1][f_16] = synth_1to1_i586_dither;
#ifndef NO_DOWNSAMPLE
			fr->synths.plain[r_2to1][f_16] = synth_2to1_dither;
			fr->synths.plain[r_4to1][f_16] = synth_4to1_dither;
#endif
#endif
			done = 1;
		}
#endif /* OPT_I586_DITHER */
	}

#ifdef OPT_I486
	/* That won't cooperate in multi opt mode - forcing i486 in layer3.c.
	   Still, here it is, maybe for real use in future. */
	if(!done && (pick_auto || wanted == ivier))
	{
		chosen = dn_ivier;
		fr->cpu_opts.type = ivier;
		done = 1;
	}
#endif

#ifdef OPT_I386
	if(!done && (pick_auto || wanted == idrei))
	{
		chosen = dn_idrei;
		fr->cpu_opts.type = idrei;
		done = 1;
	}
#endif

	if(done)
	{
		/* Some x86 decoder got chosen: fill up the rest with i386 code.
		   There is an open question about using dithered synth_1to1 for the
		   8bit wrappers. For quality it won't make sense, but a wrapped
		   i586_dither may still be faster. */
		enum synth_resample r;
		enum synth_format f;

#if !defined(NO_8BIT) && !defined(NO_16BIT)
		/* A non-generic 16 bit synth can serve 8 bit output via the 16->8 wrappers. */
		if(fr->synths.plain[r_1to1][f_16] != synth_base.plain[r_1to1][f_16])
		{
			fr->synths.plain[r_1to1][f_8] = synth_1to1_8bit_wrap;
			fr->synths.mono[r_1to1][f_8] = synth_1to1_8bit_wrap_mono;
			fr->synths.mono2stereo[r_1to1][f_8] = synth_1to1_8bit_wrap_m2s;
		}
#endif
		/* Every plain synth that is still the generic one gets its i386 counterpart. */
		for(r = 0; r < r_limit; ++r)
		{
			for(f = 0; f < f_limit; ++f)
			{
				if(fr->synths.plain[r][f] == synth_base.plain[r][f])
				{
					fr->synths.plain[r][f] = plain_i386[r][f];
				}
			}
		}
	}
#endif /* OPT_X86 */

#ifdef OPT_AVX
	if(!done && (pick_auto || wanted == avx) && cpu_avx(cpu_flags))
	{
		chosen = "x86-64 (AVX)";
		fr->cpu_opts.type = avx;
#if defined(OPT_MULTI) && !defined(NO_LAYER3)
		fr->cpu_opts.the_dct36 = dct36_avx;
#endif
#ifndef NO_16BIT
		fr->synths.plain[r_1to1][f_16] = synth_1to1_avx;
		fr->synths.stereo[r_1to1][f_16] = synth_1to1_stereo_avx;
#endif
#ifndef NO_REAL
		fr->synths.plain[r_1to1][f_real] = synth_1to1_real_avx;
		fr->synths.stereo[r_1to1][f_real] = synth_1to1_fltst_avx;
#endif
#ifndef NO_32BIT
		fr->synths.plain[r_1to1][f_32] = synth_1to1_s32_avx;
		fr->synths.stereo[r_1to1][f_32] = synth_1to1_s32_stereo_avx;
#endif
		done = 1;
	}
#endif /* OPT_AVX */

#ifdef OPT_X86_64
	if(!done && (pick_auto || wanted == x86_64))
	{
		chosen = "x86-64 (SSE)";
		fr->cpu_opts.type = x86_64;
#if defined(OPT_MULTI) && !defined(NO_LAYER3)
		fr->cpu_opts.the_dct36 = dct36_x86_64;
#endif
#ifndef NO_16BIT
		fr->synths.plain[r_1to1][f_16] = synth_1to1_x86_64;
		fr->synths.stereo[r_1to1][f_16] = synth_1to1_stereo_x86_64;
#endif
#ifndef NO_REAL
		fr->synths.plain[r_1to1][f_real] = synth_1to1_real_x86_64;
		fr->synths.stereo[r_1to1][f_real] = synth_1to1_real_stereo_x86_64;
#endif
#ifndef NO_32BIT
		fr->synths.plain[r_1to1][f_32] = synth_1to1_s32_x86_64;
		fr->synths.stereo[r_1to1][f_32] = synth_1to1_s32_stereo_x86_64;
#endif
		done = 1;
	}
#endif /* OPT_X86_64 */

#ifdef OPT_ALTIVEC
	if(!done && (pick_auto || wanted == altivec))
	{
		chosen = dn_altivec;
		fr->cpu_opts.type = altivec;
#ifndef NO_16BIT
		fr->synths.plain[r_1to1][f_16] = synth_1to1_altivec;
		fr->synths.stereo[r_1to1][f_16] = synth_1to1_stereo_altivec;
#endif
#ifndef NO_REAL
		fr->synths.plain[r_1to1][f_real] = synth_1to1_real_altivec;
		fr->synths.stereo[r_1to1][f_real] = synth_1to1_fltst_altivec;
#endif
#ifndef NO_32BIT
		fr->synths.plain[r_1to1][f_32] = synth_1to1_s32_altivec;
		fr->synths.stereo[r_1to1][f_32] = synth_1to1_s32_stereo_altivec;
#endif
		done = 1;
	}
#endif /* OPT_ALTIVEC */

#ifdef OPT_NEON
	if(!done && (pick_auto || wanted == neon) && cpu_neon(cpu_flags))
	{
		chosen = dn_neon;
		fr->cpu_opts.type = neon;
#if defined(OPT_MULTI) && !defined(NO_LAYER3)
		fr->cpu_opts.the_dct36 = dct36_neon;
#endif
#ifndef NO_16BIT
		fr->synths.plain[r_1to1][f_16] = synth_1to1_neon;
		fr->synths.stereo[r_1to1][f_16] = synth_1to1_stereo_neon;
#endif
#ifndef NO_REAL
		fr->synths.plain[r_1to1][f_real] = synth_1to1_real_neon;
		fr->synths.stereo[r_1to1][f_real] = synth_1to1_real_stereo_neon;
#endif
#ifndef NO_32BIT
		fr->synths.plain[r_1to1][f_32] = synth_1to1_s32_neon;
		fr->synths.stereo[r_1to1][f_32] = synth_1to1_s32_stereo_neon;
#endif
		done = 1;
	}
#endif /* OPT_NEON */

#ifdef OPT_ARM
	if(!done && (pick_auto || wanted == arm))
	{
		chosen = dn_arm;
		fr->cpu_opts.type = arm;
#ifndef NO_16BIT
		fr->synths.plain[r_1to1][f_16] = synth_1to1_arm;
#endif
		done = 1;
	}
#endif /* OPT_ARM */

#ifdef OPT_NEON64
	if(!done && (pick_auto || wanted == neon64) && cpu_neon(cpu_flags))
	{
		chosen = dn_neon64;
		fr->cpu_opts.type = neon64;
#if defined(OPT_MULTI) && !defined(NO_LAYER3)
		fr->cpu_opts.the_dct36 = dct36_neon64;
#endif
#ifndef NO_16BIT
		fr->synths.plain[r_1to1][f_16] = synth_1to1_neon64;
		fr->synths.stereo[r_1to1][f_16] = synth_1to1_stereo_neon64;
#endif
#ifndef NO_REAL
		fr->synths.plain[r_1to1][f_real] = synth_1to1_real_neon64;
		fr->synths.stereo[r_1to1][f_real] = synth_1to1_fltst_neon64;
#endif
#ifndef NO_32BIT
		fr->synths.plain[r_1to1][f_32] = synth_1to1_s32_neon64;
		fr->synths.stereo[r_1to1][f_32] = synth_1to1_s32st_neon64;
#endif
		done = 1;
	}
#endif /* OPT_NEON64 */

#ifdef OPT_GENERIC
	/* Nothing to override: the generic synths are in place already. */
	if(!done && (pick_auto || wanted == generic))
	{
		chosen = dn_generic;
		fr->cpu_opts.type = generic;
		done = 1;
	}
#endif

#ifdef OPT_GENERIC_DITHER
	if(!done && (pick_auto || wanted == generic_dither))
	{
		chosen = "dithered generic";
		fr->cpu_opts.type = generic_dither;
		dithered = TRUE;
#ifndef NO_16BIT
		fr->synths.plain[r_1to1][f_16] = synth_1to1_dither;
#ifndef NO_DOWNSAMPLE
		fr->synths.plain[r_2to1][f_16] = synth_2to1_dither;
		fr->synths.plain[r_4to1][f_16] = synth_4to1_dither;
#endif
#endif
		done = 1;
	}
#endif /* OPT_GENERIC_DITHER */

	fr->cpu_opts.class = decclass(fr->cpu_opts.type);

#if !defined(NO_8BIT) && !defined(NO_16BIT)
	/* Last chance to use some optimized routine for 8 bit output via the
	   generic 16->8 wrappers; not done here for the dithering decoder types. */
	if(   fr->cpu_opts.type != ifuenf_dither
	   && fr->cpu_opts.type != generic_dither
	   && fr->synths.plain[r_1to1][f_16] != synth_base.plain[r_1to1][f_16] )
	{
		fr->synths.plain[r_1to1][f_8] = synth_1to1_8bit_wrap;
		fr->synths.mono[r_1to1][f_8] = synth_1to1_8bit_wrap_mono;
		fr->synths.mono2stereo[r_1to1][f_8] = synth_1to1_8bit_wrap_m2s;
	}
#endif

#ifdef OPT_DITHER
	/* Run-time dither noise table generation, only for a chosen dithering decoder. */
	if(done && dithered && !frame_dither_init(fr))
	{
		if(NOQUIET) error("Dither noise setup failed!");
		return 0;
	}
#endif

	if(!done)
	{
		if(NOQUIET) error("Could not set optimization!");
		return 0;
	}

	if(VERBOSE) fprintf(stderr, "Decoder: %s\n", chosen);
	return 1;
}
939
dectype(const char * decoder)940 enum optdec dectype(const char* decoder)
941 {
942 enum optdec dt;
943 if( (decoder == NULL)
944 || (decoder[0] == 0) )
945 return autodec;
946
947 for(dt=autodec; dt<nodec; ++dt)
948 if(!strcasecmp(decoder, decname[dt])) return dt;
949
950 return nodec; /* If we found nothing... */
951 }
952
#ifdef OPT_MULTI

/* Names of the decoders usable on the running CPU, NULL-terminated.
   It starts out empty with one slot per compiled-in decoder option plus the
   terminator; check_decoders() writes the names from the front. */
static const char *mpg123_supported_decoder_list[] =
{
#ifdef OPT_SSE
	NULL,
#endif
#ifdef OPT_SSE_VINTAGE
	NULL,
#endif
#ifdef OPT_3DNOWEXT
	NULL,
#endif
#ifdef OPT_3DNOWEXT_VINTAGE
	NULL,
#endif
#ifdef OPT_3DNOW
	NULL,
#endif
#ifdef OPT_3DNOW_VINTAGE
	NULL,
#endif
#ifdef OPT_MMX
	NULL,
#endif
#ifdef OPT_I586
	NULL,
#endif
#ifdef OPT_I586_DITHER
	NULL,
#endif
#ifdef OPT_I486
	NULL,
#endif
#ifdef OPT_I386
	NULL,
#endif
#ifdef OPT_ALTIVEC
	NULL,
#endif
#ifdef OPT_AVX
	NULL,
#endif
#ifdef OPT_X86_64
	NULL,
#endif
#ifdef OPT_ARM
	NULL,
#endif
#ifdef OPT_NEON
	NULL,
#endif
#ifdef OPT_NEON64
	NULL,
#endif
#ifdef OPT_GENERIC_FLOAT
	NULL,
#endif
#ifdef OPT_GENERIC
	NULL,
#endif
#ifdef OPT_GENERIC_DITHER
	NULL,
#endif
	NULL /* terminator */
};
#endif
1021
/* Names of all decoders compiled into this build, NULL-terminated.
   This is what mpg123_decoders() hands out. */
static const char *mpg123_decoder_list[] =
{
#ifdef OPT_SSE
	dn_sse,
#endif
#ifdef OPT_SSE_VINTAGE
	dn_sse_vintage,
#endif
#ifdef OPT_3DNOWEXT
	dn_dreidnowext,
#endif
#ifdef OPT_3DNOWEXT_VINTAGE
	dn_dreidnowext_vintage,
#endif
#ifdef OPT_3DNOW
	dn_dreidnow,
#endif
#ifdef OPT_3DNOW_VINTAGE
	dn_dreidnow_vintage,
#endif
#ifdef OPT_MMX
	dn_mmx,
#endif
#ifdef OPT_I586
	dn_ifuenf,
#endif
#ifdef OPT_I586_DITHER
	dn_ifuenf_dither,
#endif
#ifdef OPT_I486
	dn_ivier,
#endif
#ifdef OPT_I386
	dn_idrei,
#endif
#ifdef OPT_ALTIVEC
	dn_altivec,
#endif
#ifdef OPT_AVX
	dn_avx,
#endif
#ifdef OPT_X86_64
	dn_x86_64,
#endif
#ifdef OPT_ARM
	dn_arm,
#endif
#ifdef OPT_NEON
	dn_neon,
#endif
#ifdef OPT_NEON64
	dn_neon64,
#endif
#ifdef OPT_GENERIC
	dn_generic,
#endif
#ifdef OPT_GENERIC_DITHER
	dn_generic_dither,
#endif
	NULL /* terminator */
};
1083
/* Fill mpg123_supported_decoder_list with the names of the decoders that are
   usable on the running CPU.
   Without OPT_MULTI this does nothing; mpg123_supported_decoders() then hands
   out the full decoder list instead.
   With OPT_MULTI, the CPU flags are fetched via getcpuflags() (x86, x86-64,
   NEON builds) and each compiled-in decoder is appended when its CPU
   requirements are met. The requirements are the same ones frame_cpu_opt()
   checks before accepting a decoder, so that no decoder is listed as supported
   that frame_cpu_opt() would refuse. */
void check_decoders(void)
{
#ifndef OPT_MULTI
	/* In non-multi mode, only the full list (one entry) is used. */
	return;
#else
	const char **d = mpg123_supported_decoder_list;
#if (defined OPT_X86) || (defined OPT_X86_64) || (defined OPT_NEON) || (defined OPT_NEON64)
	getcpuflags(&cpu_flags);
#endif
#ifdef OPT_X86
	if(cpu_i586(cpu_flags))
	{
#ifdef OPT_SSE
		/* frame_cpu_opt() wants SSE and MMX. */
		if(cpu_sse(cpu_flags) && cpu_mmx(cpu_flags)) *(d++) = dn_sse;
#endif
#ifdef OPT_SSE_VINTAGE
		if(cpu_sse(cpu_flags) && cpu_mmx(cpu_flags)) *(d++) = dn_sse_vintage;
#endif
#ifdef OPT_3DNOWEXT
		/* frame_cpu_opt() wants 3DNow, 3DNowExt and MMX. */
		if(   cpu_3dnow(cpu_flags)
		   && cpu_3dnowext(cpu_flags)
		   && cpu_mmx(cpu_flags) ) *(d++) = dn_dreidnowext;
#endif
#ifdef OPT_3DNOWEXT_VINTAGE
		if(   cpu_3dnow(cpu_flags)
		   && cpu_3dnowext(cpu_flags)
		   && cpu_mmx(cpu_flags) ) *(d++) = dn_dreidnowext_vintage;
#endif
#ifdef OPT_3DNOW
		/* frame_cpu_opt() wants 3DNow and MMX. */
		if(cpu_3dnow(cpu_flags) && cpu_mmx(cpu_flags)) *(d++) = dn_dreidnow;
#endif
#ifdef OPT_3DNOW_VINTAGE
		if(cpu_3dnow(cpu_flags) && cpu_mmx(cpu_flags)) *(d++) = dn_dreidnow_vintage;
#endif
#ifdef OPT_MMX
		if(cpu_mmx(cpu_flags)) *(d++) = dn_mmx;
#endif
#ifdef OPT_I586
		*(d++) = dn_ifuenf;
#endif
#ifdef OPT_I586_DITHER
		*(d++) = dn_ifuenf_dither;
#endif
	}
#endif
	/* Just assume that the i486 build is run on an i486 CPU. */
#ifdef OPT_I486
	*(d++) = dn_ivier;
#endif
#ifdef OPT_ALTIVEC
	*(d++) = dn_altivec;
#endif
	/* Every supported x86 can do i386, any CPU can do generic. */
#ifdef OPT_I386
	*(d++) = dn_idrei;
#endif
#ifdef OPT_AVX
	if(cpu_avx(cpu_flags)) *(d++) = dn_avx;
#endif
#ifdef OPT_X86_64
	*(d++) = dn_x86_64;
#endif
#ifdef OPT_ARM
	*(d++) = dn_arm;
#endif
#ifdef OPT_NEON
	if(cpu_neon(cpu_flags)) *(d++) = dn_neon;
#endif
#ifdef OPT_NEON64
	if(cpu_neon(cpu_flags)) *(d++) = dn_neon64;
#endif
#ifdef OPT_GENERIC
	*(d++) = dn_generic;
#endif
#ifdef OPT_GENERIC_DITHER
	*(d++) = dn_generic_dither;
#endif
#endif /* ndef OPT_MULTI */
}
1162
mpg123_current_decoder(mpg123_handle * mh)1163 const char* attribute_align_arg mpg123_current_decoder(mpg123_handle *mh)
1164 {
1165 if(mh == NULL) return NULL;
1166
1167 return decname[mh->cpu_opts.type];
1168 }
1169
mpg123_decoders(void)1170 const char attribute_align_arg **mpg123_decoders(void){ return mpg123_decoder_list; }
mpg123_supported_decoders(void)1171 const char attribute_align_arg **mpg123_supported_decoders(void)
1172 {
1173 #ifdef OPT_MULTI
1174 return mpg123_supported_decoder_list;
1175 #else
1176 return mpg123_decoder_list;
1177 #endif
1178 }
1179