1 /* 2 optimize: get a grip on the different optimizations 3 4 copyright 2006-9 by the mpg123 project - free software under the terms of the LGPL 2.1 5 see COPYING and AUTHORS files in distribution or http://mpg123.org 6 initially written by Thomas Orgis, inspired by 3DNow stuff in mpg123.[hc] 7 8 Currently, this file contains the struct and function to choose an optimization variant and works only when OPT_MULTI is in effect. 9 */ 10 11 #define I_AM_OPTIMIZE 12 #include "mpg123lib_intern.h" /* includes optimize.h */ 13 #include "debug.h" 14 15 #if ((defined OPT_X86) || (defined OPT_X86_64) || (defined OPT_NEON) || (defined OPT_NEON64)) && (defined OPT_MULTI) 16 #include "getcpuflags.h" 17 static struct cpuflags cpu_flags; 18 #else 19 /* Faking stuff for non-multi builds. The same code for synth function choice is used. 20 Just no runtime dependency of result... */ 21 #define cpu_flags nothing 22 #define cpu_i586(s) 1 23 #define cpu_fpu(s) 1 24 #define cpu_mmx(s) 1 25 #define cpu_3dnow(s) 1 26 #define cpu_3dnowext(s) 1 27 #define cpu_sse(s) 1 28 #define cpu_sse2(s) 1 29 #define cpu_sse3(s) 1 30 #define cpu_avx(s) 1 31 #define cpu_neon(s) 1 32 #endif 33 34 /* Ugly macros to build conditional synth function array values. */ 35 36 #ifndef NO_8BIT 37 #define IF8(synth) synth, 38 #else 39 #define IF8(synth) 40 #endif 41 42 #ifndef NO_SYNTH32 43 44 #ifndef NO_REAL 45 #define IFREAL(synth) synth, 46 #else 47 #define IFREAL(synth) 48 #endif 49 50 #ifndef NO_32BIT 51 #define IF32(synth) synth 52 #else 53 #define IF32(synth) 54 #endif 55 56 #else 57 58 #define IFREAL(synth) 59 #define IF32(synth) 60 61 #endif 62 63 #ifndef NO_16BIT 64 # define OUT_SYNTHS(synth_16, synth_8, synth_real, synth_32) { synth_16, IF8(synth_8) IFREAL(synth_real) IF32(synth_32) } 65 #else 66 # define OUT_SYNTHS(synth_16, synth_8, synth_real, synth_32) { IF8(synth_8) IFREAL(synth_real) IF32(synth_32) } 67 #endif 68 69 /* The call of left and right plain synth, wrapped. 70 This may be replaced by a direct stereo optimized synth. */ 71 static int synth_stereo_wrap(real *bandPtr_l, real *bandPtr_r, mpg123_handle *fr) 72 { 73 int clip; 74 clip = (fr->synth)(bandPtr_l, 0, fr, 0); 75 clip += (fr->synth)(bandPtr_r, 1, fr, 1); 76 return clip; 77 } 78 79 static const struct synth_s synth_base = 80 { 81 { /* plain */ 82 OUT_SYNTHS(synth_1to1, synth_1to1_8bit, synth_1to1_real, synth_1to1_s32) 83 # ifndef NO_DOWNSAMPLE 84 ,OUT_SYNTHS(synth_2to1, synth_2to1_8bit, synth_2to1_real, synth_2to1_s32) 85 ,OUT_SYNTHS(synth_4to1, synth_4to1_8bit, synth_4to1_real, synth_4to1_s32) 86 # endif 87 # ifndef NO_NTOM 88 ,OUT_SYNTHS(synth_ntom, synth_ntom_8bit, synth_ntom_real, synth_ntom_s32) 89 # endif 90 }, 91 { /* stereo, by default only wrappers over plain synth */ 92 OUT_SYNTHS(synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap) 93 # ifndef NO_DOWNSAMPLE 94 ,OUT_SYNTHS(synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap) 95 ,OUT_SYNTHS(synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap) 96 # endif 97 # ifndef NO_NTOM 98 ,OUT_SYNTHS(synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap) 99 # endif 100 }, 101 { /* mono2stereo */ 102 OUT_SYNTHS(synth_1to1_m2s, synth_1to1_8bit_m2s, synth_1to1_real_m2s, synth_1to1_s32_m2s) 103 # ifndef NO_DOWNSAMPLE 104 ,OUT_SYNTHS(synth_2to1_m2s, synth_2to1_8bit_m2s, synth_2to1_real_m2s, synth_2to1_s32_m2s) 105 ,OUT_SYNTHS(synth_4to1_m2s, synth_4to1_8bit_m2s, synth_4to1_real_m2s, synth_4to1_s32_m2s) 106 # endif 107 # ifndef NO_NTOM 108 ,OUT_SYNTHS(synth_ntom_m2s, synth_ntom_8bit_m2s, synth_ntom_real_m2s, synth_ntom_s32_m2s) 109 # endif 110 }, 111 { /* mono*/ 112 OUT_SYNTHS(synth_1to1_mono, synth_1to1_8bit_mono, synth_1to1_real_mono, synth_1to1_s32_mono) 113 # ifndef NO_DOWNSAMPLE 114 ,OUT_SYNTHS(synth_2to1_mono, synth_2to1_8bit_mono, synth_2to1_real_mono, synth_2to1_s32_mono) 115 ,OUT_SYNTHS(synth_4to1_mono, synth_4to1_8bit_mono, synth_4to1_real_mono, synth_4to1_s32_mono) 116 # endif 117 # ifndef NO_NTOM 118 ,OUT_SYNTHS(synth_ntom_mono, synth_ntom_8bit_mono, synth_ntom_real_mono, synth_ntom_s32_mono) 119 #endif 120 } 121 }; 122 123 #ifdef OPT_X86 124 /* More plain synths for i386 */ 125 const func_synth plain_i386[r_limit][f_limit] = 126 { /* plain */ 127 OUT_SYNTHS(synth_1to1_i386, synth_1to1_8bit_i386, synth_1to1_real_i386, synth_1to1_s32_i386) 128 # ifndef NO_DOWNSAMPLE 129 ,OUT_SYNTHS(synth_2to1_i386, synth_2to1_8bit_i386, synth_2to1_real_i386, synth_2to1_s32_i386) 130 ,OUT_SYNTHS(synth_4to1_i386, synth_4to1_8bit_i386, synth_4to1_real_i386, synth_4to1_s32_i386) 131 # endif 132 # ifndef NO_NTOM 133 ,OUT_SYNTHS(synth_ntom, synth_ntom_8bit, synth_ntom_real, synth_ntom_s32) 134 # endif 135 }; 136 #endif 137 138 139 enum optdec defdec(void){ return defopt; } 140 141 enum optcla decclass(const enum optdec type) 142 { 143 return 144 ( 145 type == mmx 146 || type == sse 147 || type == sse_vintage 148 || type == dreidnowext 149 || type == dreidnowext_vintage 150 || type == x86_64 151 || type == neon 152 || type == neon64 153 || type == avx 154 ) ? mmxsse : normal; 155 } 156 157 static int find_synth(func_synth synth, const func_synth synths[r_limit][f_limit]) 158 { 159 enum synth_resample ri; 160 enum synth_format fi; 161 for(ri=0; ri<r_limit; ++ri) 162 for(fi=0; fi<f_limit; ++fi) 163 if(synth == synths[ri][fi]) 164 return TRUE; 165 166 return FALSE; 167 } 168 169 170 #if defined(OPT_SSE) || defined(OPT_SSE_VINTAGE) 171 /* After knowing that it is either vintage or current SSE, 172 this separates the two. In case of non-OPT_MULTI, only one 173 of OPT_SSE and OPT_SSE_VINTAGE is active. */ 174 static enum optdec sse_or_vintage(mpg123_handle *fr) 175 { 176 enum optdec type; 177 type = sse_vintage; 178 # ifdef OPT_SSE 179 # ifdef OPT_MULTI 180 if(fr->cpu_opts.the_dct36 == dct36_sse) 181 # endif 182 type = sse; 183 # endif 184 return type; 185 } 186 #endif 187 188 /* Determine what kind of decoder is actually active 189 This depends on runtime choices which may cause fallback to i386 or generic code. */ 190 static int find_dectype(mpg123_handle *fr) 191 { 192 enum optdec type = nodec; 193 /* Direct and indirect usage, 1to1 stereo decoding. 194 Concentrating on the plain stereo synth should be fine, mono stuff is derived. */ 195 func_synth basic_synth = fr->synth; 196 #ifndef NO_8BIT 197 #ifndef NO_16BIT 198 if(basic_synth == synth_1to1_8bit_wrap) 199 basic_synth = fr->synths.plain[r_1to1][f_16]; /* That is what's really below the surface. */ 200 #endif 201 #endif 202 203 if(FALSE) ; /* Just to initialize the else if ladder. */ 204 #ifndef NO_16BIT 205 #if defined(OPT_3DNOWEXT) || defined(OPT_3DNOWEXT_VINTAGE) 206 else if(basic_synth == synth_1to1_3dnowext) 207 { 208 type = dreidnowext; 209 # ifdef OPT_3DNOWEXT_VINTAGE 210 # ifdef OPT_MULTI 211 if(fr->cpu_opts.the_dct36 == dct36_3dnowext) 212 # endif 213 type = dreidnowext_vintage; 214 # endif 215 } 216 #endif 217 #if defined(OPT_SSE) || defined(OPT_SSE_VINTAGE) 218 else if(basic_synth == synth_1to1_sse) 219 { 220 type = sse_or_vintage(fr); 221 } 222 #endif 223 #if defined(OPT_3DNOW) || defined(OPT_3DNOW_VINTAGE) 224 else if(basic_synth == synth_1to1_3dnow) 225 { 226 type = dreidnow; 227 # ifdef OPT_3DNOW_VINTAGE 228 # ifdef OPT_MULTI 229 if(fr->cpu_opts.the_dct36 == dct36_3dnow) 230 # endif 231 type = dreidnow_vintage; 232 # endif 233 } 234 #endif 235 #ifdef OPT_MMX 236 else if(basic_synth == synth_1to1_mmx) type = mmx; 237 #endif 238 #ifdef OPT_I586_DITHER 239 else if(basic_synth == synth_1to1_i586_dither) type = ifuenf_dither; 240 #endif 241 #ifdef OPT_I586 242 else if(basic_synth == synth_1to1_i586) type = ifuenf; 243 #endif 244 #ifdef OPT_ALTIVEC 245 else if(basic_synth == synth_1to1_altivec) type = altivec; 246 #endif 247 #ifdef OPT_X86_64 248 else if(basic_synth == synth_1to1_x86_64) type = x86_64; 249 #endif 250 #ifdef OPT_AVX 251 else if(basic_synth == synth_1to1_avx) type = avx; 252 #endif 253 #ifdef OPT_ARM 254 else if(basic_synth == synth_1to1_arm) type = arm; 255 #endif 256 #ifdef OPT_NEON 257 else if(basic_synth == synth_1to1_neon) type = neon; 258 #endif 259 #ifdef OPT_NEON64 260 else if(basic_synth == synth_1to1_neon64) type = neon64; 261 #endif 262 #ifdef OPT_GENERIC_DITHER 263 else if(basic_synth == synth_1to1_dither) type = generic_dither; 264 #endif 265 #ifdef OPT_DITHER /* either i586 or generic! */ 266 #ifndef NO_DOWNSAMPLE 267 else if 268 ( 269 basic_synth == synth_2to1_dither 270 || basic_synth == synth_4to1_dither 271 ) type = generic_dither; 272 #endif 273 #endif 274 #endif /* 16bit */ 275 276 #ifndef NO_SYNTH32 277 278 #ifndef NO_REAL 279 #if defined(OPT_SSE) || defined(OPT_SSE_VINTAGE) 280 else if(basic_synth == synth_1to1_real_sse) 281 { 282 type = sse_or_vintage(fr); 283 } 284 #endif 285 #ifdef OPT_X86_64 286 else if(basic_synth == synth_1to1_real_x86_64) type = x86_64; 287 #endif 288 #ifdef OPT_AVX 289 else if(basic_synth == synth_1to1_real_avx) type = avx; 290 #endif 291 #ifdef OPT_ALTIVEC 292 else if(basic_synth == synth_1to1_real_altivec) type = altivec; 293 #endif 294 #ifdef OPT_NEON 295 else if(basic_synth == synth_1to1_real_neon) type = neon; 296 #endif 297 #ifdef OPT_NEON64 298 else if(basic_synth == synth_1to1_real_neon64) type = neon64; 299 #endif 300 301 #endif /* real */ 302 303 #ifndef NO_32BIT 304 #if defined(OPT_SSE) || defined(OPT_SSE_VINTAGE) 305 else if(basic_synth == synth_1to1_s32_sse) 306 { 307 type = sse_or_vintage(fr); 308 } 309 #endif 310 #ifdef OPT_X86_64 311 else if(basic_synth == synth_1to1_s32_x86_64) type = x86_64; 312 #endif 313 #ifdef OPT_AVX 314 else if(basic_synth == synth_1to1_s32_avx) type = avx; 315 #endif 316 #ifdef OPT_ALTIVEC 317 else if(basic_synth == synth_1to1_s32_altivec) type = altivec; 318 #endif 319 #ifdef OPT_NEON 320 else if(basic_synth == synth_1to1_s32_neon) type = neon; 321 #endif 322 #ifdef OPT_NEON64 323 else if(basic_synth == synth_1to1_s32_neon64) type = neon64; 324 #endif 325 #endif /* 32bit */ 326 327 #endif /* any 32 bit synth */ 328 329 #ifdef OPT_X86 330 else if(find_synth(basic_synth, plain_i386)) 331 type = idrei; 332 #endif 333 334 else if(find_synth(basic_synth, synth_base.plain)) 335 type = generic; 336 337 338 339 #ifdef OPT_I486 340 /* i486 is special ... the specific code is in use for 16bit 1to1 stereo 341 otherwise we have i386 active... but still, the distinction doesn't matter*/ 342 type = ivier; 343 #endif 344 345 if(type != nodec) 346 { 347 fr->cpu_opts.type = type; 348 fr->cpu_opts.class = decclass(type); 349 350 debug3("determined active decoder type %i (%s) of class %i", type, decname[type], fr->cpu_opts.class); 351 return MPG123_OK; 352 } 353 else 354 { 355 if(NOQUIET) error("Unable to determine active decoder type -- this is SERIOUS b0rkage!"); 356 357 fr->err = MPG123_BAD_DECODER_SETUP; 358 return MPG123_ERR; 359 } 360 } 361 362 /* set synth functions for current frame, optimizations handled by opt_* macros */ 363 int set_synth_functions(mpg123_handle *fr) 364 { 365 enum synth_resample resample = r_none; 366 enum synth_format basic_format = f_none; /* Default is always 16bit, or whatever. */ 367 368 /* Select the basic output format, different from 16bit: 8bit, real. */ 369 if(FALSE){} 370 #ifndef NO_16BIT 371 else if(fr->af.dec_enc & MPG123_ENC_16) 372 basic_format = f_16; 373 #endif 374 #ifndef NO_8BIT 375 else if(fr->af.dec_enc & MPG123_ENC_8) 376 basic_format = f_8; 377 #endif 378 #ifndef NO_REAL 379 else if(fr->af.dec_enc & MPG123_ENC_FLOAT) 380 basic_format = f_real; 381 #endif 382 #ifndef NO_32BIT 383 /* 24 bit integer means decoding to 32 bit first. */ 384 else if(fr->af.dec_enc & MPG123_ENC_32 || fr->af.dec_enc & MPG123_ENC_24) 385 basic_format = f_32; 386 #endif 387 388 /* Make sure the chosen format is compiled into this lib. */ 389 if(basic_format == f_none) 390 { 391 if(NOQUIET) error("set_synth_functions: This output format is disabled in this build!"); 392 393 return -1; 394 } 395 396 /* Be explicit about downsampling variant. */ 397 switch(fr->down_sample) 398 { 399 case 0: resample = r_1to1; break; 400 #ifndef NO_DOWNSAMPLE 401 case 1: resample = r_2to1; break; 402 case 2: resample = r_4to1; break; 403 #endif 404 #ifndef NO_NTOM 405 case 3: resample = r_ntom; break; 406 #endif 407 } 408 409 if(resample == r_none) 410 { 411 if(NOQUIET) error("set_synth_functions: This resampling mode is not supported in this build!"); 412 413 return -1; 414 } 415 416 debug2("selecting synth: resample=%i format=%i", resample, basic_format); 417 /* Finally selecting the synth functions for stereo / mono. */ 418 fr->synth = fr->synths.plain[resample][basic_format]; 419 fr->synth_stereo = fr->synths.stereo[resample][basic_format]; 420 fr->synth_mono = fr->af.channels==2 421 ? fr->synths.mono2stereo[resample][basic_format] /* Mono MPEG file decoded to stereo. */ 422 : fr->synths.mono[resample][basic_format]; /* Mono MPEG file decoded to mono. */ 423 424 if(find_dectype(fr) != MPG123_OK) /* Actually determine the currently active decoder breed. */ 425 { 426 fr->err = MPG123_BAD_DECODER_SETUP; 427 return MPG123_ERR; 428 } 429 430 if(frame_buffers(fr) != 0) 431 { 432 fr->err = MPG123_NO_BUFFERS; 433 if(NOQUIET) error("Failed to set up decoder buffers!"); 434 435 return MPG123_ERR; 436 } 437 438 #ifndef NO_8BIT 439 if(basic_format == f_8) 440 { 441 if(make_conv16to8_table(fr) != 0) 442 { 443 if(NOQUIET) error("Failed to set up conv16to8 table!"); 444 /* it's a bit more work to get proper error propagation up */ 445 return -1; 446 } 447 } 448 #endif 449 450 #ifdef OPT_MMXORSSE 451 /* Special treatment for MMX, SSE and 3DNowExt stuff. 452 The real-decoding SSE for x86-64 uses normal tables! */ 453 if(fr->cpu_opts.class == mmxsse 454 # ifndef NO_REAL 455 && basic_format != f_real 456 # endif 457 # ifndef NO_32BIT 458 && basic_format != f_32 459 # endif 460 # ifdef ACCURATE_ROUNDING 461 && fr->cpu_opts.type != sse 462 && fr->cpu_opts.type != sse_vintage 463 && fr->cpu_opts.type != x86_64 464 && fr->cpu_opts.type != neon 465 && fr->cpu_opts.type != neon64 466 && fr->cpu_opts.type != avx 467 # endif 468 ) 469 { 470 #ifndef NO_LAYER3 471 init_layer3_stuff(fr, init_layer3_gainpow2_mmx); 472 #endif 473 #ifndef NO_LAYER12 474 init_layer12_stuff(fr, init_layer12_table_mmx); 475 #endif 476 fr->make_decode_tables = make_decode_tables_mmx; 477 } 478 else 479 #endif 480 { 481 #ifndef NO_LAYER3 482 init_layer3_stuff(fr, init_layer3_gainpow2); 483 #endif 484 #ifndef NO_LAYER12 485 init_layer12_stuff(fr, init_layer12_table); 486 #endif 487 fr->make_decode_tables = make_decode_tables; 488 } 489 490 /* We allocated the table buffers just now, so (re)create the tables. */ 491 fr->make_decode_tables(fr); 492 493 return 0; 494 } 495 496 int frame_cpu_opt(mpg123_handle *fr, const char* cpu) 497 { 498 const char* chosen = ""; /* the chosen decoder opt as string */ 499 enum optdec want_dec = nodec; 500 int done = 0; 501 int auto_choose = 0; 502 #ifdef OPT_DITHER 503 int dithered = FALSE; /* If some dithered decoder is chosen. */ 504 #endif 505 506 want_dec = dectype(cpu); 507 auto_choose = want_dec == autodec; 508 /* Fill whole array of synth functions with generic code first. */ 509 fr->synths = synth_base; 510 511 #ifndef OPT_MULTI 512 { 513 if(!auto_choose && want_dec != defopt) 514 { 515 if(NOQUIET) error2("you wanted decoder type %i, I only have %i", want_dec, defopt); 516 } 517 auto_choose = TRUE; /* There will be only one choice anyway. */ 518 } 519 #endif 520 521 fr->cpu_opts.type = nodec; 522 #ifdef OPT_MULTI 523 #ifndef NO_LAYER3 524 #if (defined OPT_3DNOW_VINTAGE || defined OPT_3DNOWEXT_VINTAGE || defined OPT_SSE || defined OPT_X86_64 || defined OPT_AVX || defined OPT_NEON || defined OPT_NEON64) 525 fr->cpu_opts.the_dct36 = dct36; 526 #endif 527 #endif 528 #endif 529 /* covers any i386+ cpu; they actually differ only in the synth_1to1 function, mostly... */ 530 #ifdef OPT_X86 531 if(cpu_i586(cpu_flags)) 532 { 533 # ifdef OPT_MULTI 534 debug2("standard flags: 0x%08x\textended flags: 0x%08x", cpu_flags.std, cpu_flags.ext); 535 # endif 536 # ifdef OPT_SSE 537 if( !done && (auto_choose || want_dec == sse) 538 && cpu_sse(cpu_flags) && cpu_mmx(cpu_flags) ) 539 { 540 chosen = dn_sse; 541 fr->cpu_opts.type = sse; 542 #ifdef OPT_MULTI 543 # ifndef NO_LAYER3 544 /* if(cpu_fast_sse(cpu_flags)) */ fr->cpu_opts.the_dct36 = dct36_sse; 545 # endif 546 #endif 547 # ifndef NO_16BIT 548 fr->synths.plain[r_1to1][f_16] = synth_1to1_sse; 549 # ifdef ACCURATE_ROUNDING 550 fr->synths.stereo[r_1to1][f_16] = synth_1to1_stereo_sse; 551 # endif 552 # endif 553 # ifndef NO_REAL 554 fr->synths.plain[r_1to1][f_real] = synth_1to1_real_sse; 555 fr->synths.stereo[r_1to1][f_real] = synth_1to1_real_stereo_sse; 556 # endif 557 # ifndef NO_32BIT 558 fr->synths.plain[r_1to1][f_32] = synth_1to1_s32_sse; 559 fr->synths.stereo[r_1to1][f_32] = synth_1to1_s32_stereo_sse; 560 # endif 561 done = 1; 562 } 563 # endif 564 # ifdef OPT_SSE_VINTAGE 565 if( !done && (auto_choose || want_dec == sse_vintage) 566 && cpu_sse(cpu_flags) && cpu_mmx(cpu_flags) ) 567 { 568 chosen = dn_sse_vintage; 569 fr->cpu_opts.type = sse_vintage; 570 # ifndef NO_16BIT 571 fr->synths.plain[r_1to1][f_16] = synth_1to1_sse; 572 # ifdef ACCURATE_ROUNDING 573 fr->synths.stereo[r_1to1][f_16] = synth_1to1_stereo_sse; 574 # endif 575 # endif 576 # ifndef NO_REAL 577 fr->synths.plain[r_1to1][f_real] = synth_1to1_real_sse; 578 fr->synths.stereo[r_1to1][f_real] = synth_1to1_real_stereo_sse; 579 # endif 580 # ifndef NO_32BIT 581 fr->synths.plain[r_1to1][f_32] = synth_1to1_s32_sse; 582 fr->synths.stereo[r_1to1][f_32] = synth_1to1_s32_stereo_sse; 583 # endif 584 done = 1; 585 } 586 # endif 587 # ifdef OPT_3DNOWEXT 588 if( !done && (auto_choose || want_dec == dreidnowext) 589 && cpu_3dnow(cpu_flags) 590 && cpu_3dnowext(cpu_flags) 591 && cpu_mmx(cpu_flags) ) 592 { 593 chosen = dn_dreidnowext; 594 fr->cpu_opts.type = dreidnowext; 595 # ifndef NO_16BIT 596 fr->synths.plain[r_1to1][f_16] = synth_1to1_3dnowext; 597 # endif 598 done = 1; 599 } 600 # endif 601 # ifdef OPT_3DNOWEXT_VINTAGE 602 if( !done && (auto_choose || want_dec == dreidnowext_vintage) 603 && cpu_3dnow(cpu_flags) 604 && cpu_3dnowext(cpu_flags) 605 && cpu_mmx(cpu_flags) ) 606 { 607 chosen = dn_dreidnowext_vintage; 608 fr->cpu_opts.type = dreidnowext_vintage; 609 #ifdef OPT_MULTI 610 # ifndef NO_LAYER3 611 fr->cpu_opts.the_dct36 = dct36_3dnowext; 612 # endif 613 #endif 614 # ifndef NO_16BIT 615 fr->synths.plain[r_1to1][f_16] = synth_1to1_3dnowext; 616 # endif 617 done = 1; 618 } 619 # endif 620 # ifdef OPT_3DNOW 621 if( !done && (auto_choose || want_dec == dreidnow) 622 && cpu_3dnow(cpu_flags) && cpu_mmx(cpu_flags) ) 623 { 624 chosen = dn_dreidnow; 625 fr->cpu_opts.type = dreidnow; 626 # ifndef NO_16BIT 627 fr->synths.plain[r_1to1][f_16] = synth_1to1_3dnow; 628 # endif 629 done = 1; 630 } 631 # endif 632 # ifdef OPT_3DNOW_VINTAGE 633 if( !done && (auto_choose || want_dec == dreidnow_vintage) 634 && cpu_3dnow(cpu_flags) && cpu_mmx(cpu_flags) ) 635 { 636 chosen = dn_dreidnow_vintage; 637 fr->cpu_opts.type = dreidnow_vintage; 638 #ifdef OPT_MULTI 639 # ifndef NO_LAYER3 640 fr->cpu_opts.the_dct36 = dct36_3dnow; 641 # endif 642 #endif 643 # ifndef NO_16BIT 644 fr->synths.plain[r_1to1][f_16] = synth_1to1_3dnow; 645 # endif 646 done = 1; 647 } 648 # endif 649 #ifdef OPT_MMX 650 if( !done && (auto_choose || want_dec == mmx) 651 && cpu_mmx(cpu_flags) ) 652 { 653 chosen = dn_mmx; 654 fr->cpu_opts.type = mmx; 655 # ifndef NO_16BIT 656 fr->synths.plain[r_1to1][f_16] = synth_1to1_mmx; 657 # endif 658 done = 1; 659 } 660 #endif 661 #ifdef OPT_I586 662 if(!done && (auto_choose || want_dec == ifuenf)) 663 { 664 chosen = "i586/pentium"; 665 fr->cpu_opts.type = ifuenf; 666 # ifndef NO_16BIT 667 fr->synths.plain[r_1to1][f_16] = synth_1to1_i586; 668 # endif 669 done = 1; 670 } 671 #endif 672 #ifdef OPT_I586_DITHER 673 if(!done && (auto_choose || want_dec == ifuenf_dither)) 674 { 675 chosen = "dithered i586/pentium"; 676 fr->cpu_opts.type = ifuenf_dither; 677 dithered = TRUE; 678 # ifndef NO_16BIT 679 fr->synths.plain[r_1to1][f_16] = synth_1to1_i586_dither; 680 # ifndef NO_DOWNSAMPLE 681 fr->synths.plain[r_2to1][f_16] = synth_2to1_dither; 682 fr->synths.plain[r_4to1][f_16] = synth_4to1_dither; 683 # endif 684 # endif 685 done = 1; 686 } 687 #endif 688 } 689 #ifdef OPT_I486 690 /* That won't cooperate in multi opt mode - forcing i486 in layer3.c 691 But still... here it is... maybe for real use in future. */ 692 if(!done && (auto_choose || want_dec == ivier)) 693 { 694 chosen = dn_ivier; 695 fr->cpu_opts.type = ivier; 696 done = 1; 697 } 698 #endif 699 #ifdef OPT_I386 700 if(!done && (auto_choose || want_dec == idrei)) 701 { 702 chosen = dn_idrei; 703 fr->cpu_opts.type = idrei; 704 done = 1; 705 } 706 #endif 707 708 if(done) 709 { 710 /* 711 We have chosen some x86 decoder... fillup some i386 stuff. 712 There is an open question about using dithered synth_1to1 for 8bit wrappers. 713 For quality it won't make sense, but wrapped i586_dither wrapped may still be faster... 714 */ 715 enum synth_resample ri; 716 enum synth_format fi; 717 # ifndef NO_8BIT 718 # ifndef NO_16BIT /* possibility to use a 16->8 wrapper... */ 719 if(fr->synths.plain[r_1to1][f_16] != synth_base.plain[r_1to1][f_16]) 720 { 721 fr->synths.plain[r_1to1][f_8] = synth_1to1_8bit_wrap; 722 fr->synths.mono[r_1to1][f_8] = synth_1to1_8bit_wrap_mono; 723 fr->synths.mono2stereo[r_1to1][f_8] = synth_1to1_8bit_wrap_m2s; 724 } 725 # endif 726 # endif 727 for(ri=0; ri<r_limit; ++ri) 728 for(fi=0; fi<f_limit; ++fi) 729 { 730 if(fr->synths.plain[ri][fi] == synth_base.plain[ri][fi]) 731 fr->synths.plain[ri][fi] = plain_i386[ri][fi]; 732 } 733 } 734 735 #endif /* OPT_X86 */ 736 737 #ifdef OPT_AVX 738 if(!done && (auto_choose || want_dec == avx) && cpu_avx(cpu_flags)) 739 { 740 chosen = "x86-64 (AVX)"; 741 fr->cpu_opts.type = avx; 742 #ifdef OPT_MULTI 743 # ifndef NO_LAYER3 744 fr->cpu_opts.the_dct36 = dct36_avx; 745 # endif 746 #endif 747 # ifndef NO_16BIT 748 fr->synths.plain[r_1to1][f_16] = synth_1to1_avx; 749 fr->synths.stereo[r_1to1][f_16] = synth_1to1_stereo_avx; 750 # endif 751 # ifndef NO_REAL 752 fr->synths.plain[r_1to1][f_real] = synth_1to1_real_avx; 753 fr->synths.stereo[r_1to1][f_real] = synth_1to1_fltst_avx; 754 # endif 755 # ifndef NO_32BIT 756 fr->synths.plain[r_1to1][f_32] = synth_1to1_s32_avx; 757 fr->synths.stereo[r_1to1][f_32] = synth_1to1_s32_stereo_avx; 758 # endif 759 done = 1; 760 } 761 #endif 762 763 #ifdef OPT_X86_64 764 if(!done && (auto_choose || want_dec == x86_64)) 765 { 766 chosen = "x86-64 (SSE)"; 767 fr->cpu_opts.type = x86_64; 768 #ifdef OPT_MULTI 769 # ifndef NO_LAYER3 770 fr->cpu_opts.the_dct36 = dct36_x86_64; 771 # endif 772 #endif 773 # ifndef NO_16BIT 774 fr->synths.plain[r_1to1][f_16] = synth_1to1_x86_64; 775 fr->synths.stereo[r_1to1][f_16] = synth_1to1_stereo_x86_64; 776 # endif 777 # ifndef NO_REAL 778 fr->synths.plain[r_1to1][f_real] = synth_1to1_real_x86_64; 779 fr->synths.stereo[r_1to1][f_real] = synth_1to1_real_stereo_x86_64; 780 # endif 781 # ifndef NO_32BIT 782 fr->synths.plain[r_1to1][f_32] = synth_1to1_s32_x86_64; 783 fr->synths.stereo[r_1to1][f_32] = synth_1to1_s32_stereo_x86_64; 784 # endif 785 done = 1; 786 } 787 #endif 788 789 # ifdef OPT_ALTIVEC 790 if(!done && (auto_choose || want_dec == altivec)) 791 { 792 chosen = dn_altivec; 793 fr->cpu_opts.type = altivec; 794 # ifndef NO_16BIT 795 fr->synths.plain[r_1to1][f_16] = synth_1to1_altivec; 796 fr->synths.stereo[r_1to1][f_16] = synth_1to1_stereo_altivec; 797 # endif 798 # ifndef NO_REAL 799 fr->synths.plain[r_1to1][f_real] = synth_1to1_real_altivec; 800 fr->synths.stereo[r_1to1][f_real] = synth_1to1_fltst_altivec; 801 # endif 802 # ifndef NO_32BIT 803 fr->synths.plain[r_1to1][f_32] = synth_1to1_s32_altivec; 804 fr->synths.stereo[r_1to1][f_32] = synth_1to1_s32_stereo_altivec; 805 # endif 806 done = 1; 807 } 808 # endif 809 810 # ifdef OPT_NEON 811 if(!done && (auto_choose || want_dec == neon) && cpu_neon(cpu_flags)) 812 { 813 chosen = dn_neon; 814 fr->cpu_opts.type = neon; 815 #ifdef OPT_MULTI 816 # ifndef NO_LAYER3 817 fr->cpu_opts.the_dct36 = dct36_neon; 818 # endif 819 #endif 820 # ifndef NO_16BIT 821 fr->synths.plain[r_1to1][f_16] = synth_1to1_neon; 822 fr->synths.stereo[r_1to1][f_16] = synth_1to1_stereo_neon; 823 # endif 824 # ifndef NO_REAL 825 fr->synths.plain[r_1to1][f_real] = synth_1to1_real_neon; 826 fr->synths.stereo[r_1to1][f_real] = synth_1to1_real_stereo_neon; 827 # endif 828 # ifndef NO_32BIT 829 fr->synths.plain[r_1to1][f_32] = synth_1to1_s32_neon; 830 fr->synths.stereo[r_1to1][f_32] = synth_1to1_s32_stereo_neon; 831 # endif 832 done = 1; 833 } 834 # endif 835 836 # ifdef OPT_ARM 837 if(!done && (auto_choose || want_dec == arm)) 838 { 839 chosen = dn_arm; 840 fr->cpu_opts.type = arm; 841 # ifndef NO_16BIT 842 fr->synths.plain[r_1to1][f_16] = synth_1to1_arm; 843 # endif 844 done = 1; 845 } 846 # endif 847 848 # ifdef OPT_NEON64 849 if(!done && (auto_choose || want_dec == neon64) && cpu_neon(cpu_flags)) 850 { 851 chosen = dn_neon64; 852 fr->cpu_opts.type = neon64; 853 #ifdef OPT_MULTI 854 # ifndef NO_LAYER3 855 fr->cpu_opts.the_dct36 = dct36_neon64; 856 # endif 857 #endif 858 # ifndef NO_16BIT 859 fr->synths.plain[r_1to1][f_16] = synth_1to1_neon64; 860 fr->synths.stereo[r_1to1][f_16] = synth_1to1_stereo_neon64; 861 # endif 862 # ifndef NO_REAL 863 fr->synths.plain[r_1to1][f_real] = synth_1to1_real_neon64; 864 fr->synths.stereo[r_1to1][f_real] = synth_1to1_fltst_neon64; 865 # endif 866 # ifndef NO_32BIT 867 fr->synths.plain[r_1to1][f_32] = synth_1to1_s32_neon64; 868 fr->synths.stereo[r_1to1][f_32] = synth_1to1_s32st_neon64; 869 # endif 870 done = 1; 871 } 872 # endif 873 874 # ifdef OPT_GENERIC 875 if(!done && (auto_choose || want_dec == generic)) 876 { 877 chosen = dn_generic; 878 fr->cpu_opts.type = generic; 879 done = 1; 880 } 881 # endif 882 883 #ifdef OPT_GENERIC_DITHER 884 if(!done && (auto_choose || want_dec == generic_dither)) 885 { 886 chosen = "dithered generic"; 887 fr->cpu_opts.type = generic_dither; 888 dithered = TRUE; 889 # ifndef NO_16BIT 890 fr->synths.plain[r_1to1][f_16] = synth_1to1_dither; 891 # ifndef NO_DOWNSAMPLE 892 fr->synths.plain[r_2to1][f_16] = synth_2to1_dither; 893 fr->synths.plain[r_4to1][f_16] = synth_4to1_dither; 894 # endif 895 # endif 896 done = 1; 897 } 898 #endif 899 900 fr->cpu_opts.class = decclass(fr->cpu_opts.type); 901 902 # ifndef NO_8BIT 903 # ifndef NO_16BIT /* possibility to use a 16->8 wrapper... */ 904 /* Last chance to use some optimized routine via generic wrappers (for 8bit). */ 905 if( fr->cpu_opts.type != ifuenf_dither 906 && fr->cpu_opts.type != generic_dither 907 && fr->synths.plain[r_1to1][f_16] != synth_base.plain[r_1to1][f_16] ) 908 { 909 fr->synths.plain[r_1to1][f_8] = synth_1to1_8bit_wrap; 910 fr->synths.mono[r_1to1][f_8] = synth_1to1_8bit_wrap_mono; 911 fr->synths.mono2stereo[r_1to1][f_8] = synth_1to1_8bit_wrap_m2s; 912 } 913 # endif 914 # endif 915 916 #ifdef OPT_DITHER 917 if(done && dithered) 918 { 919 /* run-time dither noise table generation */ 920 if(!frame_dither_init(fr)) 921 { 922 if(NOQUIET) error("Dither noise setup failed!"); 923 return 0; 924 } 925 } 926 #endif 927 928 if(done) 929 { 930 if(VERBOSE) fprintf(stderr, "Decoder: %s\n", chosen); 931 return 1; 932 } 933 else 934 { 935 if(NOQUIET) error("Could not set optimization!"); 936 return 0; 937 } 938 } 939 940 enum optdec dectype(const char* decoder) 941 { 942 enum optdec dt; 943 if( (decoder == NULL) 944 || (decoder[0] == 0) ) 945 return autodec; 946 947 for(dt=autodec; dt<nodec; ++dt) 948 if(!strcasecmp(decoder, decname[dt])) return dt; 949 950 return nodec; /* If we found nothing... */ 951 } 952 953 #ifdef OPT_MULTI 954 955 /* same number of entries as full list, but empty at beginning */ 956 static const char *mpg123_supported_decoder_list[] = 957 { 958 #ifdef OPT_SSE 959 NULL, 960 #endif 961 #ifdef OPT_SSE_VINTAGE 962 NULL, 963 #endif 964 #ifdef OPT_3DNOWEXT 965 NULL, 966 #endif 967 #ifdef OPT_3DNOWEXT_VINTAGE 968 NULL, 969 #endif 970 #ifdef OPT_3DNOW 971 NULL, 972 #endif 973 #ifdef OPT_3DNOW_VINTAGE 974 NULL, 975 #endif 976 #ifdef OPT_MMX 977 NULL, 978 #endif 979 #ifdef OPT_I586 980 NULL, 981 #endif 982 #ifdef OPT_I586_DITHER 983 NULL, 984 #endif 985 #ifdef OPT_I486 986 NULL, 987 #endif 988 #ifdef OPT_I386 989 NULL, 990 #endif 991 #ifdef OPT_ALTIVEC 992 NULL, 993 #endif 994 #ifdef OPT_AVX 995 NULL, 996 #endif 997 #ifdef OPT_X86_64 998 NULL, 999 #endif 1000 #ifdef OPT_ARM 1001 NULL, 1002 #endif 1003 #ifdef OPT_NEON 1004 NULL, 1005 #endif 1006 #ifdef OPT_NEON64 1007 NULL, 1008 #endif 1009 #ifdef OPT_GENERIC_FLOAT 1010 NULL, 1011 #endif 1012 # ifdef OPT_GENERIC 1013 NULL, 1014 # endif 1015 # ifdef OPT_GENERIC_DITHER 1016 NULL, 1017 # endif 1018 NULL 1019 }; 1020 #endif 1021 1022 static const char *mpg123_decoder_list[] = 1023 { 1024 #ifdef OPT_SSE 1025 dn_sse, 1026 #endif 1027 #ifdef OPT_SSE_VINTAGE 1028 dn_sse_vintage, 1029 #endif 1030 #ifdef OPT_3DNOWEXT 1031 dn_dreidnowext, 1032 #endif 1033 #ifdef OPT_3DNOWEXT_VINTAGE 1034 dn_dreidnowext_vintage, 1035 #endif 1036 #ifdef OPT_3DNOW 1037 dn_dreidnow, 1038 #endif 1039 #ifdef OPT_3DNOW_VINTAGE 1040 dn_dreidnow_vintage, 1041 #endif 1042 #ifdef OPT_MMX 1043 dn_mmx, 1044 #endif 1045 #ifdef OPT_I586 1046 dn_ifuenf, 1047 #endif 1048 #ifdef OPT_I586_DITHER 1049 dn_ifuenf_dither, 1050 #endif 1051 #ifdef OPT_I486 1052 dn_ivier, 1053 #endif 1054 #ifdef OPT_I386 1055 dn_idrei, 1056 #endif 1057 #ifdef OPT_ALTIVEC 1058 dn_altivec, 1059 #endif 1060 #ifdef OPT_AVX 1061 dn_avx, 1062 #endif 1063 #ifdef OPT_X86_64 1064 dn_x86_64, 1065 #endif 1066 #ifdef OPT_ARM 1067 dn_arm, 1068 #endif 1069 #ifdef OPT_NEON 1070 dn_neon, 1071 #endif 1072 #ifdef OPT_NEON64 1073 dn_neon64, 1074 #endif 1075 #ifdef OPT_GENERIC 1076 dn_generic, 1077 #endif 1078 #ifdef OPT_GENERIC_DITHER 1079 dn_generic_dither, 1080 #endif 1081 NULL 1082 }; 1083 1084 void check_decoders(void ) 1085 { 1086 #ifndef OPT_MULTI 1087 /* In non-multi mode, only the full list (one entry) is used. */ 1088 return; 1089 #else 1090 const char **d = mpg123_supported_decoder_list; 1091 #if (defined OPT_X86) || (defined OPT_X86_64) || (defined OPT_NEON) || (defined OPT_NEON64) 1092 getcpuflags(&cpu_flags); 1093 #endif 1094 #ifdef OPT_X86 1095 if(cpu_i586(cpu_flags)) 1096 { 1097 /* not yet: if(cpu_sse2(cpu_flags)) printf(" SSE2"); 1098 if(cpu_sse3(cpu_flags)) printf(" SSE3"); */ 1099 #ifdef OPT_SSE 1100 if(cpu_sse(cpu_flags)) *(d++) = dn_sse; 1101 #endif 1102 #ifdef OPT_SSE_VINTAGE 1103 if(cpu_sse(cpu_flags)) *(d++) = dn_sse_vintage; 1104 #endif 1105 #ifdef OPT_3DNOWEXT 1106 if(cpu_3dnowext(cpu_flags)) *(d++) = dn_dreidnowext; 1107 #endif 1108 #ifdef OPT_3DNOWEXT_VINTAGE 1109 if(cpu_3dnowext(cpu_flags)) *(d++) = dn_dreidnowext_vintage; 1110 #endif 1111 #ifdef OPT_3DNOW 1112 if(cpu_3dnow(cpu_flags)) *(d++) = dn_dreidnow; 1113 #endif 1114 #ifdef OPT_3DNOW_VINTAGE 1115 if(cpu_3dnow(cpu_flags)) *(d++) = dn_dreidnow_vintage; 1116 #endif 1117 #ifdef OPT_MMX 1118 if(cpu_mmx(cpu_flags)) *(d++) = dn_mmx; 1119 #endif 1120 #ifdef OPT_I586 1121 *(d++) = dn_ifuenf; 1122 #endif 1123 #ifdef OPT_I586_DITHER 1124 *(d++) = dn_ifuenf_dither; 1125 #endif 1126 } 1127 #endif 1128 /* just assume that the i486 built is run on a i486 cpu... */ 1129 #ifdef OPT_I486 1130 *(d++) = dn_ivier; 1131 #endif 1132 #ifdef OPT_ALTIVEC 1133 *(d++) = dn_altivec; 1134 #endif 1135 /* every supported x86 can do i386, any cpu can do generic */ 1136 #ifdef OPT_I386 1137 *(d++) = dn_idrei; 1138 #endif 1139 #ifdef OPT_AVX 1140 if(cpu_avx(cpu_flags)) *(d++) = dn_avx; 1141 #endif 1142 #ifdef OPT_X86_64 1143 *(d++) = dn_x86_64; 1144 #endif 1145 #ifdef OPT_ARM 1146 *(d++) = dn_arm; 1147 #endif 1148 #ifdef OPT_NEON 1149 if(cpu_neon(cpu_flags)) *(d++) = dn_neon; 1150 #endif 1151 #ifdef OPT_NEON64 1152 if(cpu_neon(cpu_flags)) *(d++) = dn_neon64; 1153 #endif 1154 #ifdef OPT_GENERIC 1155 *(d++) = dn_generic; 1156 #endif 1157 #ifdef OPT_GENERIC_DITHER 1158 *(d++) = dn_generic_dither; 1159 #endif 1160 #endif /* ndef OPT_MULTI */ 1161 } 1162 1163 const char* attribute_align_arg mpg123_current_decoder(mpg123_handle *mh) 1164 { 1165 if(mh == NULL) return NULL; 1166 1167 return decname[mh->cpu_opts.type]; 1168 } 1169 1170 const char attribute_align_arg **mpg123_decoders(void){ return mpg123_decoder_list; } 1171 const char attribute_align_arg **mpg123_supported_decoders(void) 1172 { 1173 #ifdef OPT_MULTI 1174 return mpg123_supported_decoder_list; 1175 #else 1176 return mpg123_decoder_list; 1177 #endif 1178 } 1179